feat: restore zhihu browser skills

Reconnect the recovered Zhihu skill flows to the live browser runtime and resolve their resources relative to the executable so they work outside the repo root.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
木炎
2026-03-27 14:29:38 +08:00
parent b87968632a
commit 6aad2ce48e
32 changed files with 7607 additions and 146 deletions

344
Cargo.lock generated
View File

@@ -33,6 +33,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"getrandom 0.3.4",
"once_cell", "once_cell",
"version_check", "version_check",
"zerocopy", "zerocopy",
@@ -328,6 +329,12 @@ version = "1.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "byteorder-lite" name = "byteorder-lite"
version = "0.1.0" version = "0.1.0"
@@ -418,7 +425,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3"
dependencies = [ dependencies = [
"chrono", "chrono",
"phf", "phf 0.12.1",
] ]
[[package]] [[package]]
@@ -604,12 +611,46 @@ dependencies = [
"typenum", "typenum",
] ]
[[package]]
name = "cssparser"
version = "0.31.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.11.3",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn",
]
[[package]] [[package]]
name = "data-encoding" name = "data-encoding"
version = "2.10.0" version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"
[[package]]
name = "derive_more"
version = "0.99.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "dialoguer" name = "dialoguer"
version = "0.12.0" version = "0.12.0"
@@ -675,6 +716,21 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "dtoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]] [[package]]
name = "dunce" name = "dunce"
version = "1.0.5" version = "1.0.5"
@@ -687,6 +743,12 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "ego-tree"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"
[[package]] [[package]]
name = "either" name = "either"
version = "1.15.0" version = "1.15.0"
@@ -839,6 +901,16 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.32" version = "0.3.32"
@@ -936,6 +1008,15 @@ dependencies = [
"thread_local", "thread_local",
] ]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]] [[package]]
name = "generic-array" name = "generic-array"
version = "0.14.7" version = "0.14.7"
@@ -946,6 +1027,15 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.17" version = "0.2.17"
@@ -1089,6 +1179,20 @@ dependencies = [
"windows-link", "windows-link",
] ]
[[package]]
name = "html5ever"
version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "http" name = "http"
version = "1.4.0" version = "1.4.0"
@@ -1543,6 +1647,12 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "mail-parser" name = "mail-parser"
version = "0.11.2" version = "0.11.2"
@@ -1552,6 +1662,20 @@ dependencies = [
"hashify", "hashify",
] ]
[[package]]
name = "markup5ever"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf 0.11.3",
"phf_codegen 0.11.3",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]] [[package]]
name = "matchers" name = "matchers"
version = "0.2.0" version = "0.2.0"
@@ -1632,6 +1756,12 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11ec1bc47d34ae756616f387c11fd0595f86f2cc7e6473bde9e3ded30cb902a1" checksum = "11ec1bc47d34ae756616f387c11fd0595f86f2cc7e6473bde9e3ded30cb902a1"
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@@ -1737,13 +1867,103 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_shared 0.10.0",
]
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared 0.11.3",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.12.1" version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
dependencies = [ dependencies = [
"phf_shared", "phf_shared 0.12.1",
]
[[package]]
name = "phf_codegen"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [
"phf_generator 0.10.0",
"phf_shared 0.10.0",
]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand 0.8.5",
]
[[package]]
name = "phf_generator"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared 0.11.3",
"rand 0.8.5",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher 0.3.11",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher 1.0.2",
] ]
[[package]] [[package]]
@@ -1752,7 +1972,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
dependencies = [ dependencies = [
"siphasher", "siphasher 1.0.2",
] ]
[[package]] [[package]]
@@ -1847,6 +2067,12 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]] [[package]]
name = "prettyplease" name = "prettyplease"
version = "0.2.37" version = "0.2.37"
@@ -1964,13 +2190,24 @@ version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha 0.3.1",
"rand_core 0.6.4",
]
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.9.2" version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [ dependencies = [
"rand_chacha", "rand_chacha 0.9.0",
"rand_core 0.9.5", "rand_core 0.9.5",
] ]
@@ -1985,6 +2222,16 @@ dependencies = [
"rand_core 0.10.0", "rand_core 0.10.0",
] ]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
]
[[package]] [[package]]
name = "rand_chacha" name = "rand_chacha"
version = "0.9.0" version = "0.9.0"
@@ -2320,6 +2567,41 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
dependencies = [
"ahash",
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"once_cell",
"selectors",
"tendril",
]
[[package]]
name = "selectors"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
dependencies = [
"bitflags",
"cssparser",
"derive_more",
"fxhash",
"log",
"new_debug_unreachable",
"phf 0.10.1",
"phf_codegen 0.10.0",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]] [[package]]
name = "self_cell" name = "self_cell"
version = "1.2.2" version = "1.2.2"
@@ -2418,6 +2700,15 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "servo_arc"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
dependencies = [
"stable_deref_trait",
]
[[package]] [[package]]
name = "sgclaw" name = "sgclaw"
version = "0.1.0" version = "0.1.0"
@@ -2428,7 +2719,9 @@ dependencies = [
"futures-util", "futures-util",
"hex", "hex",
"hmac", "hmac",
"regex",
"reqwest", "reqwest",
"scraper",
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
@@ -2506,6 +2799,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "1.0.2" version = "1.0.2"
@@ -2550,7 +2849,6 @@ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"psm", "psm",
"windows-sys 0.52.0",
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
@@ -2566,6 +2864,31 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.11.1" version = "0.11.1"
@@ -2633,6 +2956,17 @@ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.69" version = "1.0.69"

View File

@@ -10,7 +10,9 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] }
futures-util = "0.3" futures-util = "0.3"
hex = "0.4" hex = "0.4"
hmac = "0.12" hmac = "0.12"
regex = "1"
reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] } reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
scraper = "0.20"
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
serde_json = "1" serde_json = "1"
sha2 = "0.10" sha2 = "0.10"

View File

@@ -0,0 +1,19 @@
{
"hotlist_url": "https://www.zhihu.com/hot",
"domains": {
"zhihu": "www.zhihu.com"
},
"literals": {
"hotlist_guard": "热榜"
},
"selectors": {
"hotlist_root": "main, body",
"hotlist_item": ".HotList-item, [data-hot-item], section ol li",
"hotlist_title_link": ".HotList-item-title a, h2 a, .ContentItem-title a",
"hotlist_summary": ".HotList-item-summary, .HotItem-content, .RichContent-inner, .ContentItem-excerpt",
"hotlist_heat": ".HotList-item-heat, .HotItem-metrics, .HotItem-hot",
"comment_list": ".Comments-list, .CommentListV2, [data-testid='comment-list'], .CommentList",
"comment_item": ".Comments-list > .CommentItem, .CommentListV2 > .CommentItem, .CommentItemV2, .CommentItem",
"comment_metric": ".CommentItem-metric, .CommentItem-footer button, .ContentItem-actions button, button"
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
{
"entry_url": "https://www.zhihu.com/creator",
"editor_url": "https://zhuanlan.zhihu.com/write",
"domains": {
"creator": "www.zhihu.com",
"editor": "zhuanlan.zhihu.com"
},
"literals": {
"write_entry_text": "写文章",
"title_placeholder": "请输入标题(最多 100 个字)",
"body_role": "textbox",
"publish_text": "发布",
"publish_confirm_text": "确认发布"
},
"selectors": {
"creator_write_panel": "div.css-1q62b6s",
"creator_write_entry": "div.css-1q62b6s > div.css-byu4by",
"title_input": "textarea[placeholder='请输入标题(最多 100 个字)']",
"body_editor": "div.notranslate.public-DraftEditor-content[contenteditable='true'][role='textbox']",
"publish_button": "button.Button--primary.Button--blue",
"publish_confirm_dialog": "div[role='dialog']",
"publish_confirm_button": "div[role='dialog'] button.Button--primary.Button--blue",
"published_title": "h1"
},
"steps": [
{
"name": "navigate_creator",
"action": "navigate",
"expected_domain": "creator",
"url_ref": "entry_url",
"log_message": "navigate https://www.zhihu.com/creator"
},
{
"name": "click_write_article",
"action": "click",
"expected_domain": "creator",
"selector_ref": "creator_write_entry",
"wait_after_ms": 1500,
"log_message": "click 写文章"
},
{
"name": "wait_editor_ready",
"action": "waitForSelector",
"expected_domain": "editor",
"selector_ref": "title_input",
"timeout_ms": 8000,
"log_message": "wait for editor title input"
},
{
"name": "type_title",
"action": "type",
"expected_domain": "editor",
"selector_ref": "title_input",
"text_source": "title",
"clear_first": true,
"log_message": "type article title into 请输入标题(最多 100 个字)"
},
{
"name": "type_body",
"action": "type",
"expected_domain": "editor",
"selector_ref": "body_editor",
"text_source": "body",
"clear_first": true,
"log_message": "type article body into editor textbox"
},
{
"name": "scroll_publish_button",
"action": "scrollTo",
"expected_domain": "editor",
"selector_ref": "publish_button",
"only_when_publish": true,
"log_message": "scroll to 发布"
},
{
"name": "click_publish",
"action": "click",
"expected_domain": "editor",
"selector_ref": "publish_button",
"wait_after_ms": 800,
"only_when_publish": true,
"capture_url": true,
"log_message": "click 发布"
},
{
"name": "wait_publish_confirm_dialog",
"action": "waitForSelector",
"expected_domain": "editor",
"selector_ref": "publish_confirm_dialog",
"timeout_ms": 8000,
"only_when_publish": true,
"log_message": "wait for publish confirm dialog"
},
{
"name": "click_publish_confirm",
"action": "click",
"expected_domain": "editor",
"selector_ref": "publish_confirm_button",
"wait_after_ms": 1500,
"only_when_publish": true,
"capture_url": true,
"log_message": "click 确认发布"
},
{
"name": "wait_published_title",
"action": "waitForSelector",
"expected_domain": "editor",
"selector_ref": "published_title",
"timeout_ms": 15000,
"only_when_publish": true,
"capture_url": true,
"log_message": "wait for published article title"
},
{
"name": "confirm_published_title",
"action": "getText",
"expected_domain": "editor",
"selector_ref": "published_title",
"only_when_publish": true,
"expect_text_source": "title",
"allow_empty_text": true,
"capture_url": true,
"log_message": "verify published article title"
}
]
}

View File

@@ -9,6 +9,7 @@ use crate::config::DeepSeekSettings;
use crate::pipe::{ use crate::pipe::{
AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport, AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
}; };
use crate::skill;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct AgentRuntimeContext { pub struct AgentRuntimeContext {
@@ -34,7 +35,7 @@ impl AgentRuntimeContext {
let _ = args.next(); let _ = args.next();
while let Some(arg) = args.next() { while let Some(arg) = args.next() {
if arg == OsString::from("--config-path") { if arg.to_string_lossy() == "--config-path" {
let Some(value) = args.next() else { let Some(value) = args.next() else {
return Err(PipeError::Protocol( return Err(PipeError::Protocol(
"missing value for --config-path".to_string(), "missing value for --config-path".to_string(),
@@ -88,26 +89,58 @@ fn send_mode_log<T: Transport>(transport: &T, mode: &str) -> Result<(), PipeErro
}) })
} }
fn explicit_non_task_response(history: &[ConversationMessage], instruction: &str) -> Option<String> { fn explicit_non_task_response(
history: &[ConversationMessage],
instruction: &str,
) -> Option<String> {
if !history.is_empty() { if !history.is_empty() {
return None; return None;
} }
let trimmed = instruction.trim(); let trimmed = instruction.trim();
if trimmed.is_empty() { if trimmed.is_empty() {
return Some("sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。".to_string()); return Some(
"sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。"
.to_string(),
);
} }
const TASK_HINTS: &[&str] = &[ const TASK_HINTS: &[&str] = &[
"打开", "搜索", "点击", "输入", "导航", "跳转", "访问", "提取", "获取", "网页", "页面", "打开",
"标签页", "百度", "知乎", "google", "open", "search", "click", "type", "navigate", "搜索",
"点击",
"输入",
"导航",
"跳转",
"访问",
"提取",
"获取",
"网页",
"页面",
"标签页",
"百度",
"知乎",
"google",
"open",
"search",
"click",
"type",
"navigate",
]; ];
if TASK_HINTS.iter().any(|hint| trimmed.contains(hint)) { if TASK_HINTS.iter().any(|hint| trimmed.contains(hint)) {
return None; return None;
} }
const CHITCHAT_INPUTS: &[&str] = &[ const CHITCHAT_INPUTS: &[&str] = &[
"hi", "hello", "hey", "你好", "您好", "", "在吗", "你是谁", "介绍一下你自己", "hi",
"hello",
"hey",
"你好",
"您好",
"",
"在吗",
"你是谁",
"介绍一下你自己",
]; ];
if CHITCHAT_INPUTS if CHITCHAT_INPUTS
.iter() .iter()
@@ -194,6 +227,22 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
}); });
} }
match skill::try_execute_skill(transport, browser_tool, &instruction) {
Ok(Some(summary)) => {
return transport.send(&AgentMessage::TaskComplete {
success: true,
summary,
});
}
Err(err) => {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
});
}
Ok(None) => {}
}
let task_context = CompatTaskContext { let task_context = CompatTaskContext {
conversation_id: (!conversation_id.trim().is_empty()) conversation_id: (!conversation_id.trim().is_empty())
.then_some(conversation_id.clone()), .then_some(conversation_id.clone()),

View File

@@ -21,8 +21,7 @@ pub fn execute_task_with_provider<P: LlmProvider, T: Transport>(
let messages = vec![ let messages = vec![
ChatMessage { ChatMessage {
role: "system".to_string(), role: "system".to_string(),
content: "You are sgClaw. Use browser_action to complete the browser task." content: "You are sgClaw. Use browser_action to complete the browser task.".to_string(),
.to_string(),
}, },
ChatMessage { ChatMessage {
role: "user".to_string(), role: "user".to_string(),
@@ -35,8 +34,8 @@ pub fn execute_task_with_provider<P: LlmProvider, T: Transport>(
.map_err(map_llm_error_to_pipe_error)?; .map_err(map_llm_error_to_pipe_error)?;
for call in calls { for call in calls {
let browser_call = parse_browser_action_call(call) let browser_call =
.map_err(|err| PipeError::Protocol(err.to_string()))?; parse_browser_action_call(call).map_err(|err| PipeError::Protocol(err.to_string()))?;
transport.send(&AgentMessage::LogEntry { transport.send(&AgentMessage::LogEntry {
level: "info".to_string(), level: "info".to_string(),

View File

@@ -60,14 +60,14 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
Err(err) => return Ok(failed_tool_result(err.to_string())), Err(err) => return Ok(failed_tool_result(err.to_string())),
}; };
let result = match self.browser_tool.invoke( let result =
request.action, match self
request.params, .browser_tool
&request.expected_domain, .invoke(request.action, request.params, &request.expected_domain)
) { {
Ok(result) => result, Ok(result) => result,
Err(err) => return Ok(failed_tool_result(err.to_string())), Err(err) => return Ok(failed_tool_result(err.to_string())),
}; };
let output = serde_json::to_string(&json!({ let output = serde_json::to_string(&json!({
"seq": result.seq, "seq": result.seq,
@@ -80,8 +80,7 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
Ok(ToolResult { Ok(ToolResult {
success: result.success, success: result.success,
output, output,
error: (!result.success) error: (!result.success).then(|| format_browser_action_error(&result.data)),
.then(|| format_browser_action_error(&result.data)),
}) })
} }
} }
@@ -92,7 +91,9 @@ struct BrowserActionRequest {
params: Value, params: Value,
} }
fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, BrowserActionAdapterError> { fn parse_browser_action_request(
args: Value,
) -> Result<BrowserActionRequest, BrowserActionAdapterError> {
let mut args = match args { let mut args = match args {
Value::Object(args) => args, Value::Object(args) => args,
other => { other => {

View File

@@ -8,7 +8,9 @@ use crate::config::DeepSeekSettings;
const SGCLAW_ZEROCLAW_WORKSPACE_DIR: &str = ".sgclaw-zeroclaw-workspace"; const SGCLAW_ZEROCLAW_WORKSPACE_DIR: &str = ".sgclaw-zeroclaw-workspace";
pub fn build_zeroclaw_config(workspace_root: &Path) -> Result<ZeroClawConfig, crate::config::ConfigError> { pub fn build_zeroclaw_config(
workspace_root: &Path,
) -> Result<ZeroClawConfig, crate::config::ConfigError> {
let settings = DeepSeekSettings::from_env()?; let settings = DeepSeekSettings::from_env()?;
Ok(build_zeroclaw_config_from_settings( Ok(build_zeroclaw_config_from_settings(
workspace_root, workspace_root,
@@ -21,13 +23,15 @@ pub fn build_zeroclaw_config_from_settings(
settings: &DeepSeekSettings, settings: &DeepSeekSettings,
) -> ZeroClawConfig { ) -> ZeroClawConfig {
let workspace_dir = zeroclaw_workspace_dir(workspace_root); let workspace_dir = zeroclaw_workspace_dir(workspace_root);
let mut config = ZeroClawConfig::default(); let mut config = ZeroClawConfig {
config.workspace_dir = workspace_dir.clone(); workspace_dir: workspace_dir.clone(),
config.config_path = workspace_dir.join("config.toml"); config_path: workspace_dir.join("config.toml"),
config.default_provider = Some("deepseek".to_string()); default_provider: Some("deepseek".to_string()),
config.default_model = Some(settings.model.clone()); default_model: Some(settings.model.clone()),
config.api_key = Some(settings.api_key.clone()); api_key: Some(settings.api_key.clone()),
config.api_url = Some(settings.base_url.clone()); api_url: Some(settings.base_url.clone()),
..ZeroClawConfig::default()
};
configure_embedded_memory(&mut config); configure_embedded_memory(&mut config);
configure_embedded_cron(&mut config); configure_embedded_cron(&mut config);
config config

View File

@@ -65,7 +65,10 @@ where
for job in jobs { for job in jobs {
if !matches!(job.job_type, JobType::Agent) { if !matches!(job.job_type, JobType::Agent) {
anyhow::bail!("unsupported cron job type in sgclaw compat: {:?}", job.job_type); anyhow::bail!(
"unsupported cron job type in sgclaw compat: {:?}",
job.job_type
);
} }
let started_at = Utc::now(); let started_at = Utc::now();

View File

@@ -9,10 +9,12 @@ pub fn log_entry_for_turn_event(event: &TurnEvent) -> Option<AgentMessage> {
level: "info".to_string(), level: "info".to_string(),
message: format_tool_call(name, args), message: format_tool_call(name, args),
}), }),
TurnEvent::ToolResult { output, .. } if is_tool_error(output) => Some(AgentMessage::LogEntry { TurnEvent::ToolResult { output, .. } if is_tool_error(output) => {
level: "error".to_string(), Some(AgentMessage::LogEntry {
message: output.trim_start_matches("Error: ").to_string(), level: "error".to_string(),
}), message: output.trim_start_matches("Error: ").to_string(),
})
}
_ => None, _ => None,
} }
} }
@@ -29,7 +31,10 @@ fn format_tool_call(name: &str, args: &Value) -> String {
match action { match action {
"navigate" => { "navigate" => {
let url = args.get("url").and_then(Value::as_str).unwrap_or("<missing-url>"); let url = args
.get("url")
.and_then(Value::as_str)
.unwrap_or("<missing-url>");
format!("navigate {url}") format!("navigate {url}")
} }
"type" => { "type" => {

View File

@@ -7,18 +7,14 @@ use zeroclaw::agent::dispatcher::NativeToolDispatcher;
use zeroclaw::agent::{Agent, TurnEvent}; use zeroclaw::agent::{Agent, TurnEvent};
use zeroclaw::config::Config as ZeroClawConfig; use zeroclaw::config::Config as ZeroClawConfig;
use zeroclaw::observability::{NoopObserver, Observer}; use zeroclaw::observability::{NoopObserver, Observer};
use zeroclaw::providers::{ use zeroclaw::providers::traits::{ProviderCapabilities, StreamEvent, StreamOptions, StreamResult};
self, ChatMessage, ChatRequest, ChatResponse, Provider, use zeroclaw::providers::{self, ChatMessage, ChatRequest, ChatResponse, Provider};
};
use zeroclaw::providers::traits::{
ProviderCapabilities, StreamEvent, StreamOptions, StreamResult,
};
use crate::compat::browser_tool_adapter::{ZeroClawBrowserTool, BROWSER_ACTION_TOOL_NAME}; use crate::compat::browser_tool_adapter::{ZeroClawBrowserTool, BROWSER_ACTION_TOOL_NAME};
use crate::compat::config_adapter::build_zeroclaw_config_from_settings; use crate::compat::config_adapter::build_zeroclaw_config_from_settings;
use crate::config::DeepSeekSettings;
use crate::compat::event_bridge::log_entry_for_turn_event; use crate::compat::event_bridge::log_entry_for_turn_event;
use crate::compat::memory_adapter::build_memory; use crate::compat::memory_adapter::build_memory;
use crate::config::DeepSeekSettings;
use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport}; use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport};
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
@@ -123,10 +119,7 @@ fn build_agent<T: Transport + 'static>(
fn build_provider(config: &ZeroClawConfig) -> Result<Box<dyn Provider>, PipeError> { fn build_provider(config: &ZeroClawConfig) -> Result<Box<dyn Provider>, PipeError> {
let provider_name = config.default_provider.as_deref().unwrap_or("deepseek"); let provider_name = config.default_provider.as_deref().unwrap_or("deepseek");
let model_name = config let model_name = config.default_model.as_deref().unwrap_or("deepseek-chat");
.default_model
.as_deref()
.unwrap_or("deepseek-chat");
let runtime_options = providers::provider_runtime_options_from_config(config); let runtime_options = providers::provider_runtime_options_from_config(config);
let resolved_provider_name = if provider_name == "deepseek" { let resolved_provider_name = if provider_name == "deepseek" {
config config
@@ -191,7 +184,9 @@ impl Provider for NonStreamingProvider {
model: &str, model: &str,
temperature: f64, temperature: f64,
) -> anyhow::Result<String> { ) -> anyhow::Result<String> {
self.inner.chat_with_history(messages, model, temperature).await self.inner
.chat_with_history(messages, model, temperature)
.await
} }
async fn chat( async fn chat(

View File

@@ -50,8 +50,7 @@ impl DeepSeekSettings {
let config: RawDeepSeekSettings = serde_json::from_str(&raw) let config: RawDeepSeekSettings = serde_json::from_str(&raw)
.map_err(|err| ConfigError::ConfigParse(path.to_path_buf(), err.to_string()))?; .map_err(|err| ConfigError::ConfigParse(path.to_path_buf(), err.to_string()))?;
Self::new(config.api_key, config.base_url, config.model) Self::new(config.api_key, config.base_url, config.model).map_err(|err| err.with_path(path))
.map_err(|err| err.with_path(path))
} }
fn new(api_key: String, base_url: String, model: String) -> Result<Self, ConfigError> { fn new(api_key: String, base_url: String, model: String) -> Result<Self, ConfigError> {

View File

@@ -4,6 +4,7 @@ pub mod config;
pub mod llm; pub mod llm;
pub mod pipe; pub mod pipe;
pub mod security; pub mod security;
pub mod skill;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;

View File

@@ -76,45 +76,35 @@ impl<T: Transport> BrowserPipeTool<T> {
self.transport.send(&command)?; self.transport.send(&command)?;
let started = Instant::now(); let started = Instant::now();
loop { let Some(remaining) = self.response_timeout.checked_sub(started.elapsed()) else {
let Some(remaining) = self.response_timeout.checked_sub(started.elapsed()) else { return Err(PipeError::Timeout);
return Err(PipeError::Timeout); };
};
match self.transport.recv_timeout(remaining)? { match self.transport.recv_timeout(remaining)? {
BrowserMessage::Response { BrowserMessage::Response {
seq: response_seq, seq: response_seq,
success, success,
data, data,
aom_snapshot, aom_snapshot,
timing, timing,
} if response_seq == seq => { } if response_seq == seq => Ok(CommandOutput {
return Ok(CommandOutput { seq: response_seq,
seq: response_seq, success,
success, data,
data, aom_snapshot,
aom_snapshot, timing,
timing, }),
}); BrowserMessage::Response {
} seq: response_seq, ..
BrowserMessage::Response { } => Err(PipeError::Protocol(format!(
seq: response_seq, .. "received response seq {response_seq} while waiting for {seq}"
} => { ))),
return Err(PipeError::Protocol(format!( BrowserMessage::Init { .. } => Err(PipeError::UnexpectedMessage(
"received response seq {response_seq} while waiting for {seq}" "received duplicate init after handshake".to_string(),
))); )),
} BrowserMessage::SubmitTask { .. } => Err(PipeError::UnexpectedMessage(
BrowserMessage::Init { .. } => { "received submit_task while waiting for response".to_string(),
return Err(PipeError::UnexpectedMessage( )),
"received duplicate init after handshake".to_string(),
));
}
BrowserMessage::SubmitTask { .. } => {
return Err(PipeError::UnexpectedMessage(
"received submit_task while waiting for response".to_string(),
));
}
}
} }
} }
} }

View File

@@ -5,8 +5,8 @@ pub mod protocol;
pub use browser_tool::{BrowserPipeTool, CommandOutput}; pub use browser_tool::{BrowserPipeTool, CommandOutput};
pub use handshake::{perform_handshake, HandshakeResult}; pub use handshake::{perform_handshake, HandshakeResult};
pub use protocol::{ pub use protocol::{
supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage, supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage, SecurityFields,
SecurityFields, Timing, Timing,
}; };
use std::io::{BufRead, BufReader, Read, Write}; use std::io::{BufRead, BufReader, Read, Write};
@@ -71,7 +71,7 @@ impl StdioTransport {
continue; continue;
} }
if line.as_bytes().len() > MAX_MESSAGE_BYTES { if line.len() > MAX_MESSAGE_BYTES {
let _ = tx.send(Err(PipeError::MessageTooLarge(line.len()))); let _ = tx.send(Err(PipeError::MessageTooLarge(line.len())));
continue; continue;
} }

85
src/skill/mod.rs Normal file
View File

@@ -0,0 +1,85 @@
pub mod router;
pub mod zhihu;
pub mod zhihu_hotlist;
pub mod zhihu_hotlist_store;
pub mod zhihu_navigation;
use std::path::PathBuf;
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
/// Builds the relative location of a bundled skill resource:
/// `resources/skills/<resource_name>`.
fn relative_skill_resource_path(resource_name: &str) -> PathBuf {
    let mut location = PathBuf::from("resources");
    location.push("skills");
    location.push(resource_name);
    location
}
/// Resolves a skill resource relative to the directory that contains `executable_path`.
///
/// The result is `<executable dir>/resources/skills/<resource_name>`. When the executable
/// path has no parent directory, the relative `resources/skills/<resource_name>` path is
/// returned instead.
pub(crate) fn skill_resource_path_from_executable(
    executable_path: PathBuf,
    resource_name: &str,
) -> PathBuf {
    match executable_path.parent() {
        Some(install_dir) => install_dir
            .join("resources")
            .join("skills")
            .join(resource_name),
        None => relative_skill_resource_path(resource_name),
    }
}
/// Picks the path from which a skill resource should be loaded.
///
/// The executable-relative location is used only when the current executable can be
/// determined and the resource exists there; otherwise the relative
/// `resources/skills/<resource_name>` path is returned.
pub(crate) fn default_skill_resource_path(resource_name: &str) -> PathBuf {
    if let Ok(executable_path) = std::env::current_exe() {
        let candidate = skill_resource_path_from_executable(executable_path, resource_name);
        if candidate.exists() {
            return candidate;
        }
    }
    relative_skill_resource_path(resource_name)
}
/// Routes `instruction` to a built-in skill and runs it.
///
/// Returns `Ok(Some(summary))` when a skill handled the instruction and `Ok(None)` when the
/// router did not match any skill.
///
/// # Errors
///
/// Routing failures and skill failures are both reported as [`PipeError::Protocol`] carrying
/// the display text of the underlying error.
pub fn try_execute_skill<T: Transport>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    instruction: &str,
) -> Result<Option<String>, PipeError> {
    // Single conversion point from router/skill errors to the pipe-level error type.
    fn as_protocol_error(err: impl ToString) -> PipeError {
        PipeError::Protocol(err.to_string())
    }

    let Some(routed) = router::route_instruction(instruction).map_err(as_protocol_error)? else {
        return Ok(None);
    };

    let summary = match routed {
        router::RoutedSkill::ZhihuWrite(req) => {
            zhihu::execute(transport, browser_tool, req)
                .map_err(as_protocol_error)?
                .summary
        }
        router::RoutedSkill::ZhihuHotlistCollect(req) => {
            zhihu_hotlist::execute_collect(transport, browser_tool, req)
                .map_err(as_protocol_error)?
                .summary
        }
        router::RoutedSkill::ZhihuHotlistReport(req) => {
            zhihu_hotlist::execute_report(req)
                .map_err(as_protocol_error)?
                .summary
        }
        router::RoutedSkill::ZhihuNavigate(req) => {
            zhihu_navigation::execute(transport, browser_tool, req)
                .map_err(as_protocol_error)?
                .summary
        }
    };

    Ok(Some(summary))
}
// Unit tests for executable-relative skill resource resolution.
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::skill_resource_path_from_executable;
// The resource path must be built from the executable's parent directory:
// `<dir of executable>/resources/skills/<resource>`.
#[test]
fn skill_resource_path_uses_executable_directory_instead_of_cwd() {
let executable_path = PathBuf::from("/tmp/out/KylinRelease/sgclaw");
let resolved =
skill_resource_path_from_executable(executable_path, "zhihu_navigation_pages.json");
assert_eq!(
resolved,
PathBuf::from("/tmp/out/KylinRelease/resources/skills/zhihu_navigation_pages.json")
);
}
}

92
src/skill/router.rs Normal file
View File

@@ -0,0 +1,92 @@
use thiserror::Error;
use super::zhihu::ZhihuWriteRequest;
use super::zhihu_hotlist::{ZhihuHotlistCollectRequest, ZhihuHotlistReportRequest};
use super::zhihu_navigation::{
try_route_alias as try_route_zhihu_navigation_alias, ZhihuNavigateRequest,
};
/// A skill selected by the router together with its parsed request payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RoutedSkill {
/// Explicit `zhihu_write` skill invocation.
ZhihuWrite(ZhihuWriteRequest),
/// Explicit `zhihu_hotlist_collect` skill invocation.
ZhihuHotlistCollect(ZhihuHotlistCollectRequest),
/// Explicit `zhihu_hotlist_report` skill invocation.
ZhihuHotlistReport(ZhihuHotlistReportRequest),
/// `zhihu_navigate` skill, reached either explicitly or through a navigation alias.
ZhihuNavigate(ZhihuNavigateRequest),
}
/// Errors produced while turning an instruction string into a [`RoutedSkill`].
#[derive(Debug, Error)]
pub enum RouterError {
/// The instruction was `skill:` with nothing after the prefix.
#[error("missing skill name after skill: prefix")]
MissingSkillName,
/// A skill name was given but no argument text followed it; carries the skill name.
#[error("missing JSON arguments for skill: {0}")]
MissingArguments(String),
/// The skill name is not one of the skills known to the router; carries the name.
#[error("unknown skill: {0}")]
UnknownSkill(String),
/// The argument text could not be turned into the skill's request type.
#[error("invalid JSON arguments for skill {skill}: {message}")]
InvalidArguments { skill: String, message: String },
}
/// Decides which skill, if any, should handle `instruction`.
///
/// Instructions starting with `skill:` (after trimming) are parsed as explicit skill
/// invocations. Anything else is offered to the Zhihu navigation alias matcher; `Ok(None)`
/// means no skill applies.
///
/// # Errors
///
/// Returns the explicit-skill parse error, or [`RouterError::InvalidArguments`] for skill
/// `zhihu_navigate` when the alias matcher itself fails.
pub fn route_instruction(instruction: &str) -> Result<Option<RoutedSkill>, RouterError> {
    let candidate = instruction.trim();
    if candidate.starts_with("skill:") {
        let routed = parse_explicit_skill(candidate)?;
        return Ok(Some(routed));
    }

    try_route_zhihu_navigation_alias(candidate)
        .map(|alias| alias.map(RoutedSkill::ZhihuNavigate))
        .map_err(|err| RouterError::InvalidArguments {
            skill: "zhihu_navigate".to_string(),
            message: err.to_string(),
        })
}
fn parse_explicit_skill(instruction: &str) -> Result<RoutedSkill, RouterError> {
let rest = instruction
.strip_prefix("skill:")
.ok_or(RouterError::MissingSkillName)?
.trim();
if rest.is_empty() {
return Err(RouterError::MissingSkillName);
}
let split_at = rest
.find(char::is_whitespace)
.ok_or_else(|| RouterError::MissingArguments(rest.to_string()))?;
let name = rest[..split_at].trim();
let args = rest[split_at..].trim();
if args.is_empty() {
return Err(RouterError::MissingArguments(name.to_string()));
}
match name {
"zhihu_write" => serde_json::from_str::<ZhihuWriteRequest>(args)
.map(RoutedSkill::ZhihuWrite)
.map_err(|err| RouterError::InvalidArguments {
skill: name.to_string(),
message: err.to_string(),
}),
"zhihu_hotlist_collect" => serde_json::from_str::<ZhihuHotlistCollectRequest>(args)
.map(RoutedSkill::ZhihuHotlistCollect)
.map_err(|err| RouterError::InvalidArguments {
skill: name.to_string(),
message: err.to_string(),
}),
"zhihu_hotlist_report" => serde_json::from_str::<ZhihuHotlistReportRequest>(args)
.map(RoutedSkill::ZhihuHotlistReport)
.map_err(|err| RouterError::InvalidArguments {
skill: name.to_string(),
message: err.to_string(),
}),
"zhihu_navigate" => serde_json::from_str::<ZhihuNavigateRequest>(args)
.map(RoutedSkill::ZhihuNavigate)
.map_err(|err| RouterError::InvalidArguments {
skill: name.to_string(),
message: err.to_string(),
}),
other => Err(RouterError::UnknownSkill(other.to_string())),
}
}

419
src/skill/zhihu.rs Normal file
View File

@@ -0,0 +1,419 @@
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use thiserror::Error;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, Transport};
// URL prefix a captured URL must start with to be treated as a published article.
const ZHIHU_ARTICLE_URL_PREFIX: &str = "https://zhuanlan.zhihu.com/p/";
// Trailing path segment that is removed when normalizing a captured article URL.
const ZHIHU_ARTICLE_EDIT_SUFFIX: &str = "/edit";
/// Serde default for `ZhihuWriteRequest::publish`: publish unless the request says otherwise.
fn default_publish() -> bool {
true
}
/// Arguments of the `zhihu_write` skill.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct ZhihuWriteRequest {
/// Article title; rejected when blank after trimming.
pub title: String,
/// Article body; rejected when blank after trimming.
pub body: String,
/// Whether steps marked `only_when_publish` are run. Defaults to `true` when omitted.
#[serde(default = "default_publish")]
pub publish: bool,
}
/// Outcome of a `zhihu_write` run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ZhihuWriteResult {
/// Human-readable summary text produced for the run.
pub summary: String,
/// Mirrors the `publish` flag of the request.
pub published: bool,
/// URL captured during the flow, if any; for publishing runs this is the normalized
/// article URL.
pub final_url: Option<String>,
}
/// Declarative description of the Zhihu write flow, deserialized from the flow JSON file.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuFlow {
/// URL used by `navigate` steps whose `url_ref` is `entry_url`.
pub entry_url: String,
/// URL used by `navigate` steps whose `url_ref` is `editor_url`.
pub editor_url: String,
/// Named domains; a step's `expected_domain` is a key into this map.
pub domains: HashMap<String, String>,
/// Named literal strings carried by the flow file.
pub literals: HashMap<String, String>,
/// Named selectors; a step's `selector_ref` is a key into this map.
pub selectors: HashMap<String, String>,
/// Steps executed in order.
pub steps: Vec<FlowStep>,
}
/// One browser step of the Zhihu write flow.
#[derive(Debug, Clone, Deserialize)]
pub struct FlowStep {
/// Step name, used to identify the step in error values.
pub name: String,
/// Action name: `click`, `type`, `navigate`, `getText`, `getHtml`, `waitForSelector`
/// or `scrollTo`.
pub action: String,
/// Key into `ZhihuFlow::domains` giving the domain passed to the browser tool.
pub expected_domain: String,
/// Key into `ZhihuFlow::selectors`; required by every action except `navigate`
/// (optional for `scrollTo`).
pub selector_ref: Option<String>,
/// For `navigate`: either `entry_url` or `editor_url`.
pub url_ref: Option<String>,
/// For `type`: which request field to type, `title` or `body`.
pub text_source: Option<String>,
/// For `type`: forwarded as the `clear_first` parameter.
#[serde(default)]
pub clear_first: bool,
/// For `click`: forwarded as the `wait_after` parameter when present.
pub wait_after_ms: Option<u64>,
/// For `waitForSelector`: forwarded as the `timeout_ms` parameter when present.
pub timeout_ms: Option<u64>,
/// For `getHtml`: forwarded as the `outer` parameter when present.
pub outer: Option<bool>,
/// For `scrollTo` without a selector: forwarded as the `x` parameter when present.
pub x: Option<i64>,
/// For `scrollTo` without a selector: forwarded as the `y` parameter when present.
pub y: Option<i64>,
/// When set, the step is skipped unless the request asks to publish.
#[serde(default)]
pub only_when_publish: bool,
/// When present, the text returned by the step must contain this string.
pub expect_contains: Option<String>,
/// When present (`title` or `body`), the text returned by the step must equal that
/// request field.
pub expect_text_source: Option<String>,
/// When set, an empty returned text passes the `expect_text_source` comparison.
#[serde(default)]
pub allow_empty_text: bool,
/// When set, the `url` field of the step result is captured.
#[serde(default)]
pub capture_url: bool,
/// Message sent as an `info` log entry before the step runs.
pub log_message: String,
}
/// Errors raised by the `zhihu_write` skill.
#[derive(Debug, Error)]
pub enum ZhihuSkillError {
/// The request title is blank.
#[error("title 不能为空")]
EmptyTitle,
/// The request body is blank.
#[error("body 不能为空")]
EmptyBody,
/// The flow file could not be read or parsed; carries the cause and the path.
#[error("failed to load zhihu flow: {0}")]
FlowLoad(String),
/// A step names an action that is not supported.
#[error("unknown action in zhihu flow: {0}")]
UnknownAction(String),
/// A step that needs a selector has no `selector_ref`; carries the step name.
#[error("missing selector ref in zhihu flow step: {0}")]
MissingSelectorRef(String),
/// A `navigate` step has no `url_ref`, or one that is not recognized.
#[error("missing url ref in zhihu flow step: {0}")]
MissingUrlRef(String),
/// A `selector_ref` key is absent from the flow's selector map.
#[error("missing selector in zhihu flow: {0}")]
MissingSelector(String),
/// An `expected_domain` key is absent from the flow's domain map.
#[error("missing domain in zhihu flow: {0}")]
MissingDomain(String),
/// A text source is missing from a step or is not `title`/`body`.
#[error("missing text source in zhihu flow step: {0}")]
MissingTextSource(String),
/// A `scrollTo` step has neither a selector nor any coordinate.
#[error("missing scroll target in zhihu flow step: {0}")]
MissingScrollTarget(String),
/// Logging or invoking a step failed, or the browser reported `success == false`.
#[error("browser action failed at step {step}: {message}")]
BrowserActionFailed { step: String, message: String },
/// The returned text does not contain the step's `expect_contains` value.
#[error("step {step} expected text containing `{expected}`, got `{actual}`")]
ExpectedTextMissing {
step: String,
expected: String,
actual: String,
},
/// The returned text differs from the request field named by `expect_text_source`.
#[error("step {step} expected text `{expected}`, got `{actual}`")]
ExpectedTextMismatch {
step: String,
expected: String,
actual: String,
},
/// A publishing run finished without capturing a published article URL.
#[error("step {step} did not return article url; cannot confirm article was published")]
MissingPublishedUrl { step: String },
}
/// Returns the path of `zhihu_write_flow.json` as resolved by the shared skill resource
/// lookup.
pub fn default_flow_path() -> PathBuf {
super::default_skill_resource_path("zhihu_write_flow.json")
}
/// Reads and deserializes the Zhihu write flow from its default path.
///
/// # Errors
///
/// Returns [`ZhihuSkillError::FlowLoad`] with the cause followed by the file path in
/// parentheses when the file cannot be read or is not a valid flow document.
pub fn load_flow() -> Result<ZhihuFlow, ZhihuSkillError> {
    let flow_path = default_flow_path();
    let load_failure = |cause: String| {
        ZhihuSkillError::FlowLoad(format!("{} ({})", cause, flow_path.display()))
    };

    let raw = fs::read_to_string(&flow_path).map_err(|err| load_failure(err.to_string()))?;
    serde_json::from_str(&raw).map_err(|err| load_failure(err.to_string()))
}
/// Runs the Zhihu write flow for `req` through the browser tool.
///
/// The request is validated, the flow file is loaded, and every step is executed in order:
/// an `info` log entry is sent, the action is invoked, and the step's expectations are
/// checked against the result. Steps marked `only_when_publish` are skipped when
/// `req.publish` is `false`.
///
/// # Errors
///
/// Returns a [`ZhihuSkillError`] when validation or flow loading fails, when a step is
/// malformed, when logging or the browser action fails, when a text expectation is not met,
/// or when a publishing run ends without a captured published-article URL.
pub fn execute<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
req: ZhihuWriteRequest,
) -> Result<ZhihuWriteResult, ZhihuSkillError> {
validate_request(&req)?;
let flow = load_flow()?;
// URL captured by a non-publish capture step.
let mut final_url = None;
// Normalized article URL captured by a publish-only capture step.
let mut published_url = None;
// Name of the last publish-only capture step that ran, used for error reporting.
let mut publish_capture_step = None;
for step in &flow.steps {
if step.only_when_publish && !req.publish {
continue;
}
// Announce the step before running it; a failed log send aborts the flow.
transport
.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: step.log_message.clone(),
})
.map_err(|err| ZhihuSkillError::BrowserActionFailed {
step: step.name.clone(),
message: err.to_string(),
})?;
let action = parse_action(&step.action)?;
let expected_domain = resolve_domain(&flow, &step.expected_domain)?;
let params = build_params(&flow, step, &req)?;
let result = browser_tool
.invoke(action, params, &expected_domain)
.map_err(|err| ZhihuSkillError::BrowserActionFailed {
step: step.name.clone(),
message: err.to_string(),
})?;
// The call itself succeeded but the browser reported a failed action.
if !result.success {
return Err(ZhihuSkillError::BrowserActionFailed {
step: step.name.clone(),
message: result.data.to_string(),
});
}
if step.capture_url {
if let Some(url) = extract_url(&result.data) {
if step.only_when_publish {
// Only URLs that look like a published article are kept for publish steps.
if is_published_article_url(&url) {
published_url = normalize_published_article_url(&url);
}
} else {
final_url = Some(url);
}
}
if step.only_when_publish {
publish_capture_step = Some(step.name.clone());
}
}
if let Some(expected) = step.expect_contains.as_deref() {
let actual = extract_text(&result.data);
if !actual.contains(expected) {
return Err(ZhihuSkillError::ExpectedTextMissing {
step: step.name.clone(),
expected: expected.to_string(),
actual,
});
}
}
if let Some(source) = step.expect_text_source.as_deref() {
let expected = resolve_text_source(&req, source)?.to_string();
let actual = extract_text(&result.data);
// An empty read-back is tolerated when the step opts in; this is the last check of
// the iteration, so `continue` only moves on to the next step.
if actual.is_empty() && step.allow_empty_text {
continue;
}
if actual != expected {
return Err(ZhihuSkillError::ExpectedTextMismatch {
step: step.name.clone(),
expected,
actual,
});
}
}
}
// A publishing run must end with a confirmed article URL; it replaces any other capture.
if req.publish {
final_url = Some(
published_url.ok_or_else(|| ZhihuSkillError::MissingPublishedUrl {
step: publish_capture_step.unwrap_or_else(|| "publish_complete".to_string()),
})?,
);
}
Ok(ZhihuWriteResult {
summary: build_summary(&req, final_url.as_deref()),
published: req.publish,
final_url,
})
}
/// Rejects requests whose title or body is blank after trimming.
///
/// The title is checked first, so a request with both fields blank reports
/// [`ZhihuSkillError::EmptyTitle`].
fn validate_request(req: &ZhihuWriteRequest) -> Result<(), ZhihuSkillError> {
    let is_blank = |text: &str| text.trim().is_empty();

    match (is_blank(&req.title), is_blank(&req.body)) {
        (true, _) => Err(ZhihuSkillError::EmptyTitle),
        (false, true) => Err(ZhihuSkillError::EmptyBody),
        (false, false) => Ok(()),
    }
}
/// Maps an action name from the flow file to the corresponding [`Action`].
///
/// # Errors
///
/// Returns [`ZhihuSkillError::UnknownAction`] for any name outside the supported set.
fn parse_action(name: &str) -> Result<Action, ZhihuSkillError> {
    let action = match name {
        "click" => Action::Click,
        "type" => Action::Type,
        "navigate" => Action::Navigate,
        "getText" => Action::GetText,
        "getHtml" => Action::GetHtml,
        "waitForSelector" => Action::WaitForSelector,
        "scrollTo" => Action::ScrollTo,
        unsupported => return Err(ZhihuSkillError::UnknownAction(unsupported.to_string())),
    };
    Ok(action)
}
/// Looks up the domain registered under `key` in the flow and returns an owned copy.
///
/// # Errors
///
/// Returns [`ZhihuSkillError::MissingDomain`] when the flow has no such key.
fn resolve_domain(flow: &ZhihuFlow, key: &str) -> Result<String, ZhihuSkillError> {
    match flow.domains.get(key) {
        Some(domain) => Ok(domain.clone()),
        None => Err(ZhihuSkillError::MissingDomain(key.to_string())),
    }
}
/// Looks up the selector registered under `key` in the flow, borrowing it from the flow.
///
/// # Errors
///
/// Returns [`ZhihuSkillError::MissingSelector`] when the flow has no such key.
fn resolve_selector<'a>(flow: &'a ZhihuFlow, key: &str) -> Result<&'a str, ZhihuSkillError> {
    match flow.selectors.get(key) {
        Some(selector) => Ok(selector.as_str()),
        None => Err(ZhihuSkillError::MissingSelector(key.to_string())),
    }
}
/// Returns the request field named by `source` (`title` or `body`).
///
/// # Errors
///
/// Returns [`ZhihuSkillError::MissingTextSource`] for any other source name.
fn resolve_text_source<'a>(
    req: &'a ZhihuWriteRequest,
    source: &str,
) -> Result<&'a str, ZhihuSkillError> {
    let field = match source {
        "title" => &req.title,
        "body" => &req.body,
        unknown => return Err(ZhihuSkillError::MissingTextSource(unknown.to_string())),
    };
    Ok(field.as_str())
}
/// Builds the JSON parameter object for `step` according to its action.
///
/// * `navigate` - `{ url }`, taken from the flow's `entry_url` or `editor_url`.
/// * `click` - `{ selector }` plus `wait_after` when the step sets `wait_after_ms`.
/// * `type` - `{ selector, text, clear_first }` with the text taken from the request.
/// * `getText` - `{ selector }`.
/// * `getHtml` - `{ selector }` plus `outer` when the step sets it.
/// * `waitForSelector` - `{ selector }` plus `timeout_ms` when the step sets it.
/// * `scrollTo` - `{ selector }` when a selector is referenced, otherwise the provided
///   `x`/`y` coordinates.
///
/// # Errors
///
/// Returns the matching `Missing*` error when a required reference is absent or unknown,
/// and [`ZhihuSkillError::UnknownAction`] for unsupported action names.
fn build_params(
    flow: &ZhihuFlow,
    step: &FlowStep,
    req: &ZhihuWriteRequest,
) -> Result<Value, ZhihuSkillError> {
    // Resolves the selector a step refers to; the step must carry a `selector_ref`.
    fn required_selector<'a>(
        flow: &'a ZhihuFlow,
        step: &FlowStep,
    ) -> Result<&'a str, ZhihuSkillError> {
        let Some(selector_ref) = step.selector_ref.as_deref() else {
            return Err(ZhihuSkillError::MissingSelectorRef(step.name.clone()));
        };
        resolve_selector(flow, selector_ref)
    }

    // Starts a parameter object that holds only the `selector` entry.
    fn selector_object(selector: &str) -> serde_json::Map<String, Value> {
        let mut object = serde_json::Map::new();
        object.insert("selector".to_string(), Value::String(selector.to_string()));
        object
    }

    let params = match step.action.as_str() {
        "navigate" => {
            let Some(url_ref) = step.url_ref.as_deref() else {
                return Err(ZhihuSkillError::MissingUrlRef(step.name.clone()));
            };
            let url = match url_ref {
                "entry_url" => flow.entry_url.as_str(),
                "editor_url" => flow.editor_url.as_str(),
                other => {
                    return Err(ZhihuSkillError::MissingUrlRef(format!(
                        "{}:{}",
                        step.name, other
                    )))
                }
            };
            json!({ "url": url })
        }
        "click" => {
            let mut object = selector_object(required_selector(flow, step)?);
            if let Some(wait_after_ms) = step.wait_after_ms {
                object.insert("wait_after".to_string(), Value::from(wait_after_ms));
            }
            Value::Object(object)
        }
        "type" => {
            // The selector is resolved before the text source so that selector errors win.
            let selector = required_selector(flow, step)?;
            let Some(text_source) = step.text_source.as_deref() else {
                return Err(ZhihuSkillError::MissingTextSource(step.name.clone()));
            };
            let text = resolve_text_source(req, text_source)?;
            json!({
                "selector": selector,
                "text": text,
                "clear_first": step.clear_first,
            })
        }
        "getText" => {
            let selector = required_selector(flow, step)?;
            json!({ "selector": selector })
        }
        "getHtml" => {
            let mut object = selector_object(required_selector(flow, step)?);
            if let Some(outer) = step.outer {
                object.insert("outer".to_string(), Value::Bool(outer));
            }
            Value::Object(object)
        }
        "waitForSelector" => {
            let mut object = selector_object(required_selector(flow, step)?);
            if let Some(timeout_ms) = step.timeout_ms {
                object.insert("timeout_ms".to_string(), Value::from(timeout_ms));
            }
            Value::Object(object)
        }
        "scrollTo" => match step.selector_ref.as_deref() {
            // A selector target takes precedence over coordinates.
            Some(selector_ref) => {
                let selector = resolve_selector(flow, selector_ref)?;
                json!({ "selector": selector })
            }
            None if step.x.is_none() && step.y.is_none() => {
                return Err(ZhihuSkillError::MissingScrollTarget(step.name.clone()));
            }
            None => {
                let mut object = serde_json::Map::new();
                if let Some(x) = step.x {
                    object.insert("x".to_string(), Value::from(x));
                }
                if let Some(y) = step.y {
                    object.insert("y".to_string(), Value::from(y));
                }
                Value::Object(object)
            }
        },
        unsupported => return Err(ZhihuSkillError::UnknownAction(unsupported.to_string())),
    };

    Ok(params)
}
/// Returns the trimmed string stored under the `text` key of `data`.
///
/// Yields an empty string when the key is absent or its value is not a string.
pub fn extract_text(data: &Value) -> String {
    match data.get("text").and_then(Value::as_str) {
        Some(text) => text.trim().to_string(),
        None => String::new(),
    }
}
fn extract_url(data: &Value) -> Option<String> {
data.get("url")
.and_then(Value::as_str)
.map(str::trim)
.filter(|url| !url.is_empty())
.map(ToOwned::to_owned)
}
/// Reports whether `url` can be normalized into a published article URL.
fn is_published_article_url(url: &str) -> bool {
normalize_published_article_url(url).is_some()
}
/// Normalizes a captured URL into the canonical URL of a published Zhihu article.
///
/// The input is trimmed and any trailing `/edit` suffix is removed. The remainder must
/// start with the article URL prefix and carry a non-empty article id after it.
///
/// Returns `None` for URLs outside the article prefix and for URLs without an article id
/// (for example the bare prefix, or the prefix followed only by `edit`), so that such a URL
/// is never reported as proof of publication.
fn normalize_published_article_url(url: &str) -> Option<String> {
    let canonical = url.trim().trim_end_matches(ZHIHU_ARTICLE_EDIT_SUFFIX);

    // Validate after removing the suffix: `<prefix>edit` would otherwise collapse into a
    // URL that no longer points at an article.
    let article_id = canonical.strip_prefix(ZHIHU_ARTICLE_URL_PREFIX)?;
    if article_id.is_empty() {
        return None;
    }

    Some(canonical.to_string())
}
/// Formats the user-facing summary for a finished write flow.
///
/// # Panics
///
/// Panics when `req.publish` is set and `final_url` is `None`.
fn build_summary(req: &ZhihuWriteRequest, final_url: Option<&str>) -> String {
    let title = req.title.trim();
    if !req.publish {
        return format!("知乎文章草稿已填充:{}", title);
    }

    let url = final_url.expect("publish flow must provide final_url before building summary");
    format!("知乎文章已发布:{} ({url})", title)
}

815
src/skill/zhihu_hotlist.rs Normal file
View File

@@ -0,0 +1,815 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fs;
use std::path::PathBuf;
use std::sync::OnceLock;
use std::time::{SystemTime, UNIX_EPOCH};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use thiserror::Error;
use uuid::Uuid;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, CommandOutput, Transport};
use super::zhihu_hotlist_store::{
load_latest_snapshot, load_snapshot, persist_snapshot, resolve_store_dir,
ZhihuCommentMetricSnapshot, ZhihuHotItemSnapshot, ZhihuHotlistCollectionStats,
ZhihuHotlistSnapshot, ZhihuHotlistStoreError,
};
// Version tag written into every snapshot as `collector_version`.
const COLLECTOR_VERSION: &str = "zhihu_hotlist_v1";
// `timeout_ms` value passed to every `WaitForSelector` action of this skill.
const DEFAULT_WAIT_TIMEOUT_MS: u64 = 5_000;
// `y` value passed to the `ScrollTo` action that scrolls a detail page toward its comments.
const DEFAULT_COMMENT_SCROLL_Y: i64 = 1_200;
/// Serde default for `ZhihuHotlistCollectRequest::top_n`.
fn default_top_n() -> usize {
10
}
/// Serde default for `ZhihuHotlistCollectRequest::comments_per_item`.
fn default_comments_per_item() -> usize {
20
}
/// Serde default for `ZhihuHotlistReportRequest::top_n`.
fn default_report_top_n() -> usize {
10
}
/// Arguments of the `zhihu_hotlist_collect` skill.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct ZhihuHotlistCollectRequest {
/// Number of hotlist items requested; must be greater than 0. Defaults to 10.
#[serde(default = "default_top_n")]
pub top_n: usize,
/// Comment limit per item passed to comment parsing; must be greater than 0.
/// Defaults to 20.
#[serde(default = "default_comments_per_item")]
pub comments_per_item: usize,
/// Optional snapshot store directory, handed to `resolve_store_dir`.
#[serde(default)]
pub store_dir: Option<String>,
}
/// Outcome of a `zhihu_hotlist_collect` run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ZhihuHotlistCollectResult {
/// Human-readable summary of the saved snapshot.
pub summary: String,
/// Identifier of the persisted snapshot.
pub snapshot_id: String,
/// Number of hotlist items stored in the snapshot.
pub item_count: usize,
/// Display form of the path the snapshot was persisted to.
pub snapshot_path: String,
}
/// Arguments of the `zhihu_hotlist_report` skill.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct ZhihuHotlistReportRequest {
/// Snapshot to report on; when absent or blank the latest snapshot is used.
#[serde(default)]
pub snapshot_id: Option<String>,
/// Optional snapshot store directory, handed to `resolve_store_dir`.
#[serde(default)]
pub store_dir: Option<String>,
/// Maximum number of items listed in the report; must be greater than 0. Defaults to 10.
#[serde(default = "default_report_top_n")]
pub top_n: usize,
}
/// Outcome of a `zhihu_hotlist_report` run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ZhihuHotlistReportResult {
/// Report text: a header line followed by one line per listed item.
pub summary: String,
/// Identifier of the snapshot the report was built from.
pub snapshot_id: String,
/// Total number of items in the snapshot (not limited by `top_n`).
pub item_count: usize,
}
/// Configuration of the hotlist collector, deserialized from the hotlist flow JSON file.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuHotlistFlow {
/// URL navigated to when the current page is not already the hotlist.
pub hotlist_url: String,
/// Named domains; the collector reads the `zhihu` entry.
pub domains: HashMap<String, String>,
/// Named literal strings; the collector reads the `hotlist_guard` entry.
pub literals: HashMap<String, String>,
/// Named selectors, e.g. `hotlist_root`, `hotlist_item`, `comment_list`, `comment_item`
/// and `comment_metric`.
pub selectors: HashMap<String, String>,
}
/// Errors raised by the Zhihu hotlist skills.
#[derive(Debug, Error)]
pub enum ZhihuHotlistSkillError {
/// The request asked for zero items.
#[error("top_n must be greater than 0")]
InvalidTopN,
/// The request asked for zero comments per item.
#[error("comments_per_item must be greater than 0")]
InvalidCommentsPerItem,
/// The flow file could not be read or parsed; carries the cause and the path.
#[error("failed to load zhihu hotlist flow: {0}")]
FlowLoad(String),
/// A selector key is absent from the flow.
#[error("missing selector in zhihu hotlist flow: {0}")]
MissingSelector(String),
/// A domain key is absent from the flow.
#[error("missing domain in zhihu hotlist flow: {0}")]
MissingDomain(String),
/// A literal key is absent from the flow.
#[error("missing literal in zhihu hotlist flow: {0}")]
MissingLiteral(String),
/// A selector from the flow could not be parsed.
#[error("invalid selector in zhihu hotlist flow `{name}`: {message}")]
InvalidSelector { name: String, message: String },
/// Logging or invoking a step failed, or the browser reported `success == false`.
#[error("browser action failed at step {step}: {message}")]
BrowserActionFailed { step: String, message: String },
/// No hotlist items were extracted from the captured page.
#[error("zhihu hotlist page did not expose any items")]
NoHotlistItems,
/// The captured hotlist HTML was empty.
#[error("zhihu hotlist html did not include enough data for item extraction")]
IncompleteHotlistHtml,
/// A snapshot store operation failed.
#[error(transparent)]
Store(#[from] ZhihuHotlistStoreError),
}
/// Returns the path of `zhihu_hotlist_flow.json` as resolved by the shared skill resource
/// lookup.
pub fn default_flow_path() -> PathBuf {
super::default_skill_resource_path("zhihu_hotlist_flow.json")
}
/// Reads and deserializes the Zhihu hotlist flow from its default path.
///
/// # Errors
///
/// Returns [`ZhihuHotlistSkillError::FlowLoad`] with the cause followed by the file path in
/// parentheses when the file cannot be read or is not a valid flow document.
pub fn load_flow() -> Result<ZhihuHotlistFlow, ZhihuHotlistSkillError> {
    let flow_path = default_flow_path();
    let load_failure = |cause: String| {
        ZhihuHotlistSkillError::FlowLoad(format!("{} ({})", cause, flow_path.display()))
    };

    let raw = fs::read_to_string(&flow_path).map_err(|err| load_failure(err.to_string()))?;
    serde_json::from_str(&raw).map_err(|err| load_failure(err.to_string()))
}
/// Collects the Zhihu hotlist and per-item comment metrics, then persists them as a snapshot.
///
/// The request is validated, the flow is loaded, the browser is brought to the hotlist page,
/// the hotlist HTML is captured and parsed, and each parsed item's detail page is visited to
/// gather comment metrics. The resulting snapshot is written to the resolved store directory.
///
/// # Errors
///
/// Returns a [`ZhihuHotlistSkillError`] when validation, flow loading, flow lookups, reaching
/// or capturing the hotlist page, hotlist parsing, or snapshot persistence fails, and
/// `NoHotlistItems` when parsing yields nothing. Failures while collecting comment metrics
/// for a single item are not fatal.
pub fn execute_collect<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
req: ZhihuHotlistCollectRequest,
) -> Result<ZhihuHotlistCollectResult, ZhihuHotlistSkillError> {
validate_collect_request(&req)?;
let flow = load_flow()?;
// Resolve every flow entry up front so that a misconfigured flow fails before any
// browser action is issued.
let zhihu_domain = resolve_domain(&flow, "zhihu")?;
let hotlist_guard = resolve_literal(&flow, "hotlist_guard")?;
let hotlist_root_selector = resolve_selector(&flow, "hotlist_root")?;
let hotlist_item_selector = resolve_selector(&flow, "hotlist_item")?;
let comment_list_selector = resolve_selector(&flow, "comment_list")?;
let comment_item_selector = resolve_selector(&flow, "comment_item")?;
let comment_metric_selector = resolve_selector(&flow, "comment_metric")?;
let page_url = ensure_hotlist_page(
transport,
browser_tool,
&flow,
&zhihu_domain,
hotlist_guard,
hotlist_root_selector,
hotlist_item_selector,
)?;
let hotlist_html = run_action(
transport,
browser_tool,
"capture hotlist html",
Action::GetHtml,
json!({ "selector": hotlist_root_selector, "outer": true }),
&zhihu_domain,
)?;
let hotlist_items = parse_hotlist_items(&hotlist_html.data, &flow, req.top_n)?;
if hotlist_items.is_empty() {
return Err(ZhihuHotlistSkillError::NoHotlistItems);
}
let mut items = Vec::with_capacity(hotlist_items.len());
// Counters reported in the snapshot's collection statistics.
let mut partial_items = 0usize;
let mut items_with_comment_metrics = 0usize;
let mut total_comment_metric_records = 0usize;
let comment_context = CommentCollectionContext {
zhihu_domain: &zhihu_domain,
comment_list_selector,
comment_item_selector,
comment_metric_selector,
page_root_selector: hotlist_root_selector,
comments_per_item: req.comments_per_item,
};
for hot_item in hotlist_items {
// Best effort: an item whose comment collection fails is kept with no metrics and
// counted as partial instead of aborting the whole run.
let comment_metrics = match collect_comment_metrics(
transport,
browser_tool,
&comment_context,
&hot_item.url,
hot_item.rank,
) {
Ok(metrics) => metrics,
Err(_) => {
partial_items += 1;
Vec::new()
}
};
if !comment_metrics.is_empty() {
items_with_comment_metrics += 1;
total_comment_metric_records += comment_metrics.len();
}
items.push(ZhihuHotItemSnapshot {
rank: hot_item.rank,
item_id: hot_item.item_id,
url: hot_item.url,
title: hot_item.title,
summary: hot_item.summary,
heat_text: hot_item.heat_text.clone(),
heat_value: parse_count_text(&hot_item.heat_text),
comment_metrics,
});
}
let snapshot = ZhihuHotlistSnapshot {
snapshot_id: build_snapshot_id(),
captured_at_ms: now_unix_ms(),
page_url,
collector_version: COLLECTOR_VERSION.to_string(),
collection_stats: ZhihuHotlistCollectionStats {
requested_items: req.top_n,
collected_items: items.len(),
items_with_comment_metrics,
total_comment_metric_records,
partial_items,
},
items,
};
let store_dir = resolve_store_dir(req.store_dir.as_deref());
let persisted = persist_snapshot(&store_dir, &snapshot)?;
let summary = format!(
"知乎热榜快照已保存:{} 条热榜,{} 条评论指标记录 ({})",
snapshot.items.len(),
snapshot.collection_stats.total_comment_metric_records,
persisted.snapshot_path.display()
);
Ok(ZhihuHotlistCollectResult {
summary,
snapshot_id: snapshot.snapshot_id,
item_count: snapshot.items.len(),
snapshot_path: persisted.snapshot_path.display().to_string(),
})
}
/// Builds a textual report from a stored hotlist snapshot.
///
/// The snapshot named by `req.snapshot_id` is loaded; when the id is absent or blank the
/// latest snapshot in the store is used. The report has a header line followed by one line
/// for each of the first `req.top_n` items, including aggregated comment metrics.
///
/// # Errors
///
/// Returns `InvalidTopN` when `req.top_n` is 0, or a store error when the snapshot cannot be
/// loaded.
pub fn execute_report(
    req: ZhihuHotlistReportRequest,
) -> Result<ZhihuHotlistReportResult, ZhihuHotlistSkillError> {
    validate_report_request(&req)?;

    let store_dir = resolve_store_dir(req.store_dir.as_deref());
    let requested_id = req
        .snapshot_id
        .as_deref()
        .map(str::trim)
        .filter(|id| !id.is_empty());
    let snapshot = match requested_id {
        Some(id) => load_snapshot(&store_dir, id)?,
        None => load_latest_snapshot(&store_dir)?,
    };

    let header = format!(
        "知乎热榜报告 {}: 共 {} 条,采集于 {}",
        snapshot.snapshot_id,
        snapshot.items.len(),
        snapshot.captured_at_ms
    );
    let item_lines = snapshot.items.iter().take(req.top_n).map(|item| {
        let totals = aggregate_comment_metrics(&item.comment_metrics);
        format!(
            "{}. {} | 热度 {} | 评论指标 {} 条 | 回复 {} | 赞同 {} | 收藏 {} | 红心 {}",
            item.rank,
            item.title,
            item.heat_text,
            item.comment_metrics.len(),
            totals.reply_count,
            totals.upvote_count,
            totals.favorite_count,
            totals.heart_count,
        )
    });
    let summary = std::iter::once(header)
        .chain(item_lines)
        .collect::<Vec<_>>()
        .join("\n");

    Ok(ZhihuHotlistReportResult {
        summary,
        snapshot_id: snapshot.snapshot_id,
        item_count: snapshot.items.len(),
    })
}
/// Rejects collect requests with a zero `top_n` or a zero `comments_per_item`.
///
/// `top_n` is checked first.
fn validate_collect_request(
    req: &ZhihuHotlistCollectRequest,
) -> Result<(), ZhihuHotlistSkillError> {
    if req.top_n == 0 {
        Err(ZhihuHotlistSkillError::InvalidTopN)
    } else if req.comments_per_item == 0 {
        Err(ZhihuHotlistSkillError::InvalidCommentsPerItem)
    } else {
        Ok(())
    }
}
/// Rejects report requests with a zero `top_n`.
fn validate_report_request(req: &ZhihuHotlistReportRequest) -> Result<(), ZhihuHotlistSkillError> {
    match req.top_n {
        0 => Err(ZhihuHotlistSkillError::InvalidTopN),
        _ => Ok(()),
    }
}
/// Makes sure the browser shows the Zhihu hotlist and returns the page URL.
///
/// The current page is probed first: when the text under the hotlist root contains
/// `hotlist_guard`, the page is reused after waiting for hotlist items. Otherwise (including
/// when the probe itself fails) the browser is navigated to `flow.hotlist_url` and the
/// function waits for hotlist items there.
///
/// The returned URL is the `url` reported by the probe or navigation result, falling back to
/// `flow.hotlist_url` when none is reported.
///
/// # Errors
///
/// Returns an error when waiting for hotlist items or navigating fails.
fn ensure_hotlist_page<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
flow: &ZhihuHotlistFlow,
zhihu_domain: &str,
hotlist_guard: &str,
hotlist_root_selector: &str,
hotlist_item_selector: &str,
) -> Result<String, ZhihuHotlistSkillError> {
// No `?` here on purpose: a failed probe just means navigation is needed.
let hotlist_probe = run_action(
transport,
browser_tool,
"probe current Zhihu page for hotlist guard",
Action::GetText,
json!({ "selector": hotlist_root_selector }),
zhihu_domain,
);
if let Ok(result) = hotlist_probe {
let text = extract_text(&result.data);
if text.contains(hotlist_guard) {
run_action(
transport,
browser_tool,
"wait for hotlist items on current page",
Action::WaitForSelector,
json!({ "selector": hotlist_item_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
zhihu_domain,
)?;
return Ok(extract_url(&result.data).unwrap_or_else(|| flow.hotlist_url.clone()));
}
}
let navigate = run_action(
transport,
browser_tool,
"navigate to Zhihu hotlist",
Action::Navigate,
json!({ "url": flow.hotlist_url }),
zhihu_domain,
)?;
run_action(
transport,
browser_tool,
"wait for Zhihu hotlist items",
Action::WaitForSelector,
json!({ "selector": hotlist_item_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
zhihu_domain,
)?;
Ok(extract_url(&navigate.data).unwrap_or_else(|| flow.hotlist_url.clone()))
}
/// Visits the detail page of one hotlist item and extracts its comment metrics.
///
/// The browser steps run in order: navigate to `item_url`, wait for the page root, scroll
/// down by a fixed offset, wait for the comment list, scroll the comment list into view, and
/// capture the outer HTML of the page root. The captured HTML is then handed to
/// `parse_comment_metrics` together with the comment selectors and the per-item limit.
///
/// `rank` is only used to label the step names in logs and errors.
///
/// # Errors
///
/// Returns the error of the first browser step that fails.
fn collect_comment_metrics<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
context: &CommentCollectionContext<'_>,
item_url: &str,
rank: usize,
) -> Result<Vec<ZhihuCommentMetricSnapshot>, ZhihuHotlistSkillError> {
let step_prefix = format!("collect comment metrics for hot item #{rank}");
run_action(
transport,
browser_tool,
&format!("{step_prefix}: navigate detail page"),
Action::Navigate,
json!({ "url": item_url }),
context.zhihu_domain,
)?;
run_action(
transport,
browser_tool,
&format!("{step_prefix}: wait for page root"),
Action::WaitForSelector,
json!({ "selector": context.page_root_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
context.zhihu_domain,
)?;
run_action(
transport,
browser_tool,
&format!("{step_prefix}: scroll toward comments"),
Action::ScrollTo,
json!({ "y": DEFAULT_COMMENT_SCROLL_Y }),
context.zhihu_domain,
)?;
run_action(
transport,
browser_tool,
&format!("{step_prefix}: wait for comment list"),
Action::WaitForSelector,
json!({ "selector": context.comment_list_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
context.zhihu_domain,
)?;
run_action(
transport,
browser_tool,
&format!("{step_prefix}: scroll comment list into view"),
Action::ScrollTo,
json!({ "selector": context.comment_list_selector }),
context.zhihu_domain,
)?;
// The whole page root is captured; the comment selectors are applied during parsing.
let comments_html = run_action(
transport,
browser_tool,
&format!("{step_prefix}: capture page html for comments"),
Action::GetHtml,
json!({ "selector": context.page_root_selector, "outer": true }),
context.zhihu_domain,
)?;
Ok(parse_comment_metrics(
&comments_html.data,
context.comment_list_selector,
context.comment_item_selector,
context.comment_metric_selector,
context.comments_per_item,
))
}
/// Logs `step` over the transport, then runs one browser action.
///
/// # Errors
/// Every failure is reported as `ZhihuHotlistSkillError::BrowserActionFailed`
/// tagged with `step`:
/// * the log entry could not be sent,
/// * the browser tool returned an error,
/// * the tool answered with `success == false` (the message is the JSON text
///   of the returned `data`).
fn run_action<T: Transport>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    step: &str,
    action: Action,
    params: Value,
    expected_domain: &str,
) -> Result<CommandOutput, ZhihuHotlistSkillError> {
    let failure = |message: String| ZhihuHotlistSkillError::BrowserActionFailed {
        step: step.to_string(),
        message,
    };

    let log_entry = AgentMessage::LogEntry {
        level: "info".to_string(),
        message: step.to_string(),
    };
    if let Err(err) = transport.send(&log_entry) {
        return Err(failure(err.to_string()));
    }

    let output = match browser_tool.invoke(action, params, expected_domain) {
        Ok(output) => output,
        Err(err) => return Err(failure(err.to_string())),
    };
    if output.success {
        Ok(output)
    } else {
        Err(failure(output.data.to_string()))
    }
}
fn parse_hotlist_items(
data: &Value,
flow: &ZhihuHotlistFlow,
top_n: usize,
) -> Result<Vec<ParsedHotItem>, ZhihuHotlistSkillError> {
let html = extract_html(data);
if html.trim().is_empty() {
return Err(ZhihuHotlistSkillError::IncompleteHotlistHtml);
}
let document = Html::parse_document(&html);
let item_selector = parse_selector("hotlist_item", resolve_selector(flow, "hotlist_item")?)?;
let title_link_selector = parse_selector(
"hotlist_title_link",
resolve_selector(flow, "hotlist_title_link")?,
)?;
let summary_selector = parse_selector(
"hotlist_summary",
resolve_selector(flow, "hotlist_summary")?,
)?;
let heat_selector = parse_selector("hotlist_heat", resolve_selector(flow, "hotlist_heat")?)?;
let mut seen_urls = HashSet::new();
let mut items = Vec::new();
for (index, element) in document.select(&item_selector).enumerate() {
let Some(link) = element.select(&title_link_selector).next() else {
continue;
};
let title = compact_text(&link);
if title.is_empty() {
continue;
}
let href = link.value().attr("href").unwrap_or_default();
let url = normalize_zhihu_url(href);
if url.is_empty() || !seen_urls.insert(url.clone()) {
continue;
}
let summary = element
.select(&summary_selector)
.next()
.map(|node| compact_text(&node))
.unwrap_or_default();
let heat_text = element
.select(&heat_selector)
.next()
.map(|node| compact_text(&node))
.unwrap_or_default();
items.push(ParsedHotItem {
rank: index + 1,
item_id: derive_item_id(&url),
url,
title,
summary,
heat_text,
});
if items.len() >= top_n {
break;
}
}
if items.is_empty() {
return Err(ZhihuHotlistSkillError::NoHotlistItems);
}
for (index, item) in items.iter_mut().enumerate() {
item.rank = index + 1;
}
Ok(items)
}
/// Builds metric snapshots for the first `comments_per_item` comments found in
/// the captured page HTML.
///
/// The search is scoped to the first element matching `comment_list_selector`;
/// when no such element exists the whole captured HTML is searched instead.
/// This is best-effort: an empty payload or any selector that fails to parse
/// yields an empty vector rather than an error.
fn parse_comment_metrics(
    data: &Value,
    comment_list_selector: &str,
    comment_item_selector: &str,
    comment_metric_selector: &str,
    comments_per_item: usize,
) -> Vec<ZhihuCommentMetricSnapshot> {
    let html = extract_html(data);
    if html.trim().is_empty() {
        return Vec::new();
    }

    let Ok(item_selector) = Selector::parse(comment_item_selector) else {
        return Vec::new();
    };
    let Ok(metric_selector) = Selector::parse(comment_metric_selector) else {
        return Vec::new();
    };
    let Ok(list_selector) = Selector::parse(comment_list_selector) else {
        return Vec::new();
    };

    let document = Html::parse_document(&html);
    let scoped_html = match document.select(&list_selector).next() {
        Some(list_node) => list_node.html(),
        None => html.clone(),
    };
    // Re-parse the container on its own so item matches cannot come from
    // outside the comment list.
    let scoped_document = Html::parse_fragment(&scoped_html);

    let mut snapshots = Vec::new();
    for (offset, comment) in scoped_document
        .select(&item_selector)
        .take(comments_per_item)
        .enumerate()
    {
        snapshots.push(build_comment_metric_snapshot(
            offset + 1,
            &comment,
            &metric_selector,
        ));
    }
    snapshots
}
/// Builds the metric snapshot for a single comment element.
///
/// * `position` – 1-based position of the comment within the scraped list.
/// * `element` – the comment's root element.
/// * `metric_selector` – selects the metric nodes (reply / upvote / … buttons)
///   inside `element`.
///
/// The comment id is taken from the first present attribute among `data-id`,
/// `data-comment-id` and `id`. Each non-empty metric label is classified by
/// keyword; labels matching no keyword are stored in `raw_metrics` under their
/// sanitized text. A label without a parsable number counts as `0`.
fn build_comment_metric_snapshot(
    position: usize,
    element: &ElementRef<'_>,
    metric_selector: &Selector,
) -> ZhihuCommentMetricSnapshot {
    let node = element.value();
    let comment_id = node
        .attr("data-id")
        .or_else(|| node.attr("data-comment-id"))
        .or_else(|| node.attr("id"))
        .map(ToString::to_string);

    let mut snapshot = ZhihuCommentMetricSnapshot {
        position,
        comment_id,
        reply_count: None,
        upvote_count: None,
        favorite_count: None,
        heart_count: None,
        raw_metrics: None,
    };
    let mut raw_metrics = BTreeMap::new();

    for metric in element.select(metric_selector) {
        let text = compact_text(&metric);
        if text.is_empty() {
            continue;
        }
        let count = parse_count_text(&text).unwrap_or(0);
        let lowered = text.to_ascii_lowercase();

        if text.contains("回复") {
            snapshot.reply_count = Some(count);
        // The upvote keyword used to be the empty string, and `contains("")`
        // is true for every label, so all later branches were unreachable.
        // NOTE(review): "赞" (as in 赞同 / 点赞) is the assumed intended
        // keyword — confirm against the live comment markup.
        } else if text.contains("赞") || lowered.contains("upvote") {
            snapshot.upvote_count = Some(count);
        } else if text.contains("收藏")
            || lowered.contains("favorite")
            || lowered.contains("bookmark")
        {
            snapshot.favorite_count = Some(count);
        } else if text.contains("喜欢")
            || text.contains("红心")
            || text.contains('❤')
            || text.contains('♥')
        {
            snapshot.heart_count = Some(count);
        } else {
            raw_metrics.insert(sanitize_metric_key(&text), count);
        }
    }

    if !raw_metrics.is_empty() {
        snapshot.raw_metrics = Some(raw_metrics);
    }
    snapshot
}
/// Parses the CSS selector `raw`.
///
/// # Errors
/// `InvalidSelector` carrying `name` (the catalog key, for diagnostics) and
/// the parser's message.
fn parse_selector(name: &str, raw: &str) -> Result<Selector, ZhihuHotlistSkillError> {
    match Selector::parse(raw) {
        Ok(selector) => Ok(selector),
        Err(err) => Err(ZhihuHotlistSkillError::InvalidSelector {
            name: name.to_string(),
            message: err.to_string(),
        }),
    }
}
/// Looks up the selector stored under `key` in `flow.selectors`.
///
/// # Errors
/// `MissingSelector(key)` when the flow has no such entry.
fn resolve_selector<'a>(
    flow: &'a ZhihuHotlistFlow,
    key: &str,
) -> Result<&'a str, ZhihuHotlistSkillError> {
    let Some(selector) = flow.selectors.get(key) else {
        return Err(ZhihuHotlistSkillError::MissingSelector(key.to_string()));
    };
    Ok(selector.as_str())
}
/// Returns an owned copy of the domain stored under `key` in `flow.domains`.
///
/// # Errors
/// `MissingDomain(key)` when the flow has no such entry.
fn resolve_domain(flow: &ZhihuHotlistFlow, key: &str) -> Result<String, ZhihuHotlistSkillError> {
    match flow.domains.get(key) {
        Some(domain) => Ok(domain.clone()),
        None => Err(ZhihuHotlistSkillError::MissingDomain(key.to_string())),
    }
}
/// Looks up the literal stored under `key` in `flow.literals`.
///
/// # Errors
/// `MissingLiteral(key)` when the flow has no such entry.
fn resolve_literal<'a>(
    flow: &'a ZhihuHotlistFlow,
    key: &str,
) -> Result<&'a str, ZhihuHotlistSkillError> {
    let Some(literal) = flow.literals.get(key) else {
        return Err(ZhihuHotlistSkillError::MissingLiteral(key.to_string()));
    };
    Ok(literal.as_str())
}
/// Pulls trimmed text out of a browser result payload.
///
/// Uses the string under the `text` key when present, otherwise the payload
/// itself when it is a JSON string, otherwise the empty string.
fn extract_text(data: &Value) -> String {
    let text = match data.get("text").and_then(Value::as_str) {
        Some(text) => text,
        None => data.as_str().unwrap_or_default(),
    };
    text.trim().to_string()
}
/// Pulls HTML out of a browser result payload.
///
/// Tries the string under `html`, then under `outer_html`, then the payload
/// itself when it is a JSON string; returns the empty string when none match.
/// The text is returned untrimmed.
fn extract_html(data: &Value) -> String {
    let keyed = ["html", "outer_html"]
        .iter()
        .find_map(|key| data.get(*key).and_then(Value::as_str));
    match keyed {
        Some(html) => html.to_string(),
        None => data.as_str().unwrap_or_default().to_string(),
    }
}
fn extract_url(data: &Value) -> Option<String> {
data.get("url")
.and_then(Value::as_str)
.map(ToString::to_string)
}
/// Collects the text nodes under `element`, trims each one, drops the empty
/// ones, and joins the rest with single spaces.
fn compact_text(element: &ElementRef<'_>) -> String {
    let mut pieces: Vec<&str> = Vec::new();
    for fragment in element.text() {
        let fragment = fragment.trim();
        if !fragment.is_empty() {
            pieces.push(fragment);
        }
    }
    pieces.join(" ")
}
/// Turns an `href` value into an absolute, fragment-free URL.
///
/// * `http://` / `https://` URLs are kept as they are,
/// * protocol-relative URLs (`//host/path`) are given the `https:` scheme,
/// * anything else is treated as a path on `https://www.zhihu.com`.
///
/// The `#fragment` is removed in every case so that links to the same page
/// compare equal when callers de-duplicate by URL (previously only absolute
/// and protocol-relative inputs were stripped). Blank input and fragment-only
/// links such as `#top` produce an empty string, which callers treat as
/// "no URL".
fn normalize_zhihu_url(raw: &str) -> String {
    let trimmed = raw.trim();
    // Strip the fragment once, up front, so all branches agree.
    let target = trimmed.split('#').next().unwrap_or(trimmed);
    if target.is_empty() {
        return String::new();
    }
    if target.starts_with("https://") || target.starts_with("http://") {
        return target.to_string();
    }
    if let Some(rest) = target.strip_prefix("//") {
        return format!("https://{rest}");
    }
    format!("https://www.zhihu.com/{}", target.trim_start_matches('/'))
}
/// Derives a file-system-friendly id from an item URL.
///
/// The scheme, host and query string are discarded; the remaining path has its
/// surrounding slashes removed and inner slashes replaced by `_`
/// (`https://www.zhihu.com/question/1?x=2` becomes `question_1`). A URL with
/// no path yields `"root"`.
fn derive_item_id(url: &str) -> String {
    let without_scheme = url
        .trim()
        .trim_start_matches("https://")
        .trim_start_matches("http://");
    // Everything after the first '/' is the path (plus query).
    let after_host = match without_scheme.split_once('/') {
        Some((_host, rest)) => rest,
        None => "",
    };
    let path_only = match after_host.split_once('?') {
        Some((path, _query)) => path,
        None => after_host,
    };
    let segments = path_only.trim_matches('/');
    if segments.is_empty() {
        return "root".to_string();
    }
    segments.replace('/', "_")
}
/// Sums the reply, upvote, favorite and heart counts over `metrics`,
/// treating a missing count as zero.
fn aggregate_comment_metrics(metrics: &[ZhihuCommentMetricSnapshot]) -> AggregatedCommentMetrics {
    metrics.iter().fold(
        AggregatedCommentMetrics::default(),
        |mut totals, metric| {
            totals.reply_count += metric.reply_count.unwrap_or(0);
            totals.upvote_count += metric.upvote_count.unwrap_or(0);
            totals.favorite_count += metric.favorite_count.unwrap_or(0);
            totals.heart_count += metric.heart_count.unwrap_or(0);
            totals
        },
    )
}
/// Converts a metric label into a map key: ASCII letters and digits are kept
/// (lower-cased), every other character becomes `_`, and leading/trailing
/// underscores are removed.
///
/// A label with no ASCII alphanumerics therefore maps to the empty string.
fn sanitize_metric_key(text: &str) -> String {
    let mut key = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch.is_ascii_alphanumeric() {
            key.push(ch.to_ascii_lowercase());
        } else {
            key.push('_');
        }
    }
    key.trim_matches('_').to_string()
}
/// Builds a snapshot id of the form `<unix-millis>-<uuid-v4>`.
fn build_snapshot_id() -> String {
    let timestamp = now_unix_ms();
    let unique = Uuid::new_v4();
    format!("{timestamp}-{unique}")
}
/// Milliseconds elapsed since the Unix epoch, or `0` when the system clock
/// reports a time before the epoch.
fn now_unix_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
/// Parses a human-readable count such as `1,234`, `3.2万`, `1.5k` or `2亿`.
///
/// Commas and spaces are removed first; the first number in the text is then
/// scaled by its optional unit suffix:
///
/// | unit          | multiplier    |
/// |---------------|---------------|
/// | `万`, `w`, `W` | 10 000        |
/// | `亿`           | 100 000 000   |
/// | `k`, `K`       | 1 000         |
/// | `m`, `M`       | 1 000 000     |
/// | none           | 1             |
///
/// Returns `None` when the text contains no number. The result is rounded to
/// the nearest integer.
fn parse_count_text(text: &str) -> Option<u64> {
    let compact = text.replace([',', ' '], "");
    let captures = count_regex().captures(&compact)?;
    let number = captures.name("number")?.as_str().parse::<f64>().ok()?;
    // The 10 000 arm used to match the empty string, which is what a missing
    // unit turns into: plain numbers were inflated ×10 000 while `万` itself
    // fell through to ×1. Match on the optional capture directly instead.
    let multiplier = match captures.name("unit").map(|unit| unit.as_str()) {
        Some("万" | "w" | "W") => 10_000f64,
        Some("亿") => 100_000_000f64,
        Some("k" | "K") => 1_000f64,
        Some("m" | "M") => 1_000_000f64,
        _ => 1f64,
    };
    Some((number * multiplier).round() as u64)
}
/// Returns the lazily compiled, process-wide regex used to read counts.
///
/// It captures a decimal `number` and an optional `unit` suffix
/// (`万`, `亿`, or one of `k K m M w W`).
fn count_regex() -> &'static Regex {
    const COUNT_PATTERN: &str = r"(?P<number>\d+(?:\.\d+)?)\s*(?P<unit>万|亿|[kKmMwW])?";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(COUNT_PATTERN).unwrap())
}
/// Borrowed settings shared by every call to `collect_comment_metrics`.
#[derive(Debug, Clone, Copy)]
struct CommentCollectionContext<'a> {
// Domain passed as the expected domain of each browser action.
zhihu_domain: &'a str,
// Selector of the comment list container (waited for, scrolled to, and used
// to scope parsing).
comment_list_selector: &'a str,
// Selector of a single comment inside the list.
comment_item_selector: &'a str,
// Selector of the metric nodes inside a comment.
comment_metric_selector: &'a str,
// Selector of the page root whose outer HTML is captured.
page_root_selector: &'a str,
// Maximum number of comments parsed per hot item.
comments_per_item: usize,
}
/// One hotlist entry as produced by `parse_hotlist_items`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedHotItem {
// 1-based position among the entries that were kept.
rank: usize,
// Id derived from `url` by `derive_item_id`.
item_id: String,
// Absolute URL from `normalize_zhihu_url`.
url: String,
// Whitespace-compacted text of the title link.
title: String,
// Whitespace-compacted summary text; empty when the node is missing.
summary: String,
// Whitespace-compacted heat label; empty when the node is missing.
heat_text: String,
}
/// Per-item totals computed by `aggregate_comment_metrics`; each field is the
/// sum of the matching optional count over all comment snapshots.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct AggregatedCommentMetrics {
reply_count: u64,
upvote_count: u64,
favorite_count: u64,
heart_count: u64,
}

View File

@@ -0,0 +1,184 @@
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Contents of the store's `index.json`: a catalog of persisted snapshots.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ZhihuHotlistIndex {
/// Id of the snapshot most recently written by `persist_snapshot`;
/// omitted from the JSON when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub latest_snapshot_id: Option<String>,
/// One entry per snapshot; `persist_snapshot` keeps them sorted by
/// ascending `captured_at_ms`.
#[serde(default)]
pub snapshots: Vec<ZhihuHotlistIndexEntry>,
}
/// Index record describing one persisted snapshot.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotlistIndexEntry {
/// Id of the snapshot this entry points at.
pub snapshot_id: String,
/// Capture time copied from the snapshot.
pub captured_at_ms: u64,
/// Path of the snapshot file relative to the store directory
/// (`snapshots/<snapshot_id>.json`).
pub path: String,
/// Number of items in the snapshot at the time it was persisted.
pub item_count: usize,
}
/// A complete hotlist capture as stored in `snapshots/<snapshot_id>.json`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotlistSnapshot {
/// Unique id; also the snapshot's file stem on disk.
pub snapshot_id: String,
/// Capture time; used to order the index.
/// NOTE(review): presumably Unix milliseconds — confirm with the collector.
pub captured_at_ms: u64,
/// URL of the page the hotlist was captured from.
pub page_url: String,
/// Version tag of the collector that produced this snapshot.
pub collector_version: String,
/// Captured hot items.
pub items: Vec<ZhihuHotItemSnapshot>,
/// Counters summarizing the collection run.
pub collection_stats: ZhihuHotlistCollectionStats,
}
/// One hot item inside a persisted snapshot.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotItemSnapshot {
pub rank: usize,
pub item_id: String,
pub url: String,
pub title: String,
pub summary: String,
/// Heat label as captured text.
pub heat_text: String,
/// Numeric heat; omitted from the JSON when `None`.
/// NOTE(review): presumably parsed from `heat_text` — confirm with the collector.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub heat_value: Option<u64>,
/// Metrics of the item's comments; defaults to empty when absent in the JSON.
#[serde(default)]
pub comment_metrics: Vec<ZhihuCommentMetricSnapshot>,
}
/// Metrics captured for a single comment. Every optional field is omitted
/// from the JSON when `None` and defaults to `None` when absent.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuCommentMetricSnapshot {
/// Position of the comment within its item's comment list.
pub position: usize,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub comment_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reply_count: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub upvote_count: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub favorite_count: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub heart_count: Option<u64>,
/// Counts that did not map onto one of the named fields, keyed by a
/// label-derived string.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub raw_metrics: Option<BTreeMap<String, u64>>,
}
/// Counters describing one hotlist collection run.
///
/// NOTE(review): the exact counting rules are set by the collector, which is
/// not part of this module — confirm there before relying on them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotlistCollectionStats {
pub requested_items: usize,
pub collected_items: usize,
pub items_with_comment_metrics: usize,
pub total_comment_metric_records: usize,
pub partial_items: usize,
}
/// Locations of the files written by `persist_snapshot`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersistedSnapshotPaths {
/// Path of the snapshot JSON file (store directory joined with
/// `snapshots/<snapshot_id>.json`).
pub snapshot_path: PathBuf,
/// Path of the rewritten `index.json`.
pub index_path: PathBuf,
}
/// Errors returned by the snapshot store functions in this module.
#[derive(Debug, Error)]
pub enum ZhihuHotlistStoreError {
/// A file-system operation failed.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// Serializing or deserializing JSON failed.
#[error("json error: {0}")]
Json(#[from] serde_json::Error),
/// The index records no latest snapshot.
#[error("no persisted Zhihu hotlist snapshots found")]
NoSnapshots,
/// No snapshot file exists for the given (trimmed) id.
#[error("snapshot not found: {0}")]
SnapshotNotFound(String),
}
/// Default store location: `<current dir>/data/zhihu_hotlist`.
///
/// Falls back to the relative path `./data/zhihu_hotlist` when the current
/// directory cannot be determined.
pub fn default_store_dir() -> PathBuf {
    let mut dir = match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    };
    dir.push("data");
    dir.push("zhihu_hotlist");
    dir
}
pub fn resolve_store_dir(store_dir: Option<&str>) -> PathBuf {
match store_dir {
Some(path) if !path.trim().is_empty() => PathBuf::from(path),
_ => default_store_dir(),
}
}
/// Writes `snapshot` under `base_dir` and records it in the index.
///
/// Steps, in order:
/// 1. create `<base_dir>/snapshots` if needed,
/// 2. write the snapshot as pretty JSON to `snapshots/<snapshot_id>.json`,
/// 3. load the index, mark this snapshot as the latest, replace any entry
///    with the same id, and keep entries sorted by ascending capture time,
/// 4. rewrite `index.json`.
///
/// # Errors
/// `Io` for file-system failures and `Json` for (de)serialization failures.
/// A failure after step 2 leaves the snapshot file on disk without an index
/// entry.
pub fn persist_snapshot(
    base_dir: &Path,
    snapshot: &ZhihuHotlistSnapshot,
) -> Result<PersistedSnapshotPaths, ZhihuHotlistStoreError> {
    fs::create_dir_all(snapshots_dir(base_dir))?;

    let relative_path = format!("snapshots/{}.json", snapshot.snapshot_id);
    let snapshot_path = base_dir.join(&relative_path);
    let snapshot_json = serde_json::to_vec_pretty(snapshot)?;
    fs::write(&snapshot_path, snapshot_json)?;

    let mut index = load_index(base_dir)?;
    index.latest_snapshot_id = Some(snapshot.snapshot_id.clone());
    let entry = ZhihuHotlistIndexEntry {
        snapshot_id: snapshot.snapshot_id.clone(),
        captured_at_ms: snapshot.captured_at_ms,
        path: relative_path,
        item_count: snapshot.items.len(),
    };
    // Re-persisting an id replaces its previous entry.
    index
        .snapshots
        .retain(|existing| existing.snapshot_id != entry.snapshot_id);
    index.snapshots.push(entry);
    index.snapshots.sort_by_key(|existing| existing.captured_at_ms);

    let index_file = index_path(base_dir);
    let index_json = serde_json::to_vec_pretty(&index)?;
    fs::write(&index_file, index_json)?;

    Ok(PersistedSnapshotPaths {
        snapshot_path,
        index_path: index_file,
    })
}
/// Loads `<base_dir>/index.json`.
///
/// A missing index file is not an error: an empty default index is returned
/// so the first `persist_snapshot` can create it.
///
/// # Errors
/// * `Io` when the file exists but cannot be read. Only "not found" maps to
///   the empty index; the previous `exists()` pre-check also reported `false`
///   for permission/metadata errors, which made an unreadable index look
///   empty and let the next persist overwrite it.
/// * `Json` when the contents are not a valid index.
pub fn load_index(base_dir: &Path) -> Result<ZhihuHotlistIndex, ZhihuHotlistStoreError> {
    match fs::read_to_string(index_path(base_dir)) {
        Ok(contents) => Ok(serde_json::from_str(&contents)?),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            Ok(ZhihuHotlistIndex::default())
        }
        Err(err) => Err(err.into()),
    }
}
/// Loads the snapshot stored at `<base_dir>/snapshots/<snapshot_id>.json`.
///
/// `snapshot_id` is trimmed before use.
///
/// # Errors
/// * `SnapshotNotFound` (with the trimmed id) when the file does not exist,
/// * `Io` for any other read failure — reading directly instead of checking
///   `exists()` first avoids both the check/read race and reporting an
///   unreadable file as "not found",
/// * `Json` when the contents are not a valid snapshot.
pub fn load_snapshot(
    base_dir: &Path,
    snapshot_id: &str,
) -> Result<ZhihuHotlistSnapshot, ZhihuHotlistStoreError> {
    let snapshot_id = snapshot_id.trim();
    let path = base_dir
        .join("snapshots")
        .join(format!("{snapshot_id}.json"));
    match fs::read_to_string(path) {
        Ok(contents) => Ok(serde_json::from_str(&contents)?),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Err(
            ZhihuHotlistStoreError::SnapshotNotFound(snapshot_id.to_string()),
        ),
        Err(err) => Err(err.into()),
    }
}
/// Loads the snapshot that the index marks as the latest.
///
/// # Errors
/// `NoSnapshots` when the index has no latest id, plus anything returned by
/// [`load_index`] or [`load_snapshot`].
pub fn load_latest_snapshot(
    base_dir: &Path,
) -> Result<ZhihuHotlistSnapshot, ZhihuHotlistStoreError> {
    match load_index(base_dir)?.latest_snapshot_id {
        Some(latest_id) => load_snapshot(base_dir, &latest_id),
        None => Err(ZhihuHotlistStoreError::NoSnapshots),
    }
}
/// Path of the index file inside the store directory.
fn index_path(base_dir: &Path) -> PathBuf {
    let mut path = base_dir.to_path_buf();
    path.push("index.json");
    path
}
/// Directory inside the store that holds the snapshot files.
fn snapshots_dir(base_dir: &Path) -> PathBuf {
    let mut dir = base_dir.to_path_buf();
    dir.push("snapshots");
    dir
}

View File

@@ -0,0 +1,890 @@
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use thiserror::Error;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, CommandOutput, Transport};
/// Timeout sent with `waitForSelector` actions when neither the flow step nor
/// the route/component definition supplies one.
const DEFAULT_WAIT_TIMEOUT_MS: u64 = 5_000;
/// Serde default for `ZhihuNavigateRequest::ensure_loaded`: verify the page
/// unless the request opts out.
fn default_ensure_loaded() -> bool {
true
}
/// Serde default for `ZhihuComponentDefinition::capture_url`: record the URL
/// reported by a click unless the catalog opts out.
fn default_capture_url() -> bool {
true
}
/// Request to navigate to a catalog target.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct ZhihuNavigateRequest {
/// Key of the target in the catalog's `targets` map (trimmed before lookup).
pub page: String,
/// When true (the default), run the target's wait/verify checks after the
/// navigation or click. Not consulted for flow targets.
#[serde(default = "default_ensure_loaded")]
pub ensure_loaded: bool,
}
/// Outcome of a successful `execute` call.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ZhihuNavigateResult {
/// Human-readable summary produced by `build_summary`.
pub summary: String,
/// The (trimmed) target key that was requested.
pub page: String,
/// Last URL recorded during execution; empty when none was recorded.
pub final_url: String,
}
/// Navigation catalog deserialized from `zhihu_navigation_pages.json`.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuNavigationCatalog {
/// Domain strings keyed by the names used in `domain_ref` fields.
pub domains: HashMap<String, String>,
/// Routes keyed by the names used in `route_ref` fields.
#[serde(default)]
pub routes: HashMap<String, ZhihuRouteDefinition>,
/// Components keyed by the names used in `component_ref` fields.
#[serde(default)]
pub components: HashMap<String, ZhihuComponentDefinition>,
/// Multi-step flows keyed by the names used in `flow_ref` fields.
#[serde(default)]
pub flows: HashMap<String, ZhihuFlowDefinition>,
/// Navigable targets keyed by the value of `ZhihuNavigateRequest::page`.
pub targets: HashMap<String, ZhihuTargetDefinition>,
}
/// A page reached by navigating directly to a URL.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuRouteDefinition {
/// Display title; also used as an alias when matching instructions.
pub title: String,
/// Key into the catalog's `domains`; the expected domain for the navigation.
pub domain_ref: String,
/// URL to navigate to.
pub url: String,
/// Extra names that match this route in free-text instructions.
#[serde(default)]
pub aliases: Vec<String>,
/// Selector to wait for after navigating (only when the load is verified).
pub wait_selector: Option<String>,
/// Timeout for that wait; `DEFAULT_WAIT_TIMEOUT_MS` when absent.
pub wait_timeout_ms: Option<u64>,
/// Selector whose text is checked; only used together with `expect_text`.
pub expect_selector: Option<String>,
/// Text that the `expect_selector` node must contain.
pub expect_text: Option<String>,
}
/// A page element that is interacted with through its selector.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuComponentDefinition {
/// Display title; also used as an alias when matching instructions.
pub title: String,
/// Key into the catalog's `domains`; the expected domain for the action.
pub domain_ref: String,
/// CSS selector of the element.
pub selector: String,
/// Extra names that match this component in free-text instructions.
#[serde(default)]
pub aliases: Vec<String>,
/// Route navigated to (without load checks) before the click.
pub entry_route_ref: Option<String>,
/// Domain key used for the post-click checks; falls back to `domain_ref`.
pub result_domain_ref: Option<String>,
/// Selector to wait for after the click (only when the load is verified).
pub wait_selector: Option<String>,
/// Timeout for that wait; `DEFAULT_WAIT_TIMEOUT_MS` when absent.
pub wait_timeout_ms: Option<u64>,
/// Selector whose text is checked; only used together with `expect_text`.
pub expect_selector: Option<String>,
/// Text that the `expect_selector` node must contain.
pub expect_text: Option<String>,
/// Sent as the click's `wait_after` parameter when present.
pub wait_after_ms: Option<u64>,
/// Whether a URL reported by the click is recorded as the final URL
/// (defaults to true).
#[serde(default = "default_capture_url")]
pub capture_url: bool,
}
/// How a target is executed; selects which `*_ref` field of the target is
/// required. Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ZhihuTargetKind {
/// Navigate to the route named by `route_ref`.
Route,
/// Click the component named by `component_ref`.
Component,
/// Run the flow named by `flow_ref`.
Flow,
}
/// Category tag attached to a target. Serialized in snake_case.
///
/// NOTE(review): presumably selects the wording used by `build_summary` —
/// confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ZhihuSummaryKind {
Page,
Entry,
Menu,
Navigation,
}
/// A navigable target exposed to callers through its catalog key.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuTargetDefinition {
/// Display title; also used as an alias when matching instructions.
pub title: String,
/// Decides which of the `*_ref` fields below is executed.
pub kind: ZhihuTargetKind,
pub summary_kind: Option<ZhihuSummaryKind>,
/// Required when `kind` is `Route`; its title/aliases also count as aliases.
pub route_ref: Option<String>,
/// Required when `kind` is `Component`; its title/aliases also count as aliases.
pub component_ref: Option<String>,
/// Required when `kind` is `Flow`.
pub flow_ref: Option<String>,
/// Extra names that match this target in free-text instructions.
#[serde(default)]
pub aliases: Vec<String>,
}
/// A scripted sequence of browser actions.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuFlowDefinition {
/// Steps executed in order; the first failure aborts the flow.
pub steps: Vec<ZhihuFlowStep>,
}
/// One step of a flow.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuFlowStep {
/// Step name used in error reports.
pub name: String,
/// Action name understood by `parse_action`: `click`, `navigate`,
/// `getText`, `getHtml`, `waitForSelector` or `scrollTo`.
pub action: String,
/// Route to open; required for `navigate`.
pub route_ref: Option<String>,
/// Component acted on; required for every action except `navigate`.
pub component_ref: Option<String>,
/// Domain key overriding the route's/component's `domain_ref`.
pub expected_domain: Option<String>,
/// Timeout for `waitForSelector`, overriding the component's value.
pub timeout_ms: Option<u64>,
/// `wait_after` for `click`, overriding the component's value.
pub wait_after_ms: Option<u64>,
/// Whether a URL reported by a non-navigate step is recorded
/// (defaults to false).
#[serde(default)]
pub capture_url: bool,
/// Text the step's result content must contain.
pub expect_text: Option<String>,
/// Message logged before the step runs.
pub log_message: String,
}
/// Errors produced while resolving or executing a Zhihu navigation target.
#[derive(Debug, Error)]
pub enum ZhihuNavigationError {
/// The request's `page` was blank.
#[error("page 不能为空")]
EmptyPage,
/// The catalog file could not be read or parsed; the message includes the path.
#[error("failed to load zhihu navigation catalog: {0}")]
CatalogLoad(String),
/// No target with the requested key exists.
#[error("unknown zhihu target: {0}")]
UnknownTarget(String),
/// A domain key is not present in the catalog.
#[error("missing domain in zhihu navigation catalog: {0}")]
MissingDomain(String),
/// A route key is not present in the catalog.
#[error("missing route in zhihu navigation catalog: {0}")]
MissingRoute(String),
/// A component key is not present in the catalog.
#[error("missing component in zhihu navigation catalog: {0}")]
MissingComponent(String),
/// A flow key is not present in the catalog.
#[error("missing flow in zhihu navigation catalog: {0}")]
MissingFlow(String),
/// The target lacks the `*_ref` field required by its kind.
#[error("invalid target definition in zhihu navigation catalog: {0}")]
InvalidTargetDefinition(String),
/// A `navigate` flow step has no `route_ref`; carries the step name.
#[error("missing route ref in zhihu navigation flow step: {0}")]
MissingRouteRef(String),
/// A component-based flow step has no `component_ref`; carries the step name.
#[error("missing component ref in zhihu navigation flow step: {0}")]
MissingComponentRef(String),
/// The action name is not supported in flows.
#[error("unknown action in zhihu navigation flow: {0}")]
UnknownAction(String),
/// Sending a log entry or running a browser action failed.
#[error("browser action failed at step {step}: {message}")]
BrowserActionFailed { step: String, message: String },
/// The content returned by a step did not contain the expected text.
#[error("step {step} expected text containing `{expected}`, got `{actual}`")]
ExpectedTextMissing {
step: String,
expected: String,
actual: String,
},
}
/// Mutable state threaded through one `execute` call.
#[derive(Debug, Default)]
struct ExecutionState {
// Most recent URL recorded from a browser result (or a route's configured
// URL as fallback); `None` until something records one.
final_url: Option<String>,
}
/// Parameters for `run_post_action_checks`.
#[derive(Debug, Clone, Copy)]
struct PostActionChecks<'a> {
// Domain passed as the expected domain of the wait and get-text actions.
expected_domain: &'a str,
// Selector to wait for; the wait is skipped when `None`.
wait_selector: Option<&'a str>,
// Timeout for the wait; `DEFAULT_WAIT_TIMEOUT_MS` when `None`.
wait_timeout_ms: Option<u64>,
// Selector and text for the verification; it runs only when both are set.
expect_selector: Option<&'a str>,
expect_text: Option<&'a str>,
// When true, a check result that carries no URL clears the recorded URL.
reset_url_when_absent: bool,
}
/// Location of the navigation catalog, resolved through the parent module's
/// skill-resource lookup.
pub fn default_catalog_path() -> PathBuf {
    const CATALOG_FILE_NAME: &str = "zhihu_navigation_pages.json";
    super::default_skill_resource_path(CATALOG_FILE_NAME)
}
/// Reads and parses the navigation catalog from [`default_catalog_path`].
///
/// # Errors
/// `CatalogLoad` for both read and parse failures; the message has the form
/// `<cause> (<path>)`.
pub fn load_catalog() -> Result<ZhihuNavigationCatalog, ZhihuNavigationError> {
    let catalog_path = default_catalog_path();
    let load_error = |cause: String| {
        ZhihuNavigationError::CatalogLoad(format!("{} ({})", cause, catalog_path.display()))
    };

    let raw = match fs::read_to_string(&catalog_path) {
        Ok(raw) => raw,
        Err(err) => return Err(load_error(err.to_string())),
    };
    match serde_json::from_str(&raw) {
        Ok(catalog) => Ok(catalog),
        Err(err) => Err(load_error(err.to_string())),
    }
}
/// Tries to map a free-text instruction onto a single catalog target.
///
/// Returns `Ok(None)` when the instruction does not look like a navigation
/// request, when no target alias occurs in it, or when the two best-scoring
/// targets tie (the match is ambiguous). Otherwise returns a request for the
/// best-scoring target with `ensure_loaded` set.
///
/// # Errors
/// Propagates catalog loading failures. The catalog is only loaded once the
/// instruction has passed the navigation-intent check.
pub fn try_route_alias(
    instruction: &str,
) -> Result<Option<ZhihuNavigateRequest>, ZhihuNavigationError> {
    let instruction = instruction.trim();
    if !looks_like_navigation_intent(instruction) {
        return Ok(None);
    }
    let catalog = load_catalog()?;
    let normalized_instruction = normalize_text(instruction);

    let mut candidates: Vec<(&str, usize)> = catalog
        .targets
        .iter()
        .filter_map(|(key, target)| {
            let score = best_target_match_score(&catalog, target, &normalized_instruction);
            (score > 0).then_some((key.as_str(), score))
        })
        .collect();
    // Highest score first; the key breaks ties so the order is deterministic.
    candidates.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

    let winner = match candidates.as_slice() {
        [] => return Ok(None),
        [best, runner_up, ..] if best.1 == runner_up.1 => return Ok(None),
        [best, ..] => best.0,
    };
    Ok(Some(ZhihuNavigateRequest {
        page: winner.to_string(),
        ensure_loaded: true,
    }))
}
/// Executes a navigation request against the live browser.
///
/// The target named by `req.page` is looked up in the catalog and run
/// according to its kind: a route navigation, a component click, or a flow.
/// `req.ensure_loaded` is forwarded to route and component targets.
///
/// # Errors
/// * `EmptyPage` for a blank page key,
/// * `CatalogLoad` / `UnknownTarget` when the catalog or target is unavailable,
/// * `InvalidTargetDefinition` when the target lacks the reference its kind
///   requires,
/// * any error raised while running the route, component or flow.
pub fn execute<T: Transport>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    req: ZhihuNavigateRequest,
) -> Result<ZhihuNavigateResult, ZhihuNavigationError> {
    /// Unwraps the reference a target kind depends on.
    fn required_ref<'a>(
        reference: &'a Option<String>,
        target_key: &str,
    ) -> Result<&'a str, ZhihuNavigationError> {
        match reference.as_deref() {
            Some(value) => Ok(value),
            None => Err(ZhihuNavigationError::InvalidTargetDefinition(
                target_key.to_string(),
            )),
        }
    }

    validate_request(&req)?;
    let catalog = load_catalog()?;
    let target_key = req.page.trim();
    let target = resolve_target(&catalog, target_key)?;
    let mut state = ExecutionState::default();

    match target.kind {
        ZhihuTargetKind::Route => run_route(
            transport,
            browser_tool,
            &catalog,
            required_ref(&target.route_ref, target_key)?,
            req.ensure_loaded,
            &mut state,
        )?,
        ZhihuTargetKind::Component => run_component_target(
            transport,
            browser_tool,
            &catalog,
            required_ref(&target.component_ref, target_key)?,
            req.ensure_loaded,
            &mut state,
        )?,
        ZhihuTargetKind::Flow => run_flow(
            transport,
            browser_tool,
            &catalog,
            required_ref(&target.flow_ref, target_key)?,
            &mut state,
        )?,
    }

    let final_url = state.final_url.unwrap_or_default();
    let summary = build_summary(target, &final_url);
    Ok(ZhihuNavigateResult {
        summary,
        page: target_key.to_string(),
        final_url,
    })
}
/// Rejects requests whose `page` is empty or whitespace-only.
fn validate_request(req: &ZhihuNavigateRequest) -> Result<(), ZhihuNavigationError> {
    match req.page.trim() {
        "" => Err(ZhihuNavigationError::EmptyPage),
        _ => Ok(()),
    }
}
/// Navigates to the route named `route_ref`.
///
/// The final URL is set to the URL reported by the navigation, or to the
/// route's configured URL when none is reported. When `ensure_loaded` is
/// true, the route's wait/verify checks run afterwards.
///
/// # Errors
/// `MissingRoute`, `MissingDomain`, `BrowserActionFailed`, or
/// `ExpectedTextMissing` from the post-action checks.
fn run_route<T: Transport>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    catalog: &ZhihuNavigationCatalog,
    route_ref: &str,
    ensure_loaded: bool,
    state: &mut ExecutionState,
) -> Result<(), ZhihuNavigationError> {
    let route = resolve_route(catalog, route_ref)?;
    let domain = resolve_domain(catalog, &route.domain_ref)?;

    send_log(transport, &format!("navigate {}", route.url), "navigate")?;
    let navigation = invoke_browser_action(
        browser_tool,
        Action::Navigate,
        json!({ "url": route.url }),
        domain.as_str(),
        "navigate",
    )?;
    let landed_url = match extract_url(&navigation.data) {
        Some(url) => url,
        None => route.url.clone(),
    };
    state.final_url = Some(landed_url);

    if !ensure_loaded {
        return Ok(());
    }
    let checks = PostActionChecks {
        expected_domain: domain.as_str(),
        wait_selector: route.wait_selector.as_deref(),
        wait_timeout_ms: route.wait_timeout_ms,
        expect_selector: route.expect_selector.as_deref(),
        expect_text: route.expect_text.as_deref(),
        reset_url_when_absent: false,
    };
    run_post_action_checks(transport, browser_tool, checks, state)
}
/// Clicks the component named `component_ref`.
///
/// If the component declares an entry route, that route is opened first
/// (without load checks). A URL reported by the click is recorded only when
/// the component has `capture_url` set. When `ensure_loaded` is true the
/// component's wait/verify checks run against its result domain (falling back
/// to its own domain); for components that do not capture URLs, a check
/// result without a URL clears the recorded URL.
///
/// # Errors
/// `MissingComponent`, `MissingRoute`, `MissingDomain`,
/// `BrowserActionFailed`, or `ExpectedTextMissing`.
fn run_component_target<T: Transport>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    catalog: &ZhihuNavigationCatalog,
    component_ref: &str,
    ensure_loaded: bool,
    state: &mut ExecutionState,
) -> Result<(), ZhihuNavigationError> {
    let component = resolve_component(catalog, component_ref)?;
    if let Some(entry_route) = component.entry_route_ref.as_deref() {
        run_route(transport, browser_tool, catalog, entry_route, false, state)?;
    }

    let click_domain = resolve_domain(catalog, &component.domain_ref)?;
    let log_line = format!("click {}", component.title);
    send_log(transport, &log_line, component_ref)?;
    let click = invoke_browser_action(
        browser_tool,
        Action::Click,
        build_click_params(component.selector.as_str(), component.wait_after_ms),
        click_domain.as_str(),
        component_ref,
    )?;
    if let (true, Some(url)) = (component.capture_url, extract_url(&click.data)) {
        state.final_url = Some(url);
    }

    if !ensure_loaded {
        return Ok(());
    }
    let result_domain_key = match component.result_domain_ref.as_deref() {
        Some(key) => key,
        None => component.domain_ref.as_str(),
    };
    let result_domain = resolve_domain(catalog, result_domain_key)?;
    let checks = PostActionChecks {
        expected_domain: result_domain.as_str(),
        wait_selector: component.wait_selector.as_deref(),
        wait_timeout_ms: component.wait_timeout_ms,
        expect_selector: component.expect_selector.as_deref(),
        expect_text: component.expect_text.as_deref(),
        reset_url_when_absent: !component.capture_url,
    };
    run_post_action_checks(transport, browser_tool, checks, state)
}
/// Runs every step of the flow named `flow_ref`, in order.
///
/// For each step the log message is sent, the action is built and invoked,
/// and the final URL is updated: navigate steps always record a URL (reported
/// URL, else the route URL), other steps only when `capture_url` is set and a
/// URL was reported. A step with `expect_text` fails unless the returned
/// content contains that text.
///
/// # Errors
/// `MissingFlow`, any error from building or invoking a step, or
/// `ExpectedTextMissing`; the first error aborts the flow.
fn run_flow<T: Transport>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    catalog: &ZhihuNavigationCatalog,
    flow_ref: &str,
    state: &mut ExecutionState,
) -> Result<(), ZhihuNavigationError> {
    for step in &resolve_flow(catalog, flow_ref)?.steps {
        let step_name = step.name.as_str();
        send_log(transport, &step.log_message, step_name)?;

        let action = parse_action(&step.action)?;
        // Remember the kind now; `action` is moved into the invocation below.
        let navigating = matches!(action, Action::Navigate);
        let (expected_domain, params, fallback_url) =
            build_flow_step_action(catalog, step, &action)?;
        let outcome = invoke_browser_action(
            browser_tool,
            action,
            params,
            expected_domain.as_str(),
            step_name,
        )?;

        let reported_url = extract_url(&outcome.data);
        if navigating {
            state.final_url = Some(reported_url.or(fallback_url).unwrap_or_default());
        } else if step.capture_url {
            if let Some(url) = reported_url {
                state.final_url = Some(url);
            }
        }

        let Some(expected_text) = step.expect_text.as_deref() else {
            continue;
        };
        let actual = extract_content(&outcome.data);
        if !actual.contains(expected_text) {
            return Err(ZhihuNavigationError::ExpectedTextMissing {
                step: step.name.clone(),
                expected: expected_text.to_string(),
                actual,
            });
        }
    }
    Ok(())
}
fn build_flow_step_action(
catalog: &ZhihuNavigationCatalog,
step: &ZhihuFlowStep,
action: &Action,
) -> Result<(String, Value, Option<String>), ZhihuNavigationError> {
match action {
Action::Navigate => {
let route_ref = step
.route_ref
.as_deref()
.ok_or_else(|| ZhihuNavigationError::MissingRouteRef(step.name.clone()))?;
let route = resolve_route(catalog, route_ref)?;
let domain_key = step
.expected_domain
.as_deref()
.unwrap_or(route.domain_ref.as_str());
let expected_domain = resolve_domain(catalog, domain_key)?;
Ok((
expected_domain,
json!({ "url": route.url }),
Some(route.url.clone()),
))
}
Action::Click => {
let component_ref = step
.component_ref
.as_deref()
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
let component = resolve_component(catalog, component_ref)?;
let domain_key = step
.expected_domain
.as_deref()
.unwrap_or(component.domain_ref.as_str());
let expected_domain = resolve_domain(catalog, domain_key)?;
let wait_after_ms = step.wait_after_ms.or(component.wait_after_ms);
Ok((
expected_domain,
build_click_params(component.selector.as_str(), wait_after_ms),
None,
))
}
Action::WaitForSelector => {
let component_ref = step
.component_ref
.as_deref()
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
let component = resolve_component(catalog, component_ref)?;
let domain_key = step
.expected_domain
.as_deref()
.unwrap_or(component.domain_ref.as_str());
let expected_domain = resolve_domain(catalog, domain_key)?;
Ok((
expected_domain,
json!({
"selector": component.selector,
"timeout_ms": step.timeout_ms.unwrap_or(component.wait_timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS)),
}),
None,
))
}
Action::GetText => {
let component_ref = step
.component_ref
.as_deref()
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
let component = resolve_component(catalog, component_ref)?;
let domain_key = step
.expected_domain
.as_deref()
.unwrap_or(component.domain_ref.as_str());
let expected_domain = resolve_domain(catalog, domain_key)?;
Ok((
expected_domain,
json!({ "selector": component.selector }),
None,
))
}
Action::GetHtml => {
let component_ref = step
.component_ref
.as_deref()
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
let component = resolve_component(catalog, component_ref)?;
let domain_key = step
.expected_domain
.as_deref()
.unwrap_or(component.domain_ref.as_str());
let expected_domain = resolve_domain(catalog, domain_key)?;
Ok((
expected_domain,
json!({ "selector": component.selector, "outer": true }),
None,
))
}
Action::ScrollTo => {
let component_ref = step
.component_ref
.as_deref()
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
let component = resolve_component(catalog, component_ref)?;
let domain_key = step
.expected_domain
.as_deref()
.unwrap_or(component.domain_ref.as_str());
let expected_domain = resolve_domain(catalog, domain_key)?;
Ok((
expected_domain,
json!({ "selector": component.selector }),
None,
))
}
other => Err(ZhihuNavigationError::UnknownAction(
other.as_str().to_string(),
)),
}
}
fn run_post_action_checks<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
checks: PostActionChecks<'_>,
state: &mut ExecutionState,
) -> Result<(), ZhihuNavigationError> {
if let Some(selector) = checks.wait_selector {
send_log(
transport,
&format!("wait for {selector}"),
"wait_for_selector",
)?;
let wait_result = invoke_browser_action(
browser_tool,
Action::WaitForSelector,
json!({
"selector": selector,
"timeout_ms": checks.wait_timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS),
}),
checks.expected_domain,
"wait_for_selector",
)?;
if let Some(url) = extract_url(&wait_result.data) {
state.final_url = Some(url);
} else if checks.reset_url_when_absent {
state.final_url = None;
}
}
if let (Some(selector), Some(expected_text)) = (checks.expect_selector, checks.expect_text) {
send_log(transport, &format!("verify {selector}"), "verify_text")?;
let text_result = invoke_browser_action(
browser_tool,
Action::GetText,
json!({ "selector": selector }),
checks.expected_domain,
"verify_text",
)?;
if let Some(url) = extract_url(&text_result.data) {
state.final_url = Some(url);
} else if checks.reset_url_when_absent {
state.final_url = None;
}
let actual = extract_content(&text_result.data);
if !actual.contains(expected_text) {
return Err(ZhihuNavigationError::ExpectedTextMissing {
step: "verify_text".to_string(),
expected: expected_text.to_string(),
actual,
});
}
}
Ok(())
}
/// Sends an `info` log entry over the transport.
///
/// # Errors
/// `BrowserActionFailed` tagged with `step` when the transport rejects the
/// message.
fn send_log<T: Transport>(
    transport: &T,
    message: &str,
    step: &str,
) -> Result<(), ZhihuNavigationError> {
    let entry = AgentMessage::LogEntry {
        level: "info".to_string(),
        message: message.to_string(),
    };
    match transport.send(&entry) {
        Ok(sent) => Ok(sent),
        Err(err) => Err(ZhihuNavigationError::BrowserActionFailed {
            step: step.to_string(),
            message: err.to_string(),
        }),
    }
}
/// Runs one browser action and treats an unsuccessful result as an error.
///
/// # Errors
/// `BrowserActionFailed` tagged with `step`, either with the tool's error
/// message or, when the tool answered with `success == false`, with the JSON
/// text of the returned `data`.
fn invoke_browser_action<T: Transport>(
    browser_tool: &BrowserPipeTool<T>,
    action: Action,
    params: Value,
    expected_domain: &str,
    step: &str,
) -> Result<CommandOutput, ZhihuNavigationError> {
    let failure = |message: String| ZhihuNavigationError::BrowserActionFailed {
        step: step.to_string(),
        message,
    };
    let output = match browser_tool.invoke(action, params, expected_domain) {
        Ok(output) => output,
        Err(err) => return Err(failure(err.to_string())),
    };
    if output.success {
        Ok(output)
    } else {
        Err(failure(output.data.to_string()))
    }
}
fn build_click_params(selector: &str, wait_after_ms: Option<u64>) -> Value {
let mut params = serde_json::Map::new();
params.insert("selector".to_string(), Value::String(selector.to_string()));
if let Some(wait_after_ms) = wait_after_ms {
params.insert("wait_after".to_string(), Value::from(wait_after_ms));
}
Value::Object(params)
}
/// Maps a flow step's action name onto an [`Action`].
///
/// Recognized names (case-sensitive): `click`, `navigate`, `getText`,
/// `getHtml`, `waitForSelector`, `scrollTo`.
///
/// # Errors
/// `UnknownAction` carrying the unrecognized name.
fn parse_action(name: &str) -> Result<Action, ZhihuNavigationError> {
    let action = match name {
        "click" => Action::Click,
        "navigate" => Action::Navigate,
        "getText" => Action::GetText,
        "getHtml" => Action::GetHtml,
        "waitForSelector" => Action::WaitForSelector,
        "scrollTo" => Action::ScrollTo,
        unknown => return Err(ZhihuNavigationError::UnknownAction(unknown.to_string())),
    };
    Ok(action)
}
/// Looks up a target by key.
///
/// # Errors
/// `UnknownTarget(target_key)` when the catalog has no such target.
fn resolve_target<'a>(
    catalog: &'a ZhihuNavigationCatalog,
    target_key: &str,
) -> Result<&'a ZhihuTargetDefinition, ZhihuNavigationError> {
    let Some(target) = catalog.targets.get(target_key) else {
        return Err(ZhihuNavigationError::UnknownTarget(target_key.to_string()));
    };
    Ok(target)
}
/// Looks up a route by key.
///
/// # Errors
/// `MissingRoute(route_ref)` when the catalog has no such route.
fn resolve_route<'a>(
    catalog: &'a ZhihuNavigationCatalog,
    route_ref: &str,
) -> Result<&'a ZhihuRouteDefinition, ZhihuNavigationError> {
    let Some(route) = catalog.routes.get(route_ref) else {
        return Err(ZhihuNavigationError::MissingRoute(route_ref.to_string()));
    };
    Ok(route)
}
/// Looks up `component_ref` in the catalog's components.
///
/// Returns `ZhihuNavigationError::MissingComponent` with the reference when
/// absent.
fn resolve_component<'a>(
    catalog: &'a ZhihuNavigationCatalog,
    component_ref: &str,
) -> Result<&'a ZhihuComponentDefinition, ZhihuNavigationError> {
    match catalog.components.get(component_ref) {
        Some(definition) => Ok(definition),
        None => Err(ZhihuNavigationError::MissingComponent(
            component_ref.to_string(),
        )),
    }
}
/// Looks up `flow_ref` in the catalog's flows.
///
/// Returns `ZhihuNavigationError::MissingFlow` with the reference when absent.
fn resolve_flow<'a>(
    catalog: &'a ZhihuNavigationCatalog,
    flow_ref: &str,
) -> Result<&'a ZhihuFlowDefinition, ZhihuNavigationError> {
    match catalog.flows.get(flow_ref) {
        Some(definition) => Ok(definition),
        None => Err(ZhihuNavigationError::MissingFlow(flow_ref.to_string())),
    }
}
/// Returns an owned copy of the domain registered under `key`.
///
/// Returns `ZhihuNavigationError::MissingDomain` with the key when absent.
fn resolve_domain(
    catalog: &ZhihuNavigationCatalog,
    key: &str,
) -> Result<String, ZhihuNavigationError> {
    match catalog.domains.get(key) {
        Some(domain) => Ok(domain.clone()),
        None => Err(ZhihuNavigationError::MissingDomain(key.to_string())),
    }
}
/// Scores how well `target` matches an already-normalised instruction.
///
/// Every alias of the target is normalised; the longest one (by byte length)
/// that occurs inside `normalized_instruction` sets the base score of
/// `length * 100`, to which `match_bonus` is added. Returns `0` when no
/// non-empty alias occurs in the instruction.
fn best_target_match_score(
    catalog: &ZhihuNavigationCatalog,
    target: &ZhihuTargetDefinition,
    normalized_instruction: &str,
) -> usize {
    let mut longest_hit = 0;
    for alias in collect_target_aliases(catalog, target) {
        let candidate = normalize_text(alias.as_str());
        if candidate.is_empty() || !normalized_instruction.contains(candidate.as_str()) {
            continue;
        }
        longest_hit = longest_hit.max(candidate.len());
    }

    if longest_hit == 0 {
        0
    } else {
        longest_hit * 100 + match_bonus(target, normalized_instruction)
    }
}
/// Gathers every name an instruction may use to refer to `target`.
///
/// The list contains the target's own title and aliases, plus the title and
/// aliases of the route and component it references when those references
/// resolve in `catalog`. Blank entries are removed and the result is sorted
/// and de-duplicated.
fn collect_target_aliases(
    catalog: &ZhihuNavigationCatalog,
    target: &ZhihuTargetDefinition,
) -> Vec<String> {
    let mut names: Vec<String> = std::iter::once(target.title.clone())
        .chain(target.aliases.iter().cloned())
        .collect();

    let linked_route = target
        .route_ref
        .as_deref()
        .and_then(|route_ref| catalog.routes.get(route_ref));
    if let Some(route) = linked_route {
        names.push(route.title.clone());
        names.extend(route.aliases.iter().cloned());
    }

    let linked_component = target
        .component_ref
        .as_deref()
        .and_then(|component_ref| catalog.components.get(component_ref));
    if let Some(component) = linked_component {
        names.push(component.title.clone());
        names.extend(component.aliases.iter().cloned());
    }

    names.retain(|name| !name.trim().is_empty());
    names.sort();
    names.dedup();
    names
}
/// Tie-break bonus for a target whose summary kind is hinted at in the text.
///
/// Returns `20` when the instruction contains a hint word belonging to the
/// target's own summary kind (page, entry or menu), otherwise `0`. A target
/// has exactly one summary kind, so at most one hint group can apply.
fn match_bonus(target: &ZhihuTargetDefinition, normalized_instruction: &str) -> usize {
    let mentions_any = |tokens: &[&str]| {
        tokens
            .iter()
            .any(|token| normalized_instruction.contains(*token))
    };

    let hinted = match target_summary_kind(target) {
        ZhihuSummaryKind::Page => mentions_any(&["页面"]),
        ZhihuSummaryKind::Entry => mentions_any(&["按钮", "入口"]),
        ZhihuSummaryKind::Menu => mentions_any(&["菜单", "下拉"]),
        ZhihuSummaryKind::Navigation => false,
    };

    if hinted {
        20
    } else {
        0
    }
}
/// Returns the summary kind to use for `target`.
///
/// An explicit `summary_kind` on the target wins; otherwise the kind is
/// derived from the target kind: route -> page, component -> entry,
/// flow -> navigation.
fn target_summary_kind(target: &ZhihuTargetDefinition) -> ZhihuSummaryKind {
    if let Some(explicit) = target.summary_kind {
        return explicit;
    }
    match target.kind {
        ZhihuTargetKind::Route => ZhihuSummaryKind::Page,
        ZhihuTargetKind::Component => ZhihuSummaryKind::Entry,
        ZhihuTargetKind::Flow => ZhihuSummaryKind::Navigation,
    }
}
/// Produces the completion summary reported for `target`.
///
/// Page summaries always append the URL in parentheses, even when it is
/// empty. Entry and navigation summaries append it only when non-empty. Menu
/// summaries never include it.
fn build_summary(target: &ZhihuTargetDefinition, final_url: &str) -> String {
    let title = &target.title;
    match (target_summary_kind(target), final_url.is_empty()) {
        (ZhihuSummaryKind::Page, _) => {
            format!("知乎页面已打开:{} ({final_url})", title)
        }
        (ZhihuSummaryKind::Entry, true) => format!("知乎入口已打开:{}", title),
        (ZhihuSummaryKind::Entry, false) => {
            format!("知乎入口已打开:{} ({final_url})", title)
        }
        (ZhihuSummaryKind::Menu, _) => format!("知乎菜单已打开:{}", title),
        (ZhihuSummaryKind::Navigation, true) => format!("知乎导航已完成:{}", title),
        (ZhihuSummaryKind::Navigation, false) => {
            format!("知乎导航已完成:{} ({final_url})", title)
        }
    }
}
/// Heuristically decides whether `instruction` asks to navigate within Zhihu.
///
/// The instruction is normalised first; the result is true only when the text
/// names the platform (or one of its sections) AND contains a navigation verb.
///
/// The verb list must never contain an empty string: `str::contains("")` is
/// true for every input, which would make the verb check vacuous and reduce
/// this function to the platform check alone.
/// NOTE(review): the list previously held an empty token between "前往" and
/// "点开". If that slot was meant to hold a real verb, add it back here.
fn looks_like_navigation_intent(instruction: &str) -> bool {
    const PLATFORM_TOKENS: [&str; 4] = ["知乎", "专栏", "创作中心", "创作者中心"];
    const VERB_TOKENS: [&str; 7] = ["打开", "进入", "跳转", "前往", "点开", "展开", "切到"];

    let normalized = normalize_text(instruction);
    let has_platform = PLATFORM_TOKENS
        .iter()
        .any(|token| normalized.contains(token));
    let has_verb = VERB_TOKENS.iter().any(|token| normalized.contains(token));
    has_platform && has_verb
}
/// Canonicalises free-form text for substring matching.
///
/// Removes every whitespace character and a fixed set of CJK and ASCII
/// punctuation marks, then lowercases the remaining characters.
///
/// The fullwidth punctuation marks are written as `\u{..}` escapes. Their
/// glyphs had degraded into empty character literals (`''`), which is not
/// valid Rust; escapes cannot be lost the same way.
/// NOTE(review): the seven restored code points were inferred from the order
/// of the ASCII list below. Confirm they match the intended set.
fn normalize_text(text: &str) -> String {
    text.chars()
        .filter(|ch| {
            !ch.is_whitespace()
                && !matches!(
                    *ch,
                    // CJK and fullwidth punctuation.
                    '\u{FF0C}' // fullwidth comma
                        | '。'
                        | '\u{FF1A}' // fullwidth colon
                        | '\u{FF1B}' // fullwidth semicolon
                        | '\u{FF01}' // fullwidth exclamation mark
                        | '\u{FF1F}' // fullwidth question mark
                        | '、'
                        | '\u{FF08}' // fullwidth left parenthesis
                        | '\u{FF09}' // fullwidth right parenthesis
                        | '【'
                        | '】'
                        // ASCII punctuation.
                        | ','
                        | '.'
                        | ':'
                        | ';'
                        | '!'
                        | '?'
                        | '('
                        | ')'
                        | '['
                        | ']'
                        | '"'
                        | '\''
                        | '/'
                        | '\\'
                        | '-'
                        | '_'
                )
        })
        .flat_map(|ch| ch.to_lowercase())
        .collect()
}
/// Pulls the textual payload out of a browser response.
///
/// Prefers the string under `text`, falls back to the string under `html`,
/// and yields an empty string when neither is a string. The result is
/// trimmed.
fn extract_content(data: &Value) -> String {
    let raw = match data.get("text").and_then(Value::as_str) {
        Some(text) => text,
        None => data
            .get("html")
            .and_then(Value::as_str)
            .unwrap_or_default(),
    };
    raw.trim().to_string()
}
fn extract_url(data: &Value) -> Option<String> {
data.get("url")
.and_then(Value::as_str)
.map(str::trim)
.filter(|url| !url.is_empty())
.map(ToOwned::to_owned)
}

View File

@@ -17,6 +17,7 @@ impl MockTransport {
} }
} }
#[allow(dead_code)]
pub fn sent_messages(&self) -> Vec<AgentMessage> { pub fn sent_messages(&self) -> Vec<AgentMessage> {
self.sent.lock().unwrap().clone() self.sent.lock().unwrap().clone()
} }

View File

@@ -26,7 +26,9 @@ fn test_policy() -> MacPolicy {
.unwrap() .unwrap()
} }
fn build_adapter(messages: Vec<BrowserMessage>) -> (Arc<MockTransport>, ZeroClawBrowserTool<MockTransport>) { fn build_adapter(
messages: Vec<BrowserMessage>,
) -> (Arc<MockTransport>, ZeroClawBrowserTool<MockTransport>) {
let transport = Arc::new(MockTransport::new(messages)); let transport = Arc::new(MockTransport::new(messages));
let browser_tool = BrowserPipeTool::new( let browser_tool = BrowserPipeTool::new(
transport.clone(), transport.clone(),
@@ -193,13 +195,11 @@ async fn zeroclaw_browser_tool_keeps_domain_validation_in_mac_policy() {
assert!(!result.success); assert!(!result.success);
assert!(result.output.is_empty()); assert!(result.output.is_empty());
assert_eq!(transport.sent_messages().len(), 0); assert_eq!(transport.sent_messages().len(), 0);
assert!( assert!(result
result .error
.error .as_deref()
.as_deref() .unwrap()
.unwrap() .contains("domain is not allowed"));
.contains("domain is not allowed")
);
} }
#[tokio::test] #[tokio::test]
@@ -232,25 +232,19 @@ async fn zeroclaw_browser_tool_rejects_missing_required_action_parameters() {
assert!(!missing_text_selector.success); assert!(!missing_text_selector.success);
assert!(!missing_navigate_url.success); assert!(!missing_navigate_url.success);
assert_eq!(transport.sent_messages().len(), 0); assert_eq!(transport.sent_messages().len(), 0);
assert!( assert!(missing_click_selector
missing_click_selector .error
.error .as_deref()
.as_deref() .unwrap()
.unwrap() .contains("click requires selector"));
.contains("click requires selector") assert!(missing_text_selector
); .error
assert!( .as_deref()
missing_text_selector .unwrap()
.error .contains("getText requires selector"));
.as_deref() assert!(missing_navigate_url
.unwrap() .error
.contains("getText requires selector") .as_deref()
); .unwrap()
assert!( .contains("navigate requires url"));
missing_navigate_url
.error
.as_deref()
.unwrap()
.contains("navigate requires url")
);
} }

View File

@@ -3,9 +3,7 @@ use std::path::Path;
use std::sync::{Mutex, OnceLock}; use std::sync::{Mutex, OnceLock};
use sgclaw::compat::config_adapter::{ use sgclaw::compat::config_adapter::{
build_zeroclaw_config, build_zeroclaw_config, build_zeroclaw_config_from_settings, zeroclaw_workspace_dir,
build_zeroclaw_config_from_settings,
zeroclaw_workspace_dir,
}; };
use sgclaw::config::DeepSeekSettings; use sgclaw::config::DeepSeekSettings;
use uuid::Uuid; use uuid::Uuid;
@@ -49,11 +47,17 @@ fn zeroclaw_config_adapter_uses_deterministic_workspace_dir() {
let workspace_dir = zeroclaw_workspace_dir(Path::new("/var/lib/sgclaw")); let workspace_dir = zeroclaw_workspace_dir(Path::new("/var/lib/sgclaw"));
let config = build_zeroclaw_config_from_settings(Path::new("/var/lib/sgclaw"), &settings); let config = build_zeroclaw_config_from_settings(Path::new("/var/lib/sgclaw"), &settings);
assert_eq!(workspace_dir, Path::new("/var/lib/sgclaw/.sgclaw-zeroclaw-workspace")); assert_eq!(
workspace_dir,
Path::new("/var/lib/sgclaw/.sgclaw-zeroclaw-workspace")
);
assert_eq!(config.workspace_dir, workspace_dir); assert_eq!(config.workspace_dir, workspace_dir);
assert_eq!(config.default_provider.as_deref(), Some("deepseek")); assert_eq!(config.default_provider.as_deref(), Some("deepseek"));
assert_eq!(config.default_model.as_deref(), Some("deepseek-reasoner")); assert_eq!(config.default_model.as_deref(), Some("deepseek-reasoner"));
assert_eq!(config.api_url.as_deref(), Some("https://proxy.example.com/v1")); assert_eq!(
config.api_url.as_deref(),
Some("https://proxy.example.com/v1")
);
} }
#[test] #[test]

View File

@@ -3,7 +3,7 @@ mod common;
use std::fs; use std::fs;
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::net::TcpListener; use std::net::TcpListener;
use std::path::PathBuf; use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, OnceLock}; use std::sync::{Arc, Mutex, OnceLock};
use std::thread; use std::thread;
use std::time::Duration; use std::time::Duration;
@@ -11,9 +11,7 @@ use std::time::Duration;
use common::MockTransport; use common::MockTransport;
use serde_json::{json, Value}; use serde_json::{json, Value};
use sgclaw::agent::{ use sgclaw::agent::{
handle_browser_message, handle_browser_message, handle_browser_message_with_context, AgentRuntimeContext,
handle_browser_message_with_context,
AgentRuntimeContext,
}; };
use sgclaw::compat::runtime::{execute_task, CompatTaskContext}; use sgclaw::compat::runtime::{execute_task, CompatTaskContext};
use sgclaw::config::DeepSeekSettings; use sgclaw::config::DeepSeekSettings;
@@ -48,7 +46,7 @@ fn temp_workspace_root() -> PathBuf {
root root
} }
fn write_deepseek_config(root: &PathBuf, api_key: &str, base_url: &str, model: &str) -> PathBuf { fn write_deepseek_config(root: &Path, api_key: &str, base_url: &str, model: &str) -> PathBuf {
let config_path = root.join("sgclaw_config.json"); let config_path = root.join("sgclaw_config.json");
fs::write( fs::write(
&config_path, &config_path,
@@ -94,7 +92,7 @@ fn start_fake_deepseek_server(
let payload = response.to_string(); let payload = response.to_string();
let reply = format!( let reply = format!(
"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
payload.as_bytes().len(), payload.len(),
payload payload
); );
stream.write_all(reply.as_bytes()).unwrap(); stream.write_all(reply.as_bytes()).unwrap();
@@ -281,7 +279,8 @@ fn compat_runtime_uses_zeroclaw_provider_path_and_executes_browser_actions() {
} }
#[test] #[test]
fn handle_browser_message_prefers_compat_runtime_for_supported_instruction_when_deepseek_is_configured() { fn handle_browser_message_prefers_compat_runtime_for_supported_instruction_when_deepseek_is_configured(
) {
let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner()); let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner());
let first_response = json!({ let first_response = json!({
@@ -643,11 +642,9 @@ fn compat_runtime_includes_prior_turns_in_follow_up_provider_request() {
assert_eq!(summary, "已在知乎搜索天气"); assert_eq!(summary, "已在知乎搜索天气");
assert!(first_request_messages.iter().any(|message| { assert!(first_request_messages.iter().any(|message| {
message["role"] == json!("user") message["role"] == json!("user") && message["content"] == json!("打开百度搜索天气")
&& message["content"] == json!("打开百度搜索天气")
})); }));
assert!(first_request_messages.iter().any(|message| { assert!(first_request_messages.iter().any(|message| {
message["role"] == json!("assistant") message["role"] == json!("assistant") && message["content"] == json!("已在百度搜索天气")
&& message["content"] == json!("已在百度搜索天气")
})); }));
} }

View File

@@ -60,8 +60,5 @@ fn deepseek_request_shape_matches_openai_compatible_chat_format() {
assert_eq!(serialized["messages"][0]["role"], "system"); assert_eq!(serialized["messages"][0]["role"], "system");
assert_eq!(serialized["messages"][1]["content"], "打开百度搜索天气"); assert_eq!(serialized["messages"][1]["content"], "打开百度搜索天气");
assert_eq!(serialized["tools"][0]["type"], "function"); assert_eq!(serialized["tools"][0]["type"], "function");
assert_eq!( assert_eq!(serialized["tools"][0]["function"]["name"], "browser_action");
serialized["tools"][0]["function"]["name"],
"browser_action"
);
} }

View File

@@ -12,9 +12,9 @@ fn test_policy() -> MacPolicy {
MacPolicy::from_json_str( MacPolicy::from_json_str(
r#"{ r#"{
"version": "1.0", "version": "1.0",
"domains": { "allowed": ["oa.example.com", "www.baidu.com"] }, "domains": { "allowed": ["oa.example.com", "www.baidu.com", "www.zhihu.com", "zhuanlan.zhihu.com"] },
"pipe_actions": { "pipe_actions": {
"allowed": ["click", "type", "navigate", "getText"], "allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
"blocked": ["eval", "executeJsInPage"] "blocked": ["eval", "executeJsInPage"]
} }
}"#, }"#,
@@ -120,3 +120,116 @@ fn submit_task_sends_three_commands_and_finishes_with_task_complete() {
if *success && summary == "已在百度搜索天气" if *success && summary == "已在百度搜索天气"
)); ));
} }
/// An explicit `skill:zhihu_navigate` instruction must produce exactly three
/// outgoing messages: an info log, one navigate command bound to
/// `www.zhihu.com`, and a successful task completion.
#[test]
fn explicit_zhihu_skill_short_circuits_before_planner_fallback() {
    let navigate_reply = BrowserMessage::Response {
        seq: 1,
        success: true,
        data: serde_json::json!({ "url": "https://www.zhihu.com/creator/analytics/work/all" }),
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 20,
        },
    };
    let transport = Arc::new(MockTransport::new(vec![navigate_reply]));
    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let task = BrowserMessage::SubmitTask {
        instruction: r#"skill:zhihu_navigate {"page":"content_analysis","ensure_loaded":false}"#
            .to_string(),
        conversation_id: String::new(),
        messages: vec![],
        page_url: String::new(),
        page_title: String::new(),
    };
    handle_browser_message(transport.as_ref(), &tool, task).unwrap();

    let sent = transport.sent_messages();
    assert_eq!(sent.len(), 3);

    let logged_navigation = matches!(
        &sent[0],
        AgentMessage::LogEntry { level, message }
            if level == "info"
                && message == "navigate https://www.zhihu.com/creator/analytics/work/all"
    );
    assert!(logged_navigation);

    let issued_navigate = matches!(
        &sent[1],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 1
                && action == &Action::Navigate
                && security.expected_domain == "www.zhihu.com"
    );
    assert!(issued_navigate);

    let reported_success = matches!(
        &sent[2],
        AgentMessage::TaskComplete { success, summary }
            if *success
                && summary
                    == "知乎页面已打开:内容分析 (https://www.zhihu.com/creator/analytics/work/all)"
    );
    assert!(reported_success);
}
/// A natural-language request for the Zhihu home page must produce exactly
/// three outgoing messages: an info log, one navigate command bound to
/// `www.zhihu.com`, and a successful task completion.
#[test]
fn natural_language_zhihu_navigation_short_circuits_before_planner_fallback() {
    let navigate_reply = BrowserMessage::Response {
        seq: 1,
        success: true,
        data: serde_json::json!({ "url": "https://www.zhihu.com/" }),
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 20,
        },
    };
    let transport = Arc::new(MockTransport::new(vec![navigate_reply]));
    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let task = BrowserMessage::SubmitTask {
        instruction: "打开知乎首页".to_string(),
        conversation_id: String::new(),
        messages: vec![],
        page_url: String::new(),
        page_title: String::new(),
    };
    handle_browser_message(transport.as_ref(), &tool, task).unwrap();

    let sent = transport.sent_messages();
    assert_eq!(sent.len(), 3);

    let logged_navigation = matches!(
        &sent[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/"
    );
    assert!(logged_navigation);

    let issued_navigate = matches!(
        &sent[1],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 1
                && action == &Action::Navigate
                && security.expected_domain == "www.zhihu.com"
    );
    assert!(issued_navigate);

    let reported_success = matches!(
        &sent[2],
        AgentMessage::TaskComplete { success, summary }
            if *success && summary == "知乎页面已打开:首页 (https://www.zhihu.com/)"
    );
    assert!(reported_success);
}

441
tests/skill_router_test.rs Normal file
View File

@@ -0,0 +1,441 @@
use sgclaw::skill::router::{route_instruction, RoutedSkill, RouterError};
/// An explicit `skill:zhihu_write` instruction is parsed into a `ZhihuWrite`
/// request carrying the title, body and publish flag from its JSON payload.
#[test]
fn route_instruction_parses_explicit_zhihu_skill() {
    let instruction = r#"skill:zhihu_write {"title":"自动发文能力测试","body":"第一段\n\n第二段","publish":false}"#;
    let parsed_write_request = matches!(
        route_instruction(instruction).unwrap(),
        Some(RoutedSkill::ZhihuWrite(request))
            if request.title == "自动发文能力测试"
                && request.body == "第一段\n\n第二段"
                && !request.publish
    );
    assert!(parsed_write_request);
}
/// An explicit `skill:zhihu_hotlist_collect` instruction is parsed into a
/// `ZhihuHotlistCollect` request with the counts and store directory given.
#[test]
fn route_instruction_parses_explicit_zhihu_hotlist_collect_skill() {
    let instruction = r#"skill:zhihu_hotlist_collect {"top_n":5,"comments_per_item":8,"store_dir":"data/zhihu_hotlist"}"#;
    let parsed_collect_request = matches!(
        route_instruction(instruction).unwrap(),
        Some(RoutedSkill::ZhihuHotlistCollect(request))
            if request.top_n == 5
                && request.comments_per_item == 8
                && request.store_dir.as_deref() == Some("data/zhihu_hotlist")
    );
    assert!(parsed_collect_request);
}
/// An explicit `skill:zhihu_hotlist_report` instruction is parsed into a
/// `ZhihuHotlistReport` request with the snapshot id and item count given.
#[test]
fn route_instruction_parses_explicit_zhihu_hotlist_report_skill() {
    let instruction = r#"skill:zhihu_hotlist_report {"snapshot_id":"snap-1","top_n":3}"#;
    let parsed_report_request = matches!(
        route_instruction(instruction).unwrap(),
        Some(RoutedSkill::ZhihuHotlistReport(request))
            if request.snapshot_id.as_deref() == Some("snap-1") && request.top_n == 3
    );
    assert!(parsed_report_request);
}
/// An explicit `skill:zhihu_navigate` instruction is parsed into a
/// `ZhihuNavigate` request with the page and `ensure_loaded` flag given.
#[test]
fn route_instruction_parses_explicit_zhihu_navigation_skill() {
    let instruction = r#"skill:zhihu_navigate {"page":"content_analysis","ensure_loaded":true}"#;
    let parsed_navigation_request = matches!(
        route_instruction(instruction).unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "content_analysis" && request.ensure_loaded
    );
    assert!(parsed_navigation_request);
}
/// The long-form phrase routes to `ZhihuNavigate` with page
/// `content_analysis` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_content_analysis_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("帮我打开知乎中的内容分析页面").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "content_analysis" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The short phrase routes to `ZhihuNavigate` with page `content_analysis`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_short_zhihu_content_analysis_phrase() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎内容分析").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "content_analysis" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `income_analysis` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_income_analysis_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎收益分析页面").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "income_analysis" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `home` and `ensure_loaded`
/// set.
#[test]
fn route_instruction_routes_zhihu_home_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎首页").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "home" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `hot_list` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_hot_list_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎热榜页面").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "hot_list" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `column_home` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_column_home_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎专栏页").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "column_home" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `question_page` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_question_page_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎问题页").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "question_page" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `messages_page` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_page_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎消息分栏").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "messages_page" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `messages_all_tab` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_all_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎消息分栏全部私信").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "messages_all_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `messages_unread_tab` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_unread_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎消息分栏未读消息").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "messages_unread_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `messages_strangers_tab`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_strangers_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎消息分栏陌生人消息").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "messages_strangers_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `messages_settings_menu`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_settings_menu_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎消息设置菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "messages_settings_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `notifications_page` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_page_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知分栏").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_page" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page
/// `notifications_replies_tab` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_replies_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知分栏回复我的").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_replies_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page
/// `notifications_votes_favorites_tab` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_votes_favorites_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知分栏赞同与收藏").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_votes_favorites_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page
/// `notifications_follows_tab` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_follows_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知分栏关注我的").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_follows_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page
/// `notifications_system_tab` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_system_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知分栏系统通知").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_system_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page
/// `notifications_settings_menu` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_settings_menu_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知设置菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_settings_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `profile_page` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_profile_page_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎个人主页").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "profile_page" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `profile_answers_tab` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_profile_answers_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎个人主页回答分栏").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "profile_answers_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `profile_followers_tab`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_profile_followers_tab_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎个人主页粉丝分栏").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "profile_followers_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `settings_account_menu`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_account_settings_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎账号设置菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "settings_account_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `settings_privacy_menu`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_privacy_settings_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎隐私设置菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "settings_privacy_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `settings_security_menu`
/// and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_security_settings_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎安全设置菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "settings_security_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `search_filter_menu` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_search_filter_menu_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎搜索筛选菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "search_filter_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `context_more_menu` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_context_more_menu_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎更多菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "context_more_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `notifications_menu` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_menu_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知菜单").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_menu" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `notifications_entry` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_entry_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎通知按钮").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "notifications_entry" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `search_box` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_search_box_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎搜索框").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "search_box" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `creator_write_button` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_creator_write_button_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎创作中心写文章按钮").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "creator_write_button" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// The phrase routes to `ZhihuNavigate` with page `open_hot_from_home` and
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_open_hot_from_home_flow_natural_language() {
    let routed_as_expected = matches!(
        route_instruction("从知乎首页进入热榜").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "open_hot_from_home" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// An instruction that targets another site is not claimed by the router.
#[test]
fn route_instruction_returns_none_for_non_skill_text() {
    let outcome = route_instruction("打开百度搜索天气").unwrap();
    assert!(outcome.is_none());
}
/// Naming Zhihu without any specific destination is not claimed by the
/// router.
#[test]
fn route_instruction_returns_none_for_vague_zhihu_navigation_text() {
    let outcome = route_instruction("打开知乎").unwrap();
    assert!(outcome.is_none());
}
/// A bare "notifications" phrase, with no page/menu/button qualifier, is not
/// claimed by the router.
#[test]
fn route_instruction_returns_none_for_ambiguous_zhihu_notification_phrase() {
    let outcome = route_instruction("打开知乎通知").unwrap();
    assert!(outcome.is_none());
}
/// The hot-list phrase qualified with "button" routes to `ZhihuNavigate`
/// with page `hot_tab` and `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_hot_button_phrase_to_hot_tab() {
    let routed_as_expected = matches!(
        route_instruction("打开知乎热榜按钮").unwrap(),
        Some(RoutedSkill::ZhihuNavigate(request))
            if request.page == "hot_tab" && request.ensure_loaded
    );
    assert!(routed_as_expected);
}
/// An explicit `skill:` prefix with an unregistered name is an error that
/// carries the offending skill name.
#[test]
fn route_instruction_rejects_unknown_skill_name() {
    let rejected_with_name = matches!(
        route_instruction(r#"skill:unknown {"x":1}"#).unwrap_err(),
        RouterError::UnknownSkill(name) if name == "unknown"
    );
    assert!(rejected_with_name);
}

View File

@@ -0,0 +1,403 @@
mod common;
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use common::MockTransport;
use sgclaw::pipe::{BrowserMessage, BrowserPipeTool, Timing};
use sgclaw::security::MacPolicy;
use sgclaw::skill::zhihu_hotlist::{
execute_collect, execute_report, load_flow, ZhihuHotlistCollectRequest,
ZhihuHotlistReportRequest,
};
use sgclaw::skill::zhihu_hotlist_store::load_latest_snapshot;
/// Builds the policy used by the hotlist tests: both Zhihu domains and the
/// seven pipe actions listed below are allowed, and nothing is blocked.
fn test_policy() -> MacPolicy {
    let policy_json = r#"{
"version": "1.0",
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
"pipe_actions": {
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
"blocked": []
}
}"#;
    MacPolicy::from_json_str(policy_json).unwrap()
}
/// Returns a path under the system temp directory named
/// `sgclaw-<label>-<nanoseconds since the Unix epoch>`.
///
/// The directory itself is not created here.
fn temp_store_dir(label: &str) -> PathBuf {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let unique = since_epoch.as_nanos();

    let mut dir = std::env::temp_dir();
    dir.push(format!("sgclaw-{label}-{unique}"));
    dir
}
/// Fixture markup for a hot-list page with two `data-hot-item` sections,
/// each holding a linked title, a summary and a heat label.
fn hotlist_html() -> String {
    let markup = r#"
<html>
<body>
<main>
<section data-hot-item>
<h2><a href="/question/123">第一条热榜</a></h2>
<div class="HotItem-content">第一条摘要</div>
<div class="HotItem-hot">1234 热度</div>
</section>
<section data-hot-item>
<h2><a href="/question/456">第二条热榜</a></h2>
<div class="HotItem-content">第二条摘要</div>
<div class="HotItem-hot">5.6 万热度</div>
</section>
</main>
</body>
</html>
"#;
    String::from(markup)
}
/// Fixture markup for a comment list with two comments.
///
/// The reply and upvote counts of each comment come from the arguments; the
/// remaining two button counts per comment are fixed in the template.
fn comment_html(
    first_reply: u64,
    first_upvote: u64,
    second_reply: u64,
    second_upvote: u64,
) -> String {
    let markup = format!(
        r#"
<html>
<body>
<div class="CommentListV2">
<div class="CommentItemV2" data-comment-id="comment-1">
<button>回复 {first_reply}</button>
<button>赞同 {first_upvote}</button>
<button>收藏 2</button>
<button>红心 1</button>
</div>
<div class="CommentItemV2" data-comment-id="comment-2">
<button>回复 {second_reply}</button>
<button>赞同 {second_upvote}</button>
<button>收藏 4</button>
<button>红心 3</button>
</div>
</div>
</body>
</html>
"#
    );
    markup
}
/// The loaded hotlist flow exposes the expected hot-list URL, the `zhihu`
/// domain entry, and selectors containing the expected fragments.
#[test]
fn load_hotlist_flow_preserves_expected_selectors() {
    let flow = load_flow().unwrap();

    assert_eq!(flow.hotlist_url, "https://www.zhihu.com/hot");
    assert_eq!(flow.domains["zhihu"], "www.zhihu.com");

    let item_selector = &flow.selectors["hotlist_item"];
    assert!(item_selector.contains("HotList-item"));
    let metric_selector = &flow.selectors["comment_metric"];
    assert!(metric_selector.contains("button"));
}
/// Runs a collect pass against fifteen scripted, successful browser
/// responses (hot-list page, then two question pages with comments), checks
/// the persisted snapshot, then renders a report from that snapshot and
/// checks its summary. The store directory is removed at the end on a
/// best-effort basis.
#[test]
fn zhihu_hotlist_collect_persists_snapshot_and_report_reads_latest() {
    let store_dir = temp_store_dir("hotlist-collect");

    // Every scripted reply succeeds and shares the same timing; only the
    // sequence number and the payload differ.
    let ok = |seq, data| BrowserMessage::Response {
        seq,
        success: true,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let transport = Arc::new(MockTransport::new(vec![
        // Hot-list page.
        ok(
            1,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" }),
        ),
        ok(2, serde_json::json!({ "ready": true })),
        ok(
            3,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" }),
        ),
        // First question page.
        ok(
            4,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" }),
        ),
        ok(5, serde_json::json!({ "ready": true })),
        ok(6, serde_json::json!({ "scrolled": true })),
        ok(7, serde_json::json!({ "ready": true })),
        ok(8, serde_json::json!({ "scrolled": true })),
        ok(
            9,
            serde_json::json!({ "html": comment_html(3, 15, 1, 8), "url": "https://www.zhihu.com/question/123" }),
        ),
        // Second question page.
        ok(
            10,
            serde_json::json!({ "url": "https://www.zhihu.com/question/456" }),
        ),
        ok(11, serde_json::json!({ "ready": true })),
        ok(12, serde_json::json!({ "scrolled": true })),
        ok(13, serde_json::json!({ "ready": true })),
        ok(14, serde_json::json!({ "scrolled": true })),
        ok(
            15,
            serde_json::json!({ "html": comment_html(5, 20, 4, 16), "url": "https://www.zhihu.com/question/456" }),
        ),
    ]));
    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let collect_request = ZhihuHotlistCollectRequest {
        top_n: 2,
        comments_per_item: 2,
        store_dir: Some(store_dir.display().to_string()),
    };
    let collected = execute_collect(transport.as_ref(), &browser_tool, collect_request).unwrap();
    assert_eq!(collected.item_count, 2);
    assert!(collected.summary.contains("知乎热榜快照已保存"));

    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    assert_eq!(snapshot.items.len(), 2);
    let first_item = &snapshot.items[0];
    assert_eq!(first_item.title, "第一条热榜");
    assert_eq!(first_item.summary, "第一条摘要");
    assert_eq!(first_item.heat_value, Some(1234));
    assert_eq!(first_item.comment_metrics.len(), 2);
    assert_eq!(first_item.comment_metrics[0].reply_count, Some(3));
    assert_eq!(first_item.comment_metrics[0].upvote_count, Some(15));
    assert_eq!(snapshot.items[1].heat_value, Some(56_000));
    assert_eq!(snapshot.collection_stats.total_comment_metric_records, 4);

    let report_request = ZhihuHotlistReportRequest {
        snapshot_id: Some(collected.snapshot_id.clone()),
        store_dir: Some(store_dir.display().to_string()),
        top_n: 2,
    };
    let report = execute_report(report_request).unwrap();
    for expected in ["第一条热榜", "第二条热榜", "回复 4", "赞同 23"] {
        assert!(report.summary.contains(expected));
    }

    let _ = fs::remove_dir_all(&store_dir);
}
/// Checks that `execute_collect` still stores a snapshot, marked as partial and
/// without comment metrics, when one scripted browser reply is unsuccessful.
#[test]
fn zhihu_hotlist_collect_persists_partial_snapshot_when_comment_capture_fails() {
    // All scripted replies share the same timing values and an empty AOM snapshot.
    let reply = |seq: u64, success: bool, data: serde_json::Value| BrowserMessage::Response {
        seq,
        success,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let store_dir = temp_store_dir("hotlist-partial");
    let scripted = vec![
        reply(
            1,
            true,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" }),
        ),
        reply(2, true, serde_json::json!({ "ready": true })),
        reply(
            3,
            true,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" }),
        ),
        reply(
            4,
            true,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" }),
        ),
        reply(5, true, serde_json::json!({ "ready": true })),
        reply(6, true, serde_json::json!({ "scrolled": true })),
        // The only unsuccessful reply in the script.
        reply(
            7,
            false,
            serde_json::json!({ "error": "comment list missing" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuHotlistCollectRequest {
        top_n: 1,
        comments_per_item: 2,
        store_dir: Some(store_dir.display().to_string()),
    };
    let result = execute_collect(transport.as_ref(), &tool, request).unwrap();
    let snapshot = load_latest_snapshot(&store_dir).unwrap();

    assert_eq!(result.item_count, 1);
    let stats = &snapshot.collection_stats;
    assert_eq!(stats.partial_items, 1);
    assert_eq!(stats.total_comment_metric_records, 0);
    assert!(snapshot.items[0].comment_metrics.is_empty());

    // Best-effort cleanup of the temporary store directory.
    let _ = fs::remove_dir_all(&store_dir);
}

View File

@@ -0,0 +1,661 @@
mod common;
use std::sync::Arc;
use std::time::Duration;
use common::MockTransport;
use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing};
use sgclaw::security::MacPolicy;
use sgclaw::skill::zhihu_navigation::{execute, load_catalog, ZhihuNavigateRequest};
/// Builds the `MacPolicy` used by the navigation tests from an inline JSON document.
///
/// The document allows the `www.zhihu.com` and `zhuanlan.zhihu.com` domains and the
/// `click`, `navigate`, `getText` and `waitForSelector` pipe actions.
///
/// # Panics
///
/// Panics if `MacPolicy::from_json_str` returns an error for the document.
fn test_policy() -> MacPolicy {
    let policy_json = r#"{
"version": "1.0",
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
"pipe_actions": {
"allowed": ["click", "navigate", "getText", "waitForSelector"],
"blocked": []
}
}"#;
    MacPolicy::from_json_str(policy_json).unwrap()
}
/// Creates a successful `BrowserMessage::Response` for `seq` carrying `data`.
///
/// The reply always has an empty AOM snapshot and fixed timing values
/// (`queue_ms: 1`, `exec_ms: 10`).
fn response(seq: u64, data: serde_json::Value) -> BrowserMessage {
    let timing = Timing {
        queue_ms: 1,
        exec_ms: 10,
    };
    BrowserMessage::Response {
        seq,
        success: true,
        data,
        aom_snapshot: Vec::new(),
        timing,
    }
}
/// Checks the `content_analysis` entries of the loaded catalog: the creator
/// domain, the route URL, the target's route reference and one route alias.
#[test]
fn load_catalog_preserves_confirmed_content_analysis_route() {
    let catalog = load_catalog().unwrap();

    assert_eq!(catalog.domains["creator"], "www.zhihu.com");

    let route = &catalog.routes["content_analysis"];
    assert_eq!(
        route.url,
        "https://www.zhihu.com/creator/analytics/work/all"
    );

    let target = &catalog.targets["content_analysis"];
    assert_eq!(target.route_ref.as_deref(), Some("content_analysis"));

    let has_alias = route
        .aliases
        .iter()
        .any(|alias| alias == "知乎内容分析页面");
    assert!(has_alias);
}
/// Checks that the loaded catalog contains the expected top-level routes,
/// component-backed targets, flow-backed targets and the write-button component.
#[test]
fn load_catalog_includes_top_level_navigation_targets() {
    let catalog = load_catalog().unwrap();

    // Route key -> expected URL.
    let expected_routes = [
        ("home", "https://www.zhihu.com/"),
        ("hot_list", "https://www.zhihu.com/hot"),
        ("column_home", "https://zhuanlan.zhihu.com/"),
        ("messages_page", "https://www.zhihu.com/messages"),
        ("notifications_page", "https://www.zhihu.com/notifications"),
    ];
    for (route, url) in expected_routes {
        assert_eq!(catalog.routes[route].url, url);
    }

    // Target key -> expected component reference.
    let expected_components = [
        ("messages_unread_tab", "messages_tab_unread"),
        ("notifications_replies_tab", "notifications_tab_replies"),
        ("notifications_settings_menu", "notifications_settings_menu"),
    ];
    for (target, component) in expected_components {
        assert_eq!(
            catalog.targets[target].component_ref.as_deref(),
            Some(component)
        );
    }

    // Target key -> expected flow reference.
    let expected_flows = [
        ("profile_page", "open_profile_from_avatar_menu"),
        ("notifications_menu", "open_notifications_menu"),
    ];
    for (target, flow) in expected_flows {
        assert_eq!(catalog.targets[target].flow_ref.as_deref(), Some(flow));
    }

    let search_box = &catalog.targets["search_box"];
    assert_eq!(search_box.component_ref.as_deref(), Some("top_nav_search"));

    let write_button = &catalog.components["creator_write_button"];
    assert_eq!(write_button.result_domain_ref.as_deref(), Some("editor"));
}
/// Checks that the profile-tab and settings-menu targets of the loaded catalog
/// point at the expected flow references.
#[test]
fn load_catalog_includes_expanded_profile_and_settings_flows() {
    let catalog = load_catalog().unwrap();

    // Target key -> expected flow reference.
    let expected_flows = [
        ("profile_answers_tab", "open_profile_answers_tab"),
        ("profile_followers_tab", "open_profile_followers_tab"),
        (
            "settings_account_menu",
            "open_account_settings_from_avatar_menu",
        ),
        (
            "settings_privacy_menu",
            "open_privacy_settings_from_avatar_menu",
        ),
        (
            "settings_security_menu",
            "open_security_settings_from_avatar_menu",
        ),
    ];
    for (target, flow) in expected_flows {
        assert_eq!(catalog.targets[target].flow_ref.as_deref(), Some(flow));
    }
}
/// Runs the navigation skill for `content_analysis` against a one-reply script and
/// checks the summary, the reported page/URL and the two messages that were sent.
#[test]
fn zhihu_navigation_skill_opens_content_analysis_page() {
    let scripted = vec![response(
        1,
        serde_json::json!({ "url": "https://www.zhihu.com/creator/analytics/work/all" }),
    )];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("content_analysis"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎页面已打开:内容分析 (https://www.zhihu.com/creator/analytics/work/all)"
    );
    assert_eq!(outcome.page, "content_analysis");
    assert_eq!(
        outcome.final_url,
        "https://www.zhihu.com/creator/analytics/work/all"
    );

    // Expected traffic: one log entry followed by the navigate command.
    assert_eq!(outbound.len(), 2);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info"
                && message == "navigate https://www.zhihu.com/creator/analytics/work/all"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
}
/// Runs the navigation skill for `creator_write_button` and checks the
/// navigate / click / wait-for-selector sequence, including the expected domain
/// attached to the click and wait commands.
#[test]
fn zhihu_navigation_skill_clicks_creator_write_button() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/creator" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(
            3,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("creator_write_button"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎入口已打开:写文章入口按钮 (https://zhuanlan.zhihu.com/write)"
    );
    assert_eq!(outcome.page, "creator_write_button");
    assert_eq!(outcome.final_url, "https://zhuanlan.zhihu.com/write");

    // Each command is preceded by an info-level log entry.
    assert_eq!(outbound.len(), 6);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/creator"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 写文章入口按钮"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 2
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
    assert!(matches!(
        &outbound[4],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message.contains("wait for textarea")
    ));
    // The wait command is expected to target the editor domain.
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 3
                && *action == Action::WaitForSelector
                && security.expected_domain == "zhuanlan.zhihu.com"
    ));
}
/// Runs the navigation skill for `notifications_menu` and checks that it
/// navigates to the home page and then clicks the notifications menu.
#[test]
fn zhihu_navigation_skill_opens_notifications_menu_flow() {
    let scripted = vec![
        response(1, serde_json::json!({ "url": "https://www.zhihu.com/" })),
        response(2, serde_json::json!({ "clicked": true })),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("notifications_menu"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(outcome.summary, "知乎菜单已打开:通知菜单");
    assert_eq!(outcome.page, "notifications_menu");
    assert_eq!(outcome.final_url, "https://www.zhihu.com/");

    assert_eq!(outbound.len(), 4);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 通知菜单"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 2
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
}
/// Runs the navigation skill for `profile_page` and checks the three-step
/// sequence: navigate home, click the avatar menu, click the profile entry.
#[test]
fn zhihu_navigation_skill_opens_profile_page_from_avatar_menu() {
    let scripted = vec![
        response(1, serde_json::json!({ "url": "https://www.zhihu.com/" })),
        response(2, serde_json::json!({ "clicked": true })),
        response(
            3,
            serde_json::json!({ "clicked": true, "url": "https://www.zhihu.com/people/test-user" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("profile_page"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎导航已完成:个人主页 (https://www.zhihu.com/people/test-user)"
    );
    assert_eq!(outcome.page, "profile_page");
    assert_eq!(outcome.final_url, "https://www.zhihu.com/people/test-user");

    // Three log-entry/command pairs are expected.
    assert_eq!(outbound.len(), 6);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 头像菜单"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, .. }
            if *seq == 2 && *action == Action::Click
    ));
    assert!(matches!(
        &outbound[4],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 个人主页入口"
    ));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 3
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
}
/// Runs the navigation skill for `profile_answers_tab` and checks the four-step
/// sequence: navigate home, click the avatar menu, click the profile entry,
/// click the answers tab.
#[test]
fn zhihu_navigation_skill_opens_profile_answers_tab_from_avatar_menu() {
    let scripted = vec![
        response(1, serde_json::json!({ "url": "https://www.zhihu.com/" })),
        response(2, serde_json::json!({ "clicked": true })),
        response(
            3,
            serde_json::json!({ "clicked": true, "url": "https://www.zhihu.com/people/test-user" }),
        ),
        response(
            4,
            serde_json::json!({ "clicked": true, "url": "https://www.zhihu.com/people/test-user/answers" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("profile_answers_tab"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎导航已完成:回答分栏 (https://www.zhihu.com/people/test-user/answers)"
    );
    assert_eq!(outcome.page, "profile_answers_tab");
    assert_eq!(
        outcome.final_url,
        "https://www.zhihu.com/people/test-user/answers"
    );

    // Four log-entry/command pairs are expected.
    assert_eq!(outbound.len(), 8);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 头像菜单"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, .. }
            if *seq == 2 && *action == Action::Click
    ));
    assert!(matches!(
        &outbound[4],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 个人主页入口"
    ));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command { seq, action, .. }
            if *seq == 3 && *action == Action::Click
    ));
    assert!(matches!(
        &outbound[6],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 回答分栏"
    ));
    assert!(matches!(
        &outbound[7],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 4
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
}
/// Runs the navigation skill for `settings_account_menu` and checks the
/// three-step sequence: navigate home, click the avatar menu, click the
/// account settings entry.
#[test]
fn zhihu_navigation_skill_opens_account_settings_from_avatar_menu() {
    let scripted = vec![
        response(1, serde_json::json!({ "url": "https://www.zhihu.com/" })),
        response(2, serde_json::json!({ "clicked": true })),
        response(
            3,
            serde_json::json!({ "clicked": true, "url": "https://www.zhihu.com/settings/account" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("settings_account_menu"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎导航已完成:账号设置菜单 (https://www.zhihu.com/settings/account)"
    );
    assert_eq!(outcome.page, "settings_account_menu");
    assert_eq!(outcome.final_url, "https://www.zhihu.com/settings/account");

    // Three log-entry/command pairs are expected.
    assert_eq!(outbound.len(), 6);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 头像菜单"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, .. }
            if *seq == 2 && *action == Action::Click
    ));
    assert!(matches!(
        &outbound[4],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 账号设置菜单"
    ));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 3
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
}
/// Runs the navigation skill for `notifications_replies_tab` and checks that it
/// navigates to the notifications page and then clicks the replies tab.
#[test]
fn zhihu_navigation_skill_opens_notifications_replies_tab() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/notifications" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://www.zhihu.com/notifications/replies" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("notifications_replies_tab"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎入口已打开:回复我的 (https://www.zhihu.com/notifications/replies)"
    );

    assert_eq!(outbound.len(), 4);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/notifications"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 回复我的"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 2
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
}
/// Runs the navigation skill for `messages_settings_menu` and checks that it
/// navigates to the messages page and then clicks the settings menu.
#[test]
fn zhihu_navigation_skill_opens_messages_settings_menu() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/messages" }),
        ),
        response(2, serde_json::json!({ "clicked": true })),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("messages_settings_menu"),
        ensure_loaded: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(outcome.summary, "知乎菜单已打开:消息设置菜单");
    assert_eq!(outcome.final_url, "https://www.zhihu.com/messages");

    assert_eq!(outbound.len(), 4);
    assert!(matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/messages"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && *action == Action::Navigate
    ));
    assert!(matches!(
        &outbound[2],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "click 消息设置菜单"
    ));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 2
                && *action == Action::Click
                && security.expected_domain == "www.zhihu.com"
    ));
}
/// Checks that asking for a page named `unknown_target` yields an error whose
/// text mentions the unknown target; no browser replies are scripted.
#[test]
fn zhihu_navigation_skill_rejects_unknown_target() {
    let transport = Arc::new(MockTransport::new(Vec::new()));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("unknown_target"),
        ensure_loaded: true,
    };
    let err = execute(transport.as_ref(), &tool, request).unwrap_err();

    let rendered = err.to_string();
    assert!(rendered.contains("unknown zhihu target: unknown_target"));
}

357
tests/zhihu_skill_test.rs Normal file
View File

@@ -0,0 +1,357 @@
mod common;
use std::sync::Arc;
use std::time::Duration;
use common::MockTransport;
use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing};
use sgclaw::security::MacPolicy;
use sgclaw::skill::zhihu::{execute, load_flow, ZhihuWriteRequest};
/// Builds the `MacPolicy` used by the article-writing tests from an inline JSON document.
///
/// The document allows the `www.zhihu.com` and `zhuanlan.zhihu.com` domains and the
/// `click`, `type`, `navigate`, `getText`, `getHtml`, `waitForSelector` and `scrollTo`
/// pipe actions.
///
/// # Panics
///
/// Panics if `MacPolicy::from_json_str` returns an error for the document.
fn test_policy() -> MacPolicy {
    let policy_json = r#"{
"version": "1.0",
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
"pipe_actions": {
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
"blocked": []
}
}"#;
    MacPolicy::from_json_str(policy_json).unwrap()
}
/// Creates a successful `BrowserMessage::Response` for `seq` carrying `data`.
///
/// The reply always has an empty AOM snapshot and fixed timing values
/// (`queue_ms: 1`, `exec_ms: 10`).
fn response(seq: u64, data: serde_json::Value) -> BrowserMessage {
    let timing = Timing {
        queue_ms: 1,
        exec_ms: 10,
    };
    BrowserMessage::Response {
        seq,
        success: true,
        data,
        aom_snapshot: Vec::new(),
        timing,
    }
}
/// Checks that the loaded flow exposes the expected entry/editor URLs, text
/// literals and CSS selectors.
#[test]
fn load_flow_preserves_validated_zhihu_literals() {
    let flow = load_flow().unwrap();

    assert_eq!(flow.entry_url, "https://www.zhihu.com/creator");
    assert_eq!(flow.editor_url, "https://zhuanlan.zhihu.com/write");

    let literals = &flow.literals;
    assert_eq!(literals["write_entry_text"], "写文章");
    assert_eq!(literals["publish_confirm_text"], "确认发布");
    assert_eq!(
        literals["title_placeholder"],
        "请输入标题(最多 100 个字)"
    );

    let selectors = &flow.selectors;
    assert_eq!(
        selectors["creator_write_entry"],
        "div.css-1q62b6s > div.css-byu4by"
    );
    assert_eq!(
        selectors["publish_confirm_button"],
        "div[role='dialog'] button.Button--primary.Button--blue"
    );
}
/// Runs the write skill with `publish: false` and checks that exactly ten
/// messages are sent, the last command being the `Type` command with seq 5.
#[test]
fn zhihu_skill_stops_before_publish_when_publish_is_false() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/creator" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(
            3,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuWriteRequest {
        title: String::from("自动发文能力测试"),
        body: String::from("第一段\n\n第二段"),
        publish: false,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(outcome.summary, "知乎文章草稿已填充:自动发文能力测试");
    assert_eq!(outbound.len(), 10);
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command { seq, action, .. }
            if *seq == 3 && *action == Action::WaitForSelector
    ));
    assert!(matches!(
        &outbound[9],
        AgentMessage::Command { seq, action, .. }
            if *seq == 5 && *action == Action::Type
    ));
}
/// Runs the write skill with `publish: true` against an eleven-reply script and
/// checks the published summary/URL plus two spot-checked commands
/// (`ScrollTo` at seq 6 and `GetText` at seq 11).
#[test]
fn zhihu_skill_publishes_only_after_confirming_dialog_title_and_final_url() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/creator" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(
            3,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(
            7,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(8, serde_json::json!({ "ready": true })),
        response(
            9,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/p/123456" }),
        ),
        response(
            10,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/p/123456" }),
        ),
        response(
            11,
            serde_json::json!({ "text": "自动发文能力测试", "url": "https://zhuanlan.zhihu.com/p/123456" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuWriteRequest {
        title: String::from("自动发文能力测试"),
        body: String::from("第一段\n\n第二段"),
        publish: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎文章已发布:自动发文能力测试 (https://zhuanlan.zhihu.com/p/123456)"
    );
    assert_eq!(
        outcome.final_url.as_deref(),
        Some("https://zhuanlan.zhihu.com/p/123456")
    );
    assert!(outcome.published);

    assert_eq!(outbound.len(), 22);
    assert!(matches!(
        &outbound[11],
        AgentMessage::Command { seq, action, .. }
            if *seq == 6 && *action == Action::ScrollTo
    ));
    assert!(matches!(
        &outbound[21],
        AgentMessage::Command { seq, action, .. }
            if *seq == 11 && *action == Action::GetText
    ));
}
/// Runs the write skill with `publish: true` where the last three replies report
/// a `/p/123456/edit` URL, and expects the result to report `/p/123456`.
#[test]
fn zhihu_skill_accepts_edit_url_as_published_article_url() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/creator" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(
            3,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(
            7,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(8, serde_json::json!({ "ready": true })),
        response(
            9,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/p/123456/edit" }),
        ),
        response(
            10,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/p/123456/edit" }),
        ),
        response(
            11,
            serde_json::json!({ "text": "自动发文能力测试", "url": "https://zhuanlan.zhihu.com/p/123456/edit" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuWriteRequest {
        title: String::from("自动发文能力测试"),
        body: String::from("第一段\n\n第二段"),
        publish: true,
    };
    let outcome = execute(transport.as_ref(), &tool, request).unwrap();

    // The expected URL has no `/edit` suffix even though the replies carry one.
    assert_eq!(
        outcome.final_url.as_deref(),
        Some("https://zhuanlan.zhihu.com/p/123456")
    );
    assert_eq!(
        outcome.summary,
        "知乎文章已发布:自动发文能力测试 (https://zhuanlan.zhihu.com/p/123456)"
    );
}
/// Runs the write skill with `publish: true` where replies 9 through 11 carry no
/// `url` field, and expects an error mentioning the missing article URL.
#[test]
fn zhihu_skill_fails_when_publish_confirmation_never_returns_article_url() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/creator" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(
            3,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(
            7,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(8, serde_json::json!({ "ready": true })),
        response(9, serde_json::json!({ "clicked": true })),
        response(10, serde_json::json!({ "ready": true })),
        response(11, serde_json::json!({ "text": "自动发文能力测试" })),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuWriteRequest {
        title: String::from("自动发文能力测试"),
        body: String::from("第一段\n\n第二段"),
        publish: true,
    };
    let err = execute(transport.as_ref(), &tool, request).unwrap_err();

    let rendered = err.to_string();
    assert!(rendered.contains("did not return article url"));
}
/// Runs the write skill with `publish: true` where the final reply's text differs
/// from the requested title, and expects an error reporting both strings.
#[test]
fn zhihu_skill_fails_when_published_title_does_not_match_request_title() {
    let scripted = vec![
        response(
            1,
            serde_json::json!({ "url": "https://www.zhihu.com/creator" }),
        ),
        response(
            2,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(
            3,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(
            7,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/write" }),
        ),
        response(8, serde_json::json!({ "ready": true })),
        response(
            9,
            serde_json::json!({ "clicked": true, "url": "https://zhuanlan.zhihu.com/p/123456" }),
        ),
        response(
            10,
            serde_json::json!({ "ready": true, "url": "https://zhuanlan.zhihu.com/p/123456" }),
        ),
        // The reported title deliberately differs from the requested one.
        response(
            11,
            serde_json::json!({ "text": "别的标题", "url": "https://zhuanlan.zhihu.com/p/123456" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted));

    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    );
    let tool = tool.with_response_timeout(Duration::from_secs(1));

    let request = ZhihuWriteRequest {
        title: String::from("自动发文能力测试"),
        body: String::from("第一段\n\n第二段"),
        publish: true,
    };
    let err = execute(transport.as_ref(), &tool, request).unwrap_err();

    let rendered = err.to_string();
    assert!(rendered.contains("expected text `自动发文能力测试`, got `别的标题`"));
}