feat: restore zhihu browser skills
Reconnect the recovered Zhihu skill flows to the live browser runtime and resolve their resources relative to the executable so they work outside the repo root. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
344
Cargo.lock
generated
344
Cargo.lock
generated
@@ -33,6 +33,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"getrandom 0.3.4",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
@@ -328,6 +329,12 @@ version = "1.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder-lite"
|
||||
version = "0.1.0"
|
||||
@@ -418,7 +425,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"phf",
|
||||
"phf 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -604,12 +611,46 @@ dependencies = [
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.31.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa",
|
||||
"phf 0.11.3",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser-macros"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dialoguer"
|
||||
version = "0.12.0"
|
||||
@@ -675,6 +716,21 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa-short"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
|
||||
dependencies = [
|
||||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dunce"
|
||||
version = "1.0.5"
|
||||
@@ -687,6 +743,12 @@ version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
|
||||
|
||||
[[package]]
|
||||
name = "ego-tree"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
@@ -839,6 +901,16 @@ version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.32"
|
||||
@@ -936,6 +1008,15 @@ dependencies = [
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
@@ -946,6 +1027,15 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.17"
|
||||
@@ -1089,6 +1179,20 @@ dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "1.4.0"
|
||||
@@ -1543,6 +1647,12 @@ version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "mail-parser"
|
||||
version = "0.11.2"
|
||||
@@ -1552,6 +1662,20 @@ dependencies = [
|
||||
"hashify",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf 0.11.3",
|
||||
"phf_codegen 0.11.3",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.2.0"
|
||||
@@ -1632,6 +1756,12 @@ version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11ec1bc47d34ae756616f387c11fd0595f86f2cc7e6473bde9e3ded30cb902a1"
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
@@ -1737,13 +1867,103 @@ version = "2.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"phf_shared 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||
dependencies = [
|
||||
"phf_shared 0.11.3",
|
||||
"rand 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
|
||||
dependencies = [
|
||||
"siphasher 0.3.11",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
|
||||
dependencies = [
|
||||
"siphasher 1.0.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1752,7 +1972,7 @@ version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
"siphasher 1.0.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1847,6 +2067,12 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.37"
|
||||
@@ -1964,13 +2190,24 @@ version = "6.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha 0.3.1",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
|
||||
dependencies = [
|
||||
"rand_chacha",
|
||||
"rand_chacha 0.9.0",
|
||||
"rand_core 0.9.5",
|
||||
]
|
||||
|
||||
@@ -1985,6 +2222,16 @@ dependencies = [
|
||||
"rand_core 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.9.0"
|
||||
@@ -2320,6 +2567,41 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scraper"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b90460b31bfe1fc07be8262e42c665ad97118d4585869de9345a84d501a9eaf0"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"cssparser",
|
||||
"ego-tree",
|
||||
"getopts",
|
||||
"html5ever",
|
||||
"once_cell",
|
||||
"selectors",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cssparser",
|
||||
"derive_more",
|
||||
"fxhash",
|
||||
"log",
|
||||
"new_debug_unreachable",
|
||||
"phf 0.10.1",
|
||||
"phf_codegen 0.10.0",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "self_cell"
|
||||
version = "1.2.2"
|
||||
@@ -2418,6 +2700,15 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sgclaw"
|
||||
version = "0.1.0"
|
||||
@@ -2428,7 +2719,9 @@ dependencies = [
|
||||
"futures-util",
|
||||
"hex",
|
||||
"hmac",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
@@ -2506,6 +2799,12 @@ version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "1.0.2"
|
||||
@@ -2550,7 +2849,6 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"psm",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
@@ -2566,6 +2864,31 @@ dependencies = [
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"parking_lot",
|
||||
"phf_shared 0.11.3",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.3",
|
||||
"phf_shared 0.11.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
@@ -2633,6 +2956,17 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
|
||||
@@ -10,7 +10,9 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
futures-util = "0.3"
|
||||
hex = "0.4"
|
||||
hmac = "0.12"
|
||||
regex = "1"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
scraper = "0.20"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
sha2 = "0.10"
|
||||
|
||||
19
resources/skills/zhihu_hotlist_flow.json
Normal file
19
resources/skills/zhihu_hotlist_flow.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"hotlist_url": "https://www.zhihu.com/hot",
|
||||
"domains": {
|
||||
"zhihu": "www.zhihu.com"
|
||||
},
|
||||
"literals": {
|
||||
"hotlist_guard": "热榜"
|
||||
},
|
||||
"selectors": {
|
||||
"hotlist_root": "main, body",
|
||||
"hotlist_item": ".HotList-item, [data-hot-item], section ol li",
|
||||
"hotlist_title_link": ".HotList-item-title a, h2 a, .ContentItem-title a",
|
||||
"hotlist_summary": ".HotList-item-summary, .HotItem-content, .RichContent-inner, .ContentItem-excerpt",
|
||||
"hotlist_heat": ".HotList-item-heat, .HotItem-metrics, .HotItem-hot",
|
||||
"comment_list": ".Comments-list, .CommentListV2, [data-testid='comment-list'], .CommentList",
|
||||
"comment_item": ".Comments-list > .CommentItem, .CommentListV2 > .CommentItem, .CommentItemV2, .CommentItem",
|
||||
"comment_metric": ".CommentItem-metric, .CommentItem-footer button, .ContentItem-actions button, button"
|
||||
}
|
||||
}
|
||||
2481
resources/skills/zhihu_navigation_pages.json
Normal file
2481
resources/skills/zhihu_navigation_pages.json
Normal file
File diff suppressed because it is too large
Load Diff
126
resources/skills/zhihu_write_flow.json
Normal file
126
resources/skills/zhihu_write_flow.json
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"entry_url": "https://www.zhihu.com/creator",
|
||||
"editor_url": "https://zhuanlan.zhihu.com/write",
|
||||
"domains": {
|
||||
"creator": "www.zhihu.com",
|
||||
"editor": "zhuanlan.zhihu.com"
|
||||
},
|
||||
"literals": {
|
||||
"write_entry_text": "写文章",
|
||||
"title_placeholder": "请输入标题(最多 100 个字)",
|
||||
"body_role": "textbox",
|
||||
"publish_text": "发布",
|
||||
"publish_confirm_text": "确认发布"
|
||||
},
|
||||
"selectors": {
|
||||
"creator_write_panel": "div.css-1q62b6s",
|
||||
"creator_write_entry": "div.css-1q62b6s > div.css-byu4by",
|
||||
"title_input": "textarea[placeholder='请输入标题(最多 100 个字)']",
|
||||
"body_editor": "div.notranslate.public-DraftEditor-content[contenteditable='true'][role='textbox']",
|
||||
"publish_button": "button.Button--primary.Button--blue",
|
||||
"publish_confirm_dialog": "div[role='dialog']",
|
||||
"publish_confirm_button": "div[role='dialog'] button.Button--primary.Button--blue",
|
||||
"published_title": "h1"
|
||||
},
|
||||
"steps": [
|
||||
{
|
||||
"name": "navigate_creator",
|
||||
"action": "navigate",
|
||||
"expected_domain": "creator",
|
||||
"url_ref": "entry_url",
|
||||
"log_message": "navigate https://www.zhihu.com/creator"
|
||||
},
|
||||
{
|
||||
"name": "click_write_article",
|
||||
"action": "click",
|
||||
"expected_domain": "creator",
|
||||
"selector_ref": "creator_write_entry",
|
||||
"wait_after_ms": 1500,
|
||||
"log_message": "click 写文章"
|
||||
},
|
||||
{
|
||||
"name": "wait_editor_ready",
|
||||
"action": "waitForSelector",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "title_input",
|
||||
"timeout_ms": 8000,
|
||||
"log_message": "wait for editor title input"
|
||||
},
|
||||
{
|
||||
"name": "type_title",
|
||||
"action": "type",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "title_input",
|
||||
"text_source": "title",
|
||||
"clear_first": true,
|
||||
"log_message": "type article title into 请输入标题(最多 100 个字)"
|
||||
},
|
||||
{
|
||||
"name": "type_body",
|
||||
"action": "type",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "body_editor",
|
||||
"text_source": "body",
|
||||
"clear_first": true,
|
||||
"log_message": "type article body into editor textbox"
|
||||
},
|
||||
{
|
||||
"name": "scroll_publish_button",
|
||||
"action": "scrollTo",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "publish_button",
|
||||
"only_when_publish": true,
|
||||
"log_message": "scroll to 发布"
|
||||
},
|
||||
{
|
||||
"name": "click_publish",
|
||||
"action": "click",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "publish_button",
|
||||
"wait_after_ms": 800,
|
||||
"only_when_publish": true,
|
||||
"capture_url": true,
|
||||
"log_message": "click 发布"
|
||||
},
|
||||
{
|
||||
"name": "wait_publish_confirm_dialog",
|
||||
"action": "waitForSelector",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "publish_confirm_dialog",
|
||||
"timeout_ms": 8000,
|
||||
"only_when_publish": true,
|
||||
"log_message": "wait for publish confirm dialog"
|
||||
},
|
||||
{
|
||||
"name": "click_publish_confirm",
|
||||
"action": "click",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "publish_confirm_button",
|
||||
"wait_after_ms": 1500,
|
||||
"only_when_publish": true,
|
||||
"capture_url": true,
|
||||
"log_message": "click 确认发布"
|
||||
},
|
||||
{
|
||||
"name": "wait_published_title",
|
||||
"action": "waitForSelector",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "published_title",
|
||||
"timeout_ms": 15000,
|
||||
"only_when_publish": true,
|
||||
"capture_url": true,
|
||||
"log_message": "wait for published article title"
|
||||
},
|
||||
{
|
||||
"name": "confirm_published_title",
|
||||
"action": "getText",
|
||||
"expected_domain": "editor",
|
||||
"selector_ref": "published_title",
|
||||
"only_when_publish": true,
|
||||
"expect_text_source": "title",
|
||||
"allow_empty_text": true,
|
||||
"capture_url": true,
|
||||
"log_message": "verify published article title"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -9,6 +9,7 @@ use crate::config::DeepSeekSettings;
|
||||
use crate::pipe::{
|
||||
AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
|
||||
};
|
||||
use crate::skill;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct AgentRuntimeContext {
|
||||
@@ -34,7 +35,7 @@ impl AgentRuntimeContext {
|
||||
let _ = args.next();
|
||||
|
||||
while let Some(arg) = args.next() {
|
||||
if arg == OsString::from("--config-path") {
|
||||
if arg.to_string_lossy() == "--config-path" {
|
||||
let Some(value) = args.next() else {
|
||||
return Err(PipeError::Protocol(
|
||||
"missing value for --config-path".to_string(),
|
||||
@@ -88,26 +89,58 @@ fn send_mode_log<T: Transport>(transport: &T, mode: &str) -> Result<(), PipeErro
|
||||
})
|
||||
}
|
||||
|
||||
fn explicit_non_task_response(history: &[ConversationMessage], instruction: &str) -> Option<String> {
|
||||
fn explicit_non_task_response(
|
||||
history: &[ConversationMessage],
|
||||
instruction: &str,
|
||||
) -> Option<String> {
|
||||
if !history.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let trimmed = instruction.trim();
|
||||
if trimmed.is_empty() {
|
||||
return Some("sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。".to_string());
|
||||
return Some(
|
||||
"sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
const TASK_HINTS: &[&str] = &[
|
||||
"打开", "搜索", "点击", "输入", "导航", "跳转", "访问", "提取", "获取", "网页", "页面",
|
||||
"标签页", "百度", "知乎", "google", "open", "search", "click", "type", "navigate",
|
||||
"打开",
|
||||
"搜索",
|
||||
"点击",
|
||||
"输入",
|
||||
"导航",
|
||||
"跳转",
|
||||
"访问",
|
||||
"提取",
|
||||
"获取",
|
||||
"网页",
|
||||
"页面",
|
||||
"标签页",
|
||||
"百度",
|
||||
"知乎",
|
||||
"google",
|
||||
"open",
|
||||
"search",
|
||||
"click",
|
||||
"type",
|
||||
"navigate",
|
||||
];
|
||||
if TASK_HINTS.iter().any(|hint| trimmed.contains(hint)) {
|
||||
return None;
|
||||
}
|
||||
|
||||
const CHITCHAT_INPUTS: &[&str] = &[
|
||||
"hi", "hello", "hey", "你好", "您好", "嗨", "在吗", "你是谁", "介绍一下你自己",
|
||||
"hi",
|
||||
"hello",
|
||||
"hey",
|
||||
"你好",
|
||||
"您好",
|
||||
"嗨",
|
||||
"在吗",
|
||||
"你是谁",
|
||||
"介绍一下你自己",
|
||||
];
|
||||
if CHITCHAT_INPUTS
|
||||
.iter()
|
||||
@@ -194,6 +227,22 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
|
||||
});
|
||||
}
|
||||
|
||||
match skill::try_execute_skill(transport, browser_tool, &instruction) {
|
||||
Ok(Some(summary)) => {
|
||||
return transport.send(&AgentMessage::TaskComplete {
|
||||
success: true,
|
||||
summary,
|
||||
});
|
||||
}
|
||||
Err(err) => {
|
||||
return transport.send(&AgentMessage::TaskComplete {
|
||||
success: false,
|
||||
summary: err.to_string(),
|
||||
});
|
||||
}
|
||||
Ok(None) => {}
|
||||
}
|
||||
|
||||
let task_context = CompatTaskContext {
|
||||
conversation_id: (!conversation_id.trim().is_empty())
|
||||
.then_some(conversation_id.clone()),
|
||||
|
||||
@@ -21,8 +21,7 @@ pub fn execute_task_with_provider<P: LlmProvider, T: Transport>(
|
||||
let messages = vec![
|
||||
ChatMessage {
|
||||
role: "system".to_string(),
|
||||
content: "You are sgClaw. Use browser_action to complete the browser task."
|
||||
.to_string(),
|
||||
content: "You are sgClaw. Use browser_action to complete the browser task.".to_string(),
|
||||
},
|
||||
ChatMessage {
|
||||
role: "user".to_string(),
|
||||
@@ -35,8 +34,8 @@ pub fn execute_task_with_provider<P: LlmProvider, T: Transport>(
|
||||
.map_err(map_llm_error_to_pipe_error)?;
|
||||
|
||||
for call in calls {
|
||||
let browser_call = parse_browser_action_call(call)
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
let browser_call =
|
||||
parse_browser_action_call(call).map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
|
||||
@@ -60,14 +60,14 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
|
||||
Err(err) => return Ok(failed_tool_result(err.to_string())),
|
||||
};
|
||||
|
||||
let result = match self.browser_tool.invoke(
|
||||
request.action,
|
||||
request.params,
|
||||
&request.expected_domain,
|
||||
) {
|
||||
Ok(result) => result,
|
||||
Err(err) => return Ok(failed_tool_result(err.to_string())),
|
||||
};
|
||||
let result =
|
||||
match self
|
||||
.browser_tool
|
||||
.invoke(request.action, request.params, &request.expected_domain)
|
||||
{
|
||||
Ok(result) => result,
|
||||
Err(err) => return Ok(failed_tool_result(err.to_string())),
|
||||
};
|
||||
|
||||
let output = serde_json::to_string(&json!({
|
||||
"seq": result.seq,
|
||||
@@ -80,8 +80,7 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
|
||||
Ok(ToolResult {
|
||||
success: result.success,
|
||||
output,
|
||||
error: (!result.success)
|
||||
.then(|| format_browser_action_error(&result.data)),
|
||||
error: (!result.success).then(|| format_browser_action_error(&result.data)),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -92,7 +91,9 @@ struct BrowserActionRequest {
|
||||
params: Value,
|
||||
}
|
||||
|
||||
fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, BrowserActionAdapterError> {
|
||||
fn parse_browser_action_request(
|
||||
args: Value,
|
||||
) -> Result<BrowserActionRequest, BrowserActionAdapterError> {
|
||||
let mut args = match args {
|
||||
Value::Object(args) => args,
|
||||
other => {
|
||||
|
||||
@@ -8,7 +8,9 @@ use crate::config::DeepSeekSettings;
|
||||
|
||||
const SGCLAW_ZEROCLAW_WORKSPACE_DIR: &str = ".sgclaw-zeroclaw-workspace";
|
||||
|
||||
pub fn build_zeroclaw_config(workspace_root: &Path) -> Result<ZeroClawConfig, crate::config::ConfigError> {
|
||||
pub fn build_zeroclaw_config(
|
||||
workspace_root: &Path,
|
||||
) -> Result<ZeroClawConfig, crate::config::ConfigError> {
|
||||
let settings = DeepSeekSettings::from_env()?;
|
||||
Ok(build_zeroclaw_config_from_settings(
|
||||
workspace_root,
|
||||
@@ -21,13 +23,15 @@ pub fn build_zeroclaw_config_from_settings(
|
||||
settings: &DeepSeekSettings,
|
||||
) -> ZeroClawConfig {
|
||||
let workspace_dir = zeroclaw_workspace_dir(workspace_root);
|
||||
let mut config = ZeroClawConfig::default();
|
||||
config.workspace_dir = workspace_dir.clone();
|
||||
config.config_path = workspace_dir.join("config.toml");
|
||||
config.default_provider = Some("deepseek".to_string());
|
||||
config.default_model = Some(settings.model.clone());
|
||||
config.api_key = Some(settings.api_key.clone());
|
||||
config.api_url = Some(settings.base_url.clone());
|
||||
let mut config = ZeroClawConfig {
|
||||
workspace_dir: workspace_dir.clone(),
|
||||
config_path: workspace_dir.join("config.toml"),
|
||||
default_provider: Some("deepseek".to_string()),
|
||||
default_model: Some(settings.model.clone()),
|
||||
api_key: Some(settings.api_key.clone()),
|
||||
api_url: Some(settings.base_url.clone()),
|
||||
..ZeroClawConfig::default()
|
||||
};
|
||||
configure_embedded_memory(&mut config);
|
||||
configure_embedded_cron(&mut config);
|
||||
config
|
||||
|
||||
@@ -65,7 +65,10 @@ where
|
||||
|
||||
for job in jobs {
|
||||
if !matches!(job.job_type, JobType::Agent) {
|
||||
anyhow::bail!("unsupported cron job type in sgclaw compat: {:?}", job.job_type);
|
||||
anyhow::bail!(
|
||||
"unsupported cron job type in sgclaw compat: {:?}",
|
||||
job.job_type
|
||||
);
|
||||
}
|
||||
|
||||
let started_at = Utc::now();
|
||||
|
||||
@@ -9,10 +9,12 @@ pub fn log_entry_for_turn_event(event: &TurnEvent) -> Option<AgentMessage> {
|
||||
level: "info".to_string(),
|
||||
message: format_tool_call(name, args),
|
||||
}),
|
||||
TurnEvent::ToolResult { output, .. } if is_tool_error(output) => Some(AgentMessage::LogEntry {
|
||||
level: "error".to_string(),
|
||||
message: output.trim_start_matches("Error: ").to_string(),
|
||||
}),
|
||||
TurnEvent::ToolResult { output, .. } if is_tool_error(output) => {
|
||||
Some(AgentMessage::LogEntry {
|
||||
level: "error".to_string(),
|
||||
message: output.trim_start_matches("Error: ").to_string(),
|
||||
})
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -29,7 +31,10 @@ fn format_tool_call(name: &str, args: &Value) -> String {
|
||||
|
||||
match action {
|
||||
"navigate" => {
|
||||
let url = args.get("url").and_then(Value::as_str).unwrap_or("<missing-url>");
|
||||
let url = args
|
||||
.get("url")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("<missing-url>");
|
||||
format!("navigate {url}")
|
||||
}
|
||||
"type" => {
|
||||
|
||||
@@ -7,18 +7,14 @@ use zeroclaw::agent::dispatcher::NativeToolDispatcher;
|
||||
use zeroclaw::agent::{Agent, TurnEvent};
|
||||
use zeroclaw::config::Config as ZeroClawConfig;
|
||||
use zeroclaw::observability::{NoopObserver, Observer};
|
||||
use zeroclaw::providers::{
|
||||
self, ChatMessage, ChatRequest, ChatResponse, Provider,
|
||||
};
|
||||
use zeroclaw::providers::traits::{
|
||||
ProviderCapabilities, StreamEvent, StreamOptions, StreamResult,
|
||||
};
|
||||
use zeroclaw::providers::traits::{ProviderCapabilities, StreamEvent, StreamOptions, StreamResult};
|
||||
use zeroclaw::providers::{self, ChatMessage, ChatRequest, ChatResponse, Provider};
|
||||
|
||||
use crate::compat::browser_tool_adapter::{ZeroClawBrowserTool, BROWSER_ACTION_TOOL_NAME};
|
||||
use crate::compat::config_adapter::build_zeroclaw_config_from_settings;
|
||||
use crate::config::DeepSeekSettings;
|
||||
use crate::compat::event_bridge::log_entry_for_turn_event;
|
||||
use crate::compat::memory_adapter::build_memory;
|
||||
use crate::config::DeepSeekSettings;
|
||||
use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport};
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
@@ -123,10 +119,7 @@ fn build_agent<T: Transport + 'static>(
|
||||
|
||||
fn build_provider(config: &ZeroClawConfig) -> Result<Box<dyn Provider>, PipeError> {
|
||||
let provider_name = config.default_provider.as_deref().unwrap_or("deepseek");
|
||||
let model_name = config
|
||||
.default_model
|
||||
.as_deref()
|
||||
.unwrap_or("deepseek-chat");
|
||||
let model_name = config.default_model.as_deref().unwrap_or("deepseek-chat");
|
||||
let runtime_options = providers::provider_runtime_options_from_config(config);
|
||||
let resolved_provider_name = if provider_name == "deepseek" {
|
||||
config
|
||||
@@ -191,7 +184,9 @@ impl Provider for NonStreamingProvider {
|
||||
model: &str,
|
||||
temperature: f64,
|
||||
) -> anyhow::Result<String> {
|
||||
self.inner.chat_with_history(messages, model, temperature).await
|
||||
self.inner
|
||||
.chat_with_history(messages, model, temperature)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn chat(
|
||||
|
||||
@@ -50,8 +50,7 @@ impl DeepSeekSettings {
|
||||
let config: RawDeepSeekSettings = serde_json::from_str(&raw)
|
||||
.map_err(|err| ConfigError::ConfigParse(path.to_path_buf(), err.to_string()))?;
|
||||
|
||||
Self::new(config.api_key, config.base_url, config.model)
|
||||
.map_err(|err| err.with_path(path))
|
||||
Self::new(config.api_key, config.base_url, config.model).map_err(|err| err.with_path(path))
|
||||
}
|
||||
|
||||
fn new(api_key: String, base_url: String, model: String) -> Result<Self, ConfigError> {
|
||||
|
||||
@@ -4,6 +4,7 @@ pub mod config;
|
||||
pub mod llm;
|
||||
pub mod pipe;
|
||||
pub mod security;
|
||||
pub mod skill;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -76,45 +76,35 @@ impl<T: Transport> BrowserPipeTool<T> {
|
||||
self.transport.send(&command)?;
|
||||
|
||||
let started = Instant::now();
|
||||
loop {
|
||||
let Some(remaining) = self.response_timeout.checked_sub(started.elapsed()) else {
|
||||
return Err(PipeError::Timeout);
|
||||
};
|
||||
let Some(remaining) = self.response_timeout.checked_sub(started.elapsed()) else {
|
||||
return Err(PipeError::Timeout);
|
||||
};
|
||||
|
||||
match self.transport.recv_timeout(remaining)? {
|
||||
BrowserMessage::Response {
|
||||
seq: response_seq,
|
||||
success,
|
||||
data,
|
||||
aom_snapshot,
|
||||
timing,
|
||||
} if response_seq == seq => {
|
||||
return Ok(CommandOutput {
|
||||
seq: response_seq,
|
||||
success,
|
||||
data,
|
||||
aom_snapshot,
|
||||
timing,
|
||||
});
|
||||
}
|
||||
BrowserMessage::Response {
|
||||
seq: response_seq, ..
|
||||
} => {
|
||||
return Err(PipeError::Protocol(format!(
|
||||
"received response seq {response_seq} while waiting for {seq}"
|
||||
)));
|
||||
}
|
||||
BrowserMessage::Init { .. } => {
|
||||
return Err(PipeError::UnexpectedMessage(
|
||||
"received duplicate init after handshake".to_string(),
|
||||
));
|
||||
}
|
||||
BrowserMessage::SubmitTask { .. } => {
|
||||
return Err(PipeError::UnexpectedMessage(
|
||||
"received submit_task while waiting for response".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
match self.transport.recv_timeout(remaining)? {
|
||||
BrowserMessage::Response {
|
||||
seq: response_seq,
|
||||
success,
|
||||
data,
|
||||
aom_snapshot,
|
||||
timing,
|
||||
} if response_seq == seq => Ok(CommandOutput {
|
||||
seq: response_seq,
|
||||
success,
|
||||
data,
|
||||
aom_snapshot,
|
||||
timing,
|
||||
}),
|
||||
BrowserMessage::Response {
|
||||
seq: response_seq, ..
|
||||
} => Err(PipeError::Protocol(format!(
|
||||
"received response seq {response_seq} while waiting for {seq}"
|
||||
))),
|
||||
BrowserMessage::Init { .. } => Err(PipeError::UnexpectedMessage(
|
||||
"received duplicate init after handshake".to_string(),
|
||||
)),
|
||||
BrowserMessage::SubmitTask { .. } => Err(PipeError::UnexpectedMessage(
|
||||
"received submit_task while waiting for response".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,8 +5,8 @@ pub mod protocol;
|
||||
pub use browser_tool::{BrowserPipeTool, CommandOutput};
|
||||
pub use handshake::{perform_handshake, HandshakeResult};
|
||||
pub use protocol::{
|
||||
supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage,
|
||||
SecurityFields, Timing,
|
||||
supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage, SecurityFields,
|
||||
Timing,
|
||||
};
|
||||
|
||||
use std::io::{BufRead, BufReader, Read, Write};
|
||||
@@ -71,7 +71,7 @@ impl StdioTransport {
|
||||
continue;
|
||||
}
|
||||
|
||||
if line.as_bytes().len() > MAX_MESSAGE_BYTES {
|
||||
if line.len() > MAX_MESSAGE_BYTES {
|
||||
let _ = tx.send(Err(PipeError::MessageTooLarge(line.len())));
|
||||
continue;
|
||||
}
|
||||
|
||||
85
src/skill/mod.rs
Normal file
85
src/skill/mod.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
pub mod router;
|
||||
pub mod zhihu;
|
||||
pub mod zhihu_hotlist;
|
||||
pub mod zhihu_hotlist_store;
|
||||
pub mod zhihu_navigation;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
|
||||
|
||||
/// Builds the relative location of a bundled skill resource:
/// `resources/skills/<resource_name>`.
fn relative_skill_resource_path(resource_name: &str) -> PathBuf {
    ["resources", "skills", resource_name].iter().collect()
}
|
||||
|
||||
pub(crate) fn skill_resource_path_from_executable(
|
||||
executable_path: PathBuf,
|
||||
resource_name: &str,
|
||||
) -> PathBuf {
|
||||
executable_path
|
||||
.parent()
|
||||
.map(|dir| dir.join("resources").join("skills").join(resource_name))
|
||||
.unwrap_or_else(|| relative_skill_resource_path(resource_name))
|
||||
}
|
||||
|
||||
pub(crate) fn default_skill_resource_path(resource_name: &str) -> PathBuf {
|
||||
std::env::current_exe()
|
||||
.ok()
|
||||
.map(|path| skill_resource_path_from_executable(path, resource_name))
|
||||
.filter(|path| path.exists())
|
||||
.unwrap_or_else(|| relative_skill_resource_path(resource_name))
|
||||
}
|
||||
|
||||
pub fn try_execute_skill<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
instruction: &str,
|
||||
) -> Result<Option<String>, PipeError> {
|
||||
match router::route_instruction(instruction)
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?
|
||||
{
|
||||
Some(router::RoutedSkill::ZhihuWrite(req)) => {
|
||||
let result = zhihu::execute(transport, browser_tool, req)
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
Ok(Some(result.summary))
|
||||
}
|
||||
Some(router::RoutedSkill::ZhihuHotlistCollect(req)) => {
|
||||
let result = zhihu_hotlist::execute_collect(transport, browser_tool, req)
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
Ok(Some(result.summary))
|
||||
}
|
||||
Some(router::RoutedSkill::ZhihuHotlistReport(req)) => {
|
||||
let result = zhihu_hotlist::execute_report(req)
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
Ok(Some(result.summary))
|
||||
}
|
||||
Some(router::RoutedSkill::ZhihuNavigate(req)) => {
|
||||
let result = zhihu_navigation::execute(transport, browser_tool, req)
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
Ok(Some(result.summary))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::skill_resource_path_from_executable;

    /// The resource path is derived from the executable's directory.
    #[test]
    fn skill_resource_path_uses_executable_directory_instead_of_cwd() {
        let expected =
            PathBuf::from("/tmp/out/KylinRelease/resources/skills/zhihu_navigation_pages.json");

        let actual = skill_resource_path_from_executable(
            PathBuf::from("/tmp/out/KylinRelease/sgclaw"),
            "zhihu_navigation_pages.json",
        );

        assert_eq!(actual, expected);
    }
}
|
||||
92
src/skill/router.rs
Normal file
92
src/skill/router.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
use thiserror::Error;
|
||||
|
||||
use super::zhihu::ZhihuWriteRequest;
|
||||
use super::zhihu_hotlist::{ZhihuHotlistCollectRequest, ZhihuHotlistReportRequest};
|
||||
use super::zhihu_navigation::{
|
||||
try_route_alias as try_route_zhihu_navigation_alias, ZhihuNavigateRequest,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum RoutedSkill {
|
||||
ZhihuWrite(ZhihuWriteRequest),
|
||||
ZhihuHotlistCollect(ZhihuHotlistCollectRequest),
|
||||
ZhihuHotlistReport(ZhihuHotlistReportRequest),
|
||||
ZhihuNavigate(ZhihuNavigateRequest),
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RouterError {
|
||||
#[error("missing skill name after skill: prefix")]
|
||||
MissingSkillName,
|
||||
#[error("missing JSON arguments for skill: {0}")]
|
||||
MissingArguments(String),
|
||||
#[error("unknown skill: {0}")]
|
||||
UnknownSkill(String),
|
||||
#[error("invalid JSON arguments for skill {skill}: {message}")]
|
||||
InvalidArguments { skill: String, message: String },
|
||||
}
|
||||
|
||||
pub fn route_instruction(instruction: &str) -> Result<Option<RoutedSkill>, RouterError> {
|
||||
let trimmed = instruction.trim();
|
||||
if trimmed.starts_with("skill:") {
|
||||
return parse_explicit_skill(trimmed).map(Some);
|
||||
}
|
||||
|
||||
match try_route_zhihu_navigation_alias(trimmed) {
|
||||
Ok(Some(req)) => Ok(Some(RoutedSkill::ZhihuNavigate(req))),
|
||||
Ok(None) => Ok(None),
|
||||
Err(err) => Err(RouterError::InvalidArguments {
|
||||
skill: "zhihu_navigate".to_string(),
|
||||
message: err.to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_explicit_skill(instruction: &str) -> Result<RoutedSkill, RouterError> {
|
||||
let rest = instruction
|
||||
.strip_prefix("skill:")
|
||||
.ok_or(RouterError::MissingSkillName)?
|
||||
.trim();
|
||||
|
||||
if rest.is_empty() {
|
||||
return Err(RouterError::MissingSkillName);
|
||||
}
|
||||
|
||||
let split_at = rest
|
||||
.find(char::is_whitespace)
|
||||
.ok_or_else(|| RouterError::MissingArguments(rest.to_string()))?;
|
||||
let name = rest[..split_at].trim();
|
||||
let args = rest[split_at..].trim();
|
||||
|
||||
if args.is_empty() {
|
||||
return Err(RouterError::MissingArguments(name.to_string()));
|
||||
}
|
||||
|
||||
match name {
|
||||
"zhihu_write" => serde_json::from_str::<ZhihuWriteRequest>(args)
|
||||
.map(RoutedSkill::ZhihuWrite)
|
||||
.map_err(|err| RouterError::InvalidArguments {
|
||||
skill: name.to_string(),
|
||||
message: err.to_string(),
|
||||
}),
|
||||
"zhihu_hotlist_collect" => serde_json::from_str::<ZhihuHotlistCollectRequest>(args)
|
||||
.map(RoutedSkill::ZhihuHotlistCollect)
|
||||
.map_err(|err| RouterError::InvalidArguments {
|
||||
skill: name.to_string(),
|
||||
message: err.to_string(),
|
||||
}),
|
||||
"zhihu_hotlist_report" => serde_json::from_str::<ZhihuHotlistReportRequest>(args)
|
||||
.map(RoutedSkill::ZhihuHotlistReport)
|
||||
.map_err(|err| RouterError::InvalidArguments {
|
||||
skill: name.to_string(),
|
||||
message: err.to_string(),
|
||||
}),
|
||||
"zhihu_navigate" => serde_json::from_str::<ZhihuNavigateRequest>(args)
|
||||
.map(RoutedSkill::ZhihuNavigate)
|
||||
.map_err(|err| RouterError::InvalidArguments {
|
||||
skill: name.to_string(),
|
||||
message: err.to_string(),
|
||||
}),
|
||||
other => Err(RouterError::UnknownSkill(other.to_string())),
|
||||
}
|
||||
}
|
||||
419
src/skill/zhihu.rs
Normal file
419
src/skill/zhihu.rs
Normal file
@@ -0,0 +1,419 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, Transport};
|
||||
|
||||
const ZHIHU_ARTICLE_URL_PREFIX: &str = "https://zhuanlan.zhihu.com/p/";
|
||||
const ZHIHU_ARTICLE_EDIT_SUFFIX: &str = "/edit";
|
||||
|
||||
/// Serde default for `ZhihuWriteRequest::publish` when the field is omitted.
fn default_publish() -> bool {
    const PUBLISH_BY_DEFAULT: bool = true;
    PUBLISH_BY_DEFAULT
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
|
||||
pub struct ZhihuWriteRequest {
|
||||
pub title: String,
|
||||
pub body: String,
|
||||
#[serde(default = "default_publish")]
|
||||
pub publish: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
pub struct ZhihuWriteResult {
|
||||
pub summary: String,
|
||||
pub published: bool,
|
||||
pub final_url: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ZhihuFlow {
|
||||
pub entry_url: String,
|
||||
pub editor_url: String,
|
||||
pub domains: HashMap<String, String>,
|
||||
pub literals: HashMap<String, String>,
|
||||
pub selectors: HashMap<String, String>,
|
||||
pub steps: Vec<FlowStep>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct FlowStep {
|
||||
pub name: String,
|
||||
pub action: String,
|
||||
pub expected_domain: String,
|
||||
pub selector_ref: Option<String>,
|
||||
pub url_ref: Option<String>,
|
||||
pub text_source: Option<String>,
|
||||
#[serde(default)]
|
||||
pub clear_first: bool,
|
||||
pub wait_after_ms: Option<u64>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub outer: Option<bool>,
|
||||
pub x: Option<i64>,
|
||||
pub y: Option<i64>,
|
||||
#[serde(default)]
|
||||
pub only_when_publish: bool,
|
||||
pub expect_contains: Option<String>,
|
||||
pub expect_text_source: Option<String>,
|
||||
#[serde(default)]
|
||||
pub allow_empty_text: bool,
|
||||
#[serde(default)]
|
||||
pub capture_url: bool,
|
||||
pub log_message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ZhihuSkillError {
|
||||
#[error("title 不能为空")]
|
||||
EmptyTitle,
|
||||
#[error("body 不能为空")]
|
||||
EmptyBody,
|
||||
#[error("failed to load zhihu flow: {0}")]
|
||||
FlowLoad(String),
|
||||
#[error("unknown action in zhihu flow: {0}")]
|
||||
UnknownAction(String),
|
||||
#[error("missing selector ref in zhihu flow step: {0}")]
|
||||
MissingSelectorRef(String),
|
||||
#[error("missing url ref in zhihu flow step: {0}")]
|
||||
MissingUrlRef(String),
|
||||
#[error("missing selector in zhihu flow: {0}")]
|
||||
MissingSelector(String),
|
||||
#[error("missing domain in zhihu flow: {0}")]
|
||||
MissingDomain(String),
|
||||
#[error("missing text source in zhihu flow step: {0}")]
|
||||
MissingTextSource(String),
|
||||
#[error("missing scroll target in zhihu flow step: {0}")]
|
||||
MissingScrollTarget(String),
|
||||
#[error("browser action failed at step {step}: {message}")]
|
||||
BrowserActionFailed { step: String, message: String },
|
||||
#[error("step {step} expected text containing `{expected}`, got `{actual}`")]
|
||||
ExpectedTextMissing {
|
||||
step: String,
|
||||
expected: String,
|
||||
actual: String,
|
||||
},
|
||||
#[error("step {step} expected text `{expected}`, got `{actual}`")]
|
||||
ExpectedTextMismatch {
|
||||
step: String,
|
||||
expected: String,
|
||||
actual: String,
|
||||
},
|
||||
#[error("step {step} did not return article url; cannot confirm article was published")]
|
||||
MissingPublishedUrl { step: String },
|
||||
}
|
||||
|
||||
pub fn default_flow_path() -> PathBuf {
|
||||
super::default_skill_resource_path("zhihu_write_flow.json")
|
||||
}
|
||||
|
||||
pub fn load_flow() -> Result<ZhihuFlow, ZhihuSkillError> {
|
||||
let path = default_flow_path();
|
||||
let contents = fs::read_to_string(&path)
|
||||
.map_err(|err| ZhihuSkillError::FlowLoad(format!("{} ({})", err, path.display())))?;
|
||||
serde_json::from_str(&contents)
|
||||
.map_err(|err| ZhihuSkillError::FlowLoad(format!("{} ({})", err, path.display())))
|
||||
}
|
||||
|
||||
pub fn execute<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
req: ZhihuWriteRequest,
|
||||
) -> Result<ZhihuWriteResult, ZhihuSkillError> {
|
||||
validate_request(&req)?;
|
||||
let flow = load_flow()?;
|
||||
let mut final_url = None;
|
||||
let mut published_url = None;
|
||||
let mut publish_capture_step = None;
|
||||
|
||||
for step in &flow.steps {
|
||||
if step.only_when_publish && !req.publish {
|
||||
continue;
|
||||
}
|
||||
|
||||
transport
|
||||
.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: step.log_message.clone(),
|
||||
})
|
||||
.map_err(|err| ZhihuSkillError::BrowserActionFailed {
|
||||
step: step.name.clone(),
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
|
||||
let action = parse_action(&step.action)?;
|
||||
let expected_domain = resolve_domain(&flow, &step.expected_domain)?;
|
||||
let params = build_params(&flow, step, &req)?;
|
||||
let result = browser_tool
|
||||
.invoke(action, params, &expected_domain)
|
||||
.map_err(|err| ZhihuSkillError::BrowserActionFailed {
|
||||
step: step.name.clone(),
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
|
||||
if !result.success {
|
||||
return Err(ZhihuSkillError::BrowserActionFailed {
|
||||
step: step.name.clone(),
|
||||
message: result.data.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if step.capture_url {
|
||||
if let Some(url) = extract_url(&result.data) {
|
||||
if step.only_when_publish {
|
||||
if is_published_article_url(&url) {
|
||||
published_url = normalize_published_article_url(&url);
|
||||
}
|
||||
} else {
|
||||
final_url = Some(url);
|
||||
}
|
||||
}
|
||||
if step.only_when_publish {
|
||||
publish_capture_step = Some(step.name.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(expected) = step.expect_contains.as_deref() {
|
||||
let actual = extract_text(&result.data);
|
||||
if !actual.contains(expected) {
|
||||
return Err(ZhihuSkillError::ExpectedTextMissing {
|
||||
step: step.name.clone(),
|
||||
expected: expected.to_string(),
|
||||
actual,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(source) = step.expect_text_source.as_deref() {
|
||||
let expected = resolve_text_source(&req, source)?.to_string();
|
||||
let actual = extract_text(&result.data);
|
||||
if actual.is_empty() && step.allow_empty_text {
|
||||
continue;
|
||||
}
|
||||
if actual != expected {
|
||||
return Err(ZhihuSkillError::ExpectedTextMismatch {
|
||||
step: step.name.clone(),
|
||||
expected,
|
||||
actual,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if req.publish {
|
||||
final_url = Some(
|
||||
published_url.ok_or_else(|| ZhihuSkillError::MissingPublishedUrl {
|
||||
step: publish_capture_step.unwrap_or_else(|| "publish_complete".to_string()),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
|
||||
Ok(ZhihuWriteResult {
|
||||
summary: build_summary(&req, final_url.as_deref()),
|
||||
published: req.publish,
|
||||
final_url,
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_request(req: &ZhihuWriteRequest) -> Result<(), ZhihuSkillError> {
|
||||
if req.title.trim().is_empty() {
|
||||
return Err(ZhihuSkillError::EmptyTitle);
|
||||
}
|
||||
if req.body.trim().is_empty() {
|
||||
return Err(ZhihuSkillError::EmptyBody);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_action(name: &str) -> Result<Action, ZhihuSkillError> {
|
||||
match name {
|
||||
"click" => Ok(Action::Click),
|
||||
"type" => Ok(Action::Type),
|
||||
"navigate" => Ok(Action::Navigate),
|
||||
"getText" => Ok(Action::GetText),
|
||||
"getHtml" => Ok(Action::GetHtml),
|
||||
"waitForSelector" => Ok(Action::WaitForSelector),
|
||||
"scrollTo" => Ok(Action::ScrollTo),
|
||||
other => Err(ZhihuSkillError::UnknownAction(other.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_domain(flow: &ZhihuFlow, key: &str) -> Result<String, ZhihuSkillError> {
|
||||
flow.domains
|
||||
.get(key)
|
||||
.cloned()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingDomain(key.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_selector<'a>(flow: &'a ZhihuFlow, key: &str) -> Result<&'a str, ZhihuSkillError> {
|
||||
flow.selectors
|
||||
.get(key)
|
||||
.map(String::as_str)
|
||||
.ok_or_else(|| ZhihuSkillError::MissingSelector(key.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_text_source<'a>(
|
||||
req: &'a ZhihuWriteRequest,
|
||||
source: &str,
|
||||
) -> Result<&'a str, ZhihuSkillError> {
|
||||
match source {
|
||||
"title" => Ok(req.title.as_str()),
|
||||
"body" => Ok(req.body.as_str()),
|
||||
other => Err(ZhihuSkillError::MissingTextSource(other.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_params(
|
||||
flow: &ZhihuFlow,
|
||||
step: &FlowStep,
|
||||
req: &ZhihuWriteRequest,
|
||||
) -> Result<Value, ZhihuSkillError> {
|
||||
match step.action.as_str() {
|
||||
"navigate" => {
|
||||
let url_ref = step
|
||||
.url_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingUrlRef(step.name.clone()))?;
|
||||
let url = match url_ref {
|
||||
"entry_url" => flow.entry_url.as_str(),
|
||||
"editor_url" => flow.editor_url.as_str(),
|
||||
other => {
|
||||
return Err(ZhihuSkillError::MissingUrlRef(format!(
|
||||
"{}:{}",
|
||||
step.name, other
|
||||
)))
|
||||
}
|
||||
};
|
||||
Ok(json!({ "url": url }))
|
||||
}
|
||||
"click" => {
|
||||
let selector_ref = step
|
||||
.selector_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingSelectorRef(step.name.clone()))?;
|
||||
let selector = resolve_selector(flow, selector_ref)?;
|
||||
let mut params = serde_json::Map::new();
|
||||
params.insert("selector".to_string(), Value::String(selector.to_string()));
|
||||
if let Some(wait_after_ms) = step.wait_after_ms {
|
||||
params.insert("wait_after".to_string(), Value::from(wait_after_ms));
|
||||
}
|
||||
Ok(Value::Object(params))
|
||||
}
|
||||
"type" => {
|
||||
let selector_ref = step
|
||||
.selector_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingSelectorRef(step.name.clone()))?;
|
||||
let selector = resolve_selector(flow, selector_ref)?;
|
||||
let text_source = step
|
||||
.text_source
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingTextSource(step.name.clone()))?;
|
||||
let text = resolve_text_source(req, text_source)?;
|
||||
Ok(json!({
|
||||
"selector": selector,
|
||||
"text": text,
|
||||
"clear_first": step.clear_first,
|
||||
}))
|
||||
}
|
||||
"getText" => {
|
||||
let selector_ref = step
|
||||
.selector_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingSelectorRef(step.name.clone()))?;
|
||||
let selector = resolve_selector(flow, selector_ref)?;
|
||||
Ok(json!({ "selector": selector }))
|
||||
}
|
||||
"getHtml" => {
|
||||
let selector_ref = step
|
||||
.selector_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingSelectorRef(step.name.clone()))?;
|
||||
let selector = resolve_selector(flow, selector_ref)?;
|
||||
let mut params = serde_json::Map::new();
|
||||
params.insert("selector".to_string(), Value::String(selector.to_string()));
|
||||
if let Some(outer) = step.outer {
|
||||
params.insert("outer".to_string(), Value::Bool(outer));
|
||||
}
|
||||
Ok(Value::Object(params))
|
||||
}
|
||||
"waitForSelector" => {
|
||||
let selector_ref = step
|
||||
.selector_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuSkillError::MissingSelectorRef(step.name.clone()))?;
|
||||
let selector = resolve_selector(flow, selector_ref)?;
|
||||
let mut params = serde_json::Map::new();
|
||||
params.insert("selector".to_string(), Value::String(selector.to_string()));
|
||||
if let Some(timeout_ms) = step.timeout_ms {
|
||||
params.insert("timeout_ms".to_string(), Value::from(timeout_ms));
|
||||
}
|
||||
Ok(Value::Object(params))
|
||||
}
|
||||
"scrollTo" => {
|
||||
if let Some(selector_ref) = step.selector_ref.as_deref() {
|
||||
let selector = resolve_selector(flow, selector_ref)?;
|
||||
return Ok(json!({ "selector": selector }));
|
||||
}
|
||||
if step.x.is_none() && step.y.is_none() {
|
||||
return Err(ZhihuSkillError::MissingScrollTarget(step.name.clone()));
|
||||
}
|
||||
let mut params = serde_json::Map::new();
|
||||
if let Some(x) = step.x {
|
||||
params.insert("x".to_string(), Value::from(x));
|
||||
}
|
||||
if let Some(y) = step.y {
|
||||
params.insert("y".to_string(), Value::from(y));
|
||||
}
|
||||
Ok(Value::Object(params))
|
||||
}
|
||||
other => Err(ZhihuSkillError::UnknownAction(other.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn extract_text(data: &Value) -> String {
|
||||
data.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn extract_url(data: &Value) -> Option<String> {
|
||||
data.get("url")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::trim)
|
||||
.filter(|url| !url.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
}
|
||||
|
||||
fn is_published_article_url(url: &str) -> bool {
|
||||
normalize_published_article_url(url).is_some()
|
||||
}
|
||||
|
||||
fn normalize_published_article_url(url: &str) -> Option<String> {
|
||||
let trimmed = url.trim();
|
||||
if !trimmed.starts_with(ZHIHU_ARTICLE_URL_PREFIX) {
|
||||
return None;
|
||||
}
|
||||
if trimmed.ends_with(ZHIHU_ARTICLE_EDIT_SUFFIX) {
|
||||
return Some(
|
||||
trimmed
|
||||
.trim_end_matches(ZHIHU_ARTICLE_EDIT_SUFFIX)
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
Some(trimmed.to_string())
|
||||
}
|
||||
|
||||
fn build_summary(req: &ZhihuWriteRequest, final_url: Option<&str>) -> String {
|
||||
if req.publish {
|
||||
let url = final_url.expect("publish flow must provide final_url before building summary");
|
||||
format!("知乎文章已发布:{} ({url})", req.title.trim())
|
||||
} else {
|
||||
format!("知乎文章草稿已填充:{}", req.title.trim())
|
||||
}
|
||||
}
|
||||
815
src/skill/zhihu_hotlist.rs
Normal file
815
src/skill/zhihu_hotlist.rs
Normal file
@@ -0,0 +1,815 @@
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::OnceLock;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, CommandOutput, Transport};
|
||||
|
||||
use super::zhihu_hotlist_store::{
|
||||
load_latest_snapshot, load_snapshot, persist_snapshot, resolve_store_dir,
|
||||
ZhihuCommentMetricSnapshot, ZhihuHotItemSnapshot, ZhihuHotlistCollectionStats,
|
||||
ZhihuHotlistSnapshot, ZhihuHotlistStoreError,
|
||||
};
|
||||
|
||||
const COLLECTOR_VERSION: &str = "zhihu_hotlist_v1";
|
||||
const DEFAULT_WAIT_TIMEOUT_MS: u64 = 5_000;
|
||||
const DEFAULT_COMMENT_SCROLL_Y: i64 = 1_200;
|
||||
|
||||
/// Serde default for `ZhihuHotlistCollectRequest::top_n`.
fn default_top_n() -> usize {
    const DEFAULT_TOP_N: usize = 10;
    DEFAULT_TOP_N
}
|
||||
|
||||
/// Serde default for `ZhihuHotlistCollectRequest::comments_per_item`.
fn default_comments_per_item() -> usize {
    const DEFAULT_COMMENTS_PER_ITEM: usize = 20;
    DEFAULT_COMMENTS_PER_ITEM
}
|
||||
|
||||
/// Serde default for `ZhihuHotlistReportRequest::top_n`.
fn default_report_top_n() -> usize {
    const DEFAULT_REPORT_TOP_N: usize = 10;
    DEFAULT_REPORT_TOP_N
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
|
||||
pub struct ZhihuHotlistCollectRequest {
|
||||
#[serde(default = "default_top_n")]
|
||||
pub top_n: usize,
|
||||
#[serde(default = "default_comments_per_item")]
|
||||
pub comments_per_item: usize,
|
||||
#[serde(default)]
|
||||
pub store_dir: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
pub struct ZhihuHotlistCollectResult {
|
||||
pub summary: String,
|
||||
pub snapshot_id: String,
|
||||
pub item_count: usize,
|
||||
pub snapshot_path: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
|
||||
pub struct ZhihuHotlistReportRequest {
|
||||
#[serde(default)]
|
||||
pub snapshot_id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub store_dir: Option<String>,
|
||||
#[serde(default = "default_report_top_n")]
|
||||
pub top_n: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
|
||||
pub struct ZhihuHotlistReportResult {
|
||||
pub summary: String,
|
||||
pub snapshot_id: String,
|
||||
pub item_count: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ZhihuHotlistFlow {
|
||||
pub hotlist_url: String,
|
||||
pub domains: HashMap<String, String>,
|
||||
pub literals: HashMap<String, String>,
|
||||
pub selectors: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ZhihuHotlistSkillError {
|
||||
#[error("top_n must be greater than 0")]
|
||||
InvalidTopN,
|
||||
#[error("comments_per_item must be greater than 0")]
|
||||
InvalidCommentsPerItem,
|
||||
#[error("failed to load zhihu hotlist flow: {0}")]
|
||||
FlowLoad(String),
|
||||
#[error("missing selector in zhihu hotlist flow: {0}")]
|
||||
MissingSelector(String),
|
||||
#[error("missing domain in zhihu hotlist flow: {0}")]
|
||||
MissingDomain(String),
|
||||
#[error("missing literal in zhihu hotlist flow: {0}")]
|
||||
MissingLiteral(String),
|
||||
#[error("invalid selector in zhihu hotlist flow `{name}`: {message}")]
|
||||
InvalidSelector { name: String, message: String },
|
||||
#[error("browser action failed at step {step}: {message}")]
|
||||
BrowserActionFailed { step: String, message: String },
|
||||
#[error("zhihu hotlist page did not expose any items")]
|
||||
NoHotlistItems,
|
||||
#[error("zhihu hotlist html did not include enough data for item extraction")]
|
||||
IncompleteHotlistHtml,
|
||||
#[error(transparent)]
|
||||
Store(#[from] ZhihuHotlistStoreError),
|
||||
}
|
||||
|
||||
pub fn default_flow_path() -> PathBuf {
|
||||
super::default_skill_resource_path("zhihu_hotlist_flow.json")
|
||||
}
|
||||
|
||||
pub fn load_flow() -> Result<ZhihuHotlistFlow, ZhihuHotlistSkillError> {
|
||||
let path = default_flow_path();
|
||||
let contents = fs::read_to_string(&path)
|
||||
.map_err(|err| ZhihuHotlistSkillError::FlowLoad(format!("{} ({})", err, path.display())))?;
|
||||
serde_json::from_str(&contents)
|
||||
.map_err(|err| ZhihuHotlistSkillError::FlowLoad(format!("{} ({})", err, path.display())))
|
||||
}
|
||||
|
||||
pub fn execute_collect<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
req: ZhihuHotlistCollectRequest,
|
||||
) -> Result<ZhihuHotlistCollectResult, ZhihuHotlistSkillError> {
|
||||
validate_collect_request(&req)?;
|
||||
let flow = load_flow()?;
|
||||
let zhihu_domain = resolve_domain(&flow, "zhihu")?;
|
||||
let hotlist_guard = resolve_literal(&flow, "hotlist_guard")?;
|
||||
let hotlist_root_selector = resolve_selector(&flow, "hotlist_root")?;
|
||||
let hotlist_item_selector = resolve_selector(&flow, "hotlist_item")?;
|
||||
let comment_list_selector = resolve_selector(&flow, "comment_list")?;
|
||||
let comment_item_selector = resolve_selector(&flow, "comment_item")?;
|
||||
let comment_metric_selector = resolve_selector(&flow, "comment_metric")?;
|
||||
|
||||
let page_url = ensure_hotlist_page(
|
||||
transport,
|
||||
browser_tool,
|
||||
&flow,
|
||||
&zhihu_domain,
|
||||
hotlist_guard,
|
||||
hotlist_root_selector,
|
||||
hotlist_item_selector,
|
||||
)?;
|
||||
|
||||
let hotlist_html = run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
"capture hotlist html",
|
||||
Action::GetHtml,
|
||||
json!({ "selector": hotlist_root_selector, "outer": true }),
|
||||
&zhihu_domain,
|
||||
)?;
|
||||
let hotlist_items = parse_hotlist_items(&hotlist_html.data, &flow, req.top_n)?;
|
||||
if hotlist_items.is_empty() {
|
||||
return Err(ZhihuHotlistSkillError::NoHotlistItems);
|
||||
}
|
||||
|
||||
let mut items = Vec::with_capacity(hotlist_items.len());
|
||||
let mut partial_items = 0usize;
|
||||
let mut items_with_comment_metrics = 0usize;
|
||||
let mut total_comment_metric_records = 0usize;
|
||||
|
||||
let comment_context = CommentCollectionContext {
|
||||
zhihu_domain: &zhihu_domain,
|
||||
comment_list_selector,
|
||||
comment_item_selector,
|
||||
comment_metric_selector,
|
||||
page_root_selector: hotlist_root_selector,
|
||||
comments_per_item: req.comments_per_item,
|
||||
};
|
||||
|
||||
for hot_item in hotlist_items {
|
||||
let comment_metrics = match collect_comment_metrics(
|
||||
transport,
|
||||
browser_tool,
|
||||
&comment_context,
|
||||
&hot_item.url,
|
||||
hot_item.rank,
|
||||
) {
|
||||
Ok(metrics) => metrics,
|
||||
Err(_) => {
|
||||
partial_items += 1;
|
||||
Vec::new()
|
||||
}
|
||||
};
|
||||
|
||||
if !comment_metrics.is_empty() {
|
||||
items_with_comment_metrics += 1;
|
||||
total_comment_metric_records += comment_metrics.len();
|
||||
}
|
||||
|
||||
items.push(ZhihuHotItemSnapshot {
|
||||
rank: hot_item.rank,
|
||||
item_id: hot_item.item_id,
|
||||
url: hot_item.url,
|
||||
title: hot_item.title,
|
||||
summary: hot_item.summary,
|
||||
heat_text: hot_item.heat_text.clone(),
|
||||
heat_value: parse_count_text(&hot_item.heat_text),
|
||||
comment_metrics,
|
||||
});
|
||||
}
|
||||
|
||||
let snapshot = ZhihuHotlistSnapshot {
|
||||
snapshot_id: build_snapshot_id(),
|
||||
captured_at_ms: now_unix_ms(),
|
||||
page_url,
|
||||
collector_version: COLLECTOR_VERSION.to_string(),
|
||||
collection_stats: ZhihuHotlistCollectionStats {
|
||||
requested_items: req.top_n,
|
||||
collected_items: items.len(),
|
||||
items_with_comment_metrics,
|
||||
total_comment_metric_records,
|
||||
partial_items,
|
||||
},
|
||||
items,
|
||||
};
|
||||
|
||||
let store_dir = resolve_store_dir(req.store_dir.as_deref());
|
||||
let persisted = persist_snapshot(&store_dir, &snapshot)?;
|
||||
let summary = format!(
|
||||
"知乎热榜快照已保存:{} 条热榜,{} 条评论指标记录 ({})",
|
||||
snapshot.items.len(),
|
||||
snapshot.collection_stats.total_comment_metric_records,
|
||||
persisted.snapshot_path.display()
|
||||
);
|
||||
|
||||
Ok(ZhihuHotlistCollectResult {
|
||||
summary,
|
||||
snapshot_id: snapshot.snapshot_id,
|
||||
item_count: snapshot.items.len(),
|
||||
snapshot_path: persisted.snapshot_path.display().to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn execute_report(
|
||||
req: ZhihuHotlistReportRequest,
|
||||
) -> Result<ZhihuHotlistReportResult, ZhihuHotlistSkillError> {
|
||||
validate_report_request(&req)?;
|
||||
let store_dir = resolve_store_dir(req.store_dir.as_deref());
|
||||
let snapshot = match req.snapshot_id.as_deref() {
|
||||
Some(snapshot_id) if !snapshot_id.trim().is_empty() => {
|
||||
load_snapshot(&store_dir, snapshot_id.trim())?
|
||||
}
|
||||
_ => load_latest_snapshot(&store_dir)?,
|
||||
};
|
||||
|
||||
let mut lines = vec![format!(
|
||||
"知乎热榜报告 {}: 共 {} 条,采集于 {}",
|
||||
snapshot.snapshot_id,
|
||||
snapshot.items.len(),
|
||||
snapshot.captured_at_ms
|
||||
)];
|
||||
|
||||
for item in snapshot.items.iter().take(req.top_n) {
|
||||
let totals = aggregate_comment_metrics(&item.comment_metrics);
|
||||
lines.push(format!(
|
||||
"{}. {} | 热度 {} | 评论指标 {} 条 | 回复 {} | 赞同 {} | 收藏 {} | 红心 {}",
|
||||
item.rank,
|
||||
item.title,
|
||||
item.heat_text,
|
||||
item.comment_metrics.len(),
|
||||
totals.reply_count,
|
||||
totals.upvote_count,
|
||||
totals.favorite_count,
|
||||
totals.heart_count,
|
||||
));
|
||||
}
|
||||
|
||||
Ok(ZhihuHotlistReportResult {
|
||||
summary: lines.join("\n"),
|
||||
snapshot_id: snapshot.snapshot_id,
|
||||
item_count: snapshot.items.len(),
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_collect_request(
|
||||
req: &ZhihuHotlistCollectRequest,
|
||||
) -> Result<(), ZhihuHotlistSkillError> {
|
||||
if req.top_n == 0 {
|
||||
return Err(ZhihuHotlistSkillError::InvalidTopN);
|
||||
}
|
||||
if req.comments_per_item == 0 {
|
||||
return Err(ZhihuHotlistSkillError::InvalidCommentsPerItem);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_report_request(req: &ZhihuHotlistReportRequest) -> Result<(), ZhihuHotlistSkillError> {
|
||||
if req.top_n == 0 {
|
||||
return Err(ZhihuHotlistSkillError::InvalidTopN);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_hotlist_page<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
flow: &ZhihuHotlistFlow,
|
||||
zhihu_domain: &str,
|
||||
hotlist_guard: &str,
|
||||
hotlist_root_selector: &str,
|
||||
hotlist_item_selector: &str,
|
||||
) -> Result<String, ZhihuHotlistSkillError> {
|
||||
let hotlist_probe = run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
"probe current Zhihu page for hotlist guard",
|
||||
Action::GetText,
|
||||
json!({ "selector": hotlist_root_selector }),
|
||||
zhihu_domain,
|
||||
);
|
||||
|
||||
if let Ok(result) = hotlist_probe {
|
||||
let text = extract_text(&result.data);
|
||||
if text.contains(hotlist_guard) {
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
"wait for hotlist items on current page",
|
||||
Action::WaitForSelector,
|
||||
json!({ "selector": hotlist_item_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
|
||||
zhihu_domain,
|
||||
)?;
|
||||
return Ok(extract_url(&result.data).unwrap_or_else(|| flow.hotlist_url.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
let navigate = run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
"navigate to Zhihu hotlist",
|
||||
Action::Navigate,
|
||||
json!({ "url": flow.hotlist_url }),
|
||||
zhihu_domain,
|
||||
)?;
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
"wait for Zhihu hotlist items",
|
||||
Action::WaitForSelector,
|
||||
json!({ "selector": hotlist_item_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
|
||||
zhihu_domain,
|
||||
)?;
|
||||
|
||||
Ok(extract_url(&navigate.data).unwrap_or_else(|| flow.hotlist_url.clone()))
|
||||
}
|
||||
|
||||
fn collect_comment_metrics<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
context: &CommentCollectionContext<'_>,
|
||||
item_url: &str,
|
||||
rank: usize,
|
||||
) -> Result<Vec<ZhihuCommentMetricSnapshot>, ZhihuHotlistSkillError> {
|
||||
let step_prefix = format!("collect comment metrics for hot item #{rank}");
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
&format!("{step_prefix}: navigate detail page"),
|
||||
Action::Navigate,
|
||||
json!({ "url": item_url }),
|
||||
context.zhihu_domain,
|
||||
)?;
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
&format!("{step_prefix}: wait for page root"),
|
||||
Action::WaitForSelector,
|
||||
json!({ "selector": context.page_root_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
|
||||
context.zhihu_domain,
|
||||
)?;
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
&format!("{step_prefix}: scroll toward comments"),
|
||||
Action::ScrollTo,
|
||||
json!({ "y": DEFAULT_COMMENT_SCROLL_Y }),
|
||||
context.zhihu_domain,
|
||||
)?;
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
&format!("{step_prefix}: wait for comment list"),
|
||||
Action::WaitForSelector,
|
||||
json!({ "selector": context.comment_list_selector, "timeout_ms": DEFAULT_WAIT_TIMEOUT_MS }),
|
||||
context.zhihu_domain,
|
||||
)?;
|
||||
run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
&format!("{step_prefix}: scroll comment list into view"),
|
||||
Action::ScrollTo,
|
||||
json!({ "selector": context.comment_list_selector }),
|
||||
context.zhihu_domain,
|
||||
)?;
|
||||
let comments_html = run_action(
|
||||
transport,
|
||||
browser_tool,
|
||||
&format!("{step_prefix}: capture page html for comments"),
|
||||
Action::GetHtml,
|
||||
json!({ "selector": context.page_root_selector, "outer": true }),
|
||||
context.zhihu_domain,
|
||||
)?;
|
||||
|
||||
Ok(parse_comment_metrics(
|
||||
&comments_html.data,
|
||||
context.comment_list_selector,
|
||||
context.comment_item_selector,
|
||||
context.comment_metric_selector,
|
||||
context.comments_per_item,
|
||||
))
|
||||
}
|
||||
|
||||
fn run_action<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
step: &str,
|
||||
action: Action,
|
||||
params: Value,
|
||||
expected_domain: &str,
|
||||
) -> Result<CommandOutput, ZhihuHotlistSkillError> {
|
||||
transport
|
||||
.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: step.to_string(),
|
||||
})
|
||||
.map_err(|err| ZhihuHotlistSkillError::BrowserActionFailed {
|
||||
step: step.to_string(),
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
|
||||
let result = browser_tool
|
||||
.invoke(action, params, expected_domain)
|
||||
.map_err(|err| ZhihuHotlistSkillError::BrowserActionFailed {
|
||||
step: step.to_string(),
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
|
||||
if !result.success {
|
||||
return Err(ZhihuHotlistSkillError::BrowserActionFailed {
|
||||
step: step.to_string(),
|
||||
message: result.data.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn parse_hotlist_items(
|
||||
data: &Value,
|
||||
flow: &ZhihuHotlistFlow,
|
||||
top_n: usize,
|
||||
) -> Result<Vec<ParsedHotItem>, ZhihuHotlistSkillError> {
|
||||
let html = extract_html(data);
|
||||
if html.trim().is_empty() {
|
||||
return Err(ZhihuHotlistSkillError::IncompleteHotlistHtml);
|
||||
}
|
||||
|
||||
let document = Html::parse_document(&html);
|
||||
let item_selector = parse_selector("hotlist_item", resolve_selector(flow, "hotlist_item")?)?;
|
||||
let title_link_selector = parse_selector(
|
||||
"hotlist_title_link",
|
||||
resolve_selector(flow, "hotlist_title_link")?,
|
||||
)?;
|
||||
let summary_selector = parse_selector(
|
||||
"hotlist_summary",
|
||||
resolve_selector(flow, "hotlist_summary")?,
|
||||
)?;
|
||||
let heat_selector = parse_selector("hotlist_heat", resolve_selector(flow, "hotlist_heat")?)?;
|
||||
|
||||
let mut seen_urls = HashSet::new();
|
||||
let mut items = Vec::new();
|
||||
|
||||
for (index, element) in document.select(&item_selector).enumerate() {
|
||||
let Some(link) = element.select(&title_link_selector).next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let title = compact_text(&link);
|
||||
if title.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let href = link.value().attr("href").unwrap_or_default();
|
||||
let url = normalize_zhihu_url(href);
|
||||
if url.is_empty() || !seen_urls.insert(url.clone()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let summary = element
|
||||
.select(&summary_selector)
|
||||
.next()
|
||||
.map(|node| compact_text(&node))
|
||||
.unwrap_or_default();
|
||||
let heat_text = element
|
||||
.select(&heat_selector)
|
||||
.next()
|
||||
.map(|node| compact_text(&node))
|
||||
.unwrap_or_default();
|
||||
|
||||
items.push(ParsedHotItem {
|
||||
rank: index + 1,
|
||||
item_id: derive_item_id(&url),
|
||||
url,
|
||||
title,
|
||||
summary,
|
||||
heat_text,
|
||||
});
|
||||
if items.len() >= top_n {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if items.is_empty() {
|
||||
return Err(ZhihuHotlistSkillError::NoHotlistItems);
|
||||
}
|
||||
|
||||
for (index, item) in items.iter_mut().enumerate() {
|
||||
item.rank = index + 1;
|
||||
}
|
||||
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn parse_comment_metrics(
|
||||
data: &Value,
|
||||
comment_list_selector: &str,
|
||||
comment_item_selector: &str,
|
||||
comment_metric_selector: &str,
|
||||
comments_per_item: usize,
|
||||
) -> Vec<ZhihuCommentMetricSnapshot> {
|
||||
let html = extract_html(data);
|
||||
if html.trim().is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let document = Html::parse_document(&html);
|
||||
let comment_item_selector = match Selector::parse(comment_item_selector) {
|
||||
Ok(selector) => selector,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
let metric_selector = match Selector::parse(comment_metric_selector) {
|
||||
Ok(selector) => selector,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
let comment_list_selector = match Selector::parse(comment_list_selector) {
|
||||
Ok(selector) => selector,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
|
||||
let container = document
|
||||
.select(&comment_list_selector)
|
||||
.next()
|
||||
.map(|node| node.html())
|
||||
.unwrap_or_else(|| html.clone());
|
||||
let scoped_document = Html::parse_fragment(&container);
|
||||
|
||||
scoped_document
|
||||
.select(&comment_item_selector)
|
||||
.take(comments_per_item)
|
||||
.enumerate()
|
||||
.map(|(index, element)| {
|
||||
build_comment_metric_snapshot(index + 1, &element, &metric_selector)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_comment_metric_snapshot(
|
||||
position: usize,
|
||||
element: &ElementRef<'_>,
|
||||
metric_selector: &Selector,
|
||||
) -> ZhihuCommentMetricSnapshot {
|
||||
let mut raw_metrics = BTreeMap::new();
|
||||
let mut snapshot = ZhihuCommentMetricSnapshot {
|
||||
position,
|
||||
comment_id: element
|
||||
.value()
|
||||
.attr("data-id")
|
||||
.or_else(|| element.value().attr("data-comment-id"))
|
||||
.or_else(|| element.value().attr("id"))
|
||||
.map(ToString::to_string),
|
||||
reply_count: None,
|
||||
upvote_count: None,
|
||||
favorite_count: None,
|
||||
heart_count: None,
|
||||
raw_metrics: None,
|
||||
};
|
||||
|
||||
for metric in element.select(metric_selector) {
|
||||
let text = compact_text(&metric);
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let count = parse_count_text(&text).or(Some(0));
|
||||
let lowered = text.to_ascii_lowercase();
|
||||
if text.contains("回复") {
|
||||
snapshot.reply_count = count;
|
||||
} else if text.contains("赞") || lowered.contains("upvote") {
|
||||
snapshot.upvote_count = count;
|
||||
} else if text.contains("收藏")
|
||||
|| lowered.contains("favorite")
|
||||
|| lowered.contains("bookmark")
|
||||
{
|
||||
snapshot.favorite_count = count;
|
||||
} else if text.contains("喜欢")
|
||||
|| text.contains("红心")
|
||||
|| text.contains('❤')
|
||||
|| text.contains('♥')
|
||||
{
|
||||
snapshot.heart_count = count;
|
||||
} else if let Some(value) = count {
|
||||
raw_metrics.insert(sanitize_metric_key(&text), value);
|
||||
}
|
||||
}
|
||||
|
||||
if !raw_metrics.is_empty() {
|
||||
snapshot.raw_metrics = Some(raw_metrics);
|
||||
}
|
||||
|
||||
snapshot
|
||||
}
|
||||
|
||||
fn parse_selector(name: &str, raw: &str) -> Result<Selector, ZhihuHotlistSkillError> {
|
||||
Selector::parse(raw).map_err(|err| ZhihuHotlistSkillError::InvalidSelector {
|
||||
name: name.to_string(),
|
||||
message: err.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn resolve_selector<'a>(
|
||||
flow: &'a ZhihuHotlistFlow,
|
||||
key: &str,
|
||||
) -> Result<&'a str, ZhihuHotlistSkillError> {
|
||||
flow.selectors
|
||||
.get(key)
|
||||
.map(String::as_str)
|
||||
.ok_or_else(|| ZhihuHotlistSkillError::MissingSelector(key.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_domain(flow: &ZhihuHotlistFlow, key: &str) -> Result<String, ZhihuHotlistSkillError> {
|
||||
flow.domains
|
||||
.get(key)
|
||||
.cloned()
|
||||
.ok_or_else(|| ZhihuHotlistSkillError::MissingDomain(key.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_literal<'a>(
|
||||
flow: &'a ZhihuHotlistFlow,
|
||||
key: &str,
|
||||
) -> Result<&'a str, ZhihuHotlistSkillError> {
|
||||
flow.literals
|
||||
.get(key)
|
||||
.map(String::as_str)
|
||||
.ok_or_else(|| ZhihuHotlistSkillError::MissingLiteral(key.to_string()))
|
||||
}
|
||||
|
||||
fn extract_text(data: &Value) -> String {
|
||||
data.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.or_else(|| data.as_str())
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn extract_html(data: &Value) -> String {
|
||||
data.get("html")
|
||||
.and_then(Value::as_str)
|
||||
.or_else(|| data.get("outer_html").and_then(Value::as_str))
|
||||
.or_else(|| data.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn extract_url(data: &Value) -> Option<String> {
|
||||
data.get("url")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string)
|
||||
}
|
||||
|
||||
fn compact_text(element: &ElementRef<'_>) -> String {
|
||||
element
|
||||
.text()
|
||||
.map(str::trim)
|
||||
.filter(|text| !text.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
|
||||
/// Turns an `href` taken from the hotlist into an absolute Zhihu URL without
/// a fragment.
///
/// - Absolute `http://` / `https://` URLs are kept as they are.
/// - Protocol-relative URLs (`//host/...`) get an `https:` scheme.
/// - Everything else is resolved against `https://www.zhihu.com`.
///
/// The `#fragment` is removed for every form. Previously it was only removed
/// for absolute and protocol-relative URLs, so `/question/1#a` and
/// `/question/1` produced different strings and slipped past URL-based
/// de-duplication.
///
/// Returns an empty string when the input is blank or consists solely of a
/// fragment (a same-page anchor has no page of its own to visit).
fn normalize_zhihu_url(raw: &str) -> String {
    let trimmed = raw.trim();
    // Cut the fragment first so all branches below see the same shape.
    let target = trimmed
        .split_once('#')
        .map_or(trimmed, |(before_fragment, _)| before_fragment);

    if target.is_empty() {
        return String::new();
    }
    if target.starts_with("https://") || target.starts_with("http://") {
        return target.to_string();
    }
    // Must be checked before the single-slash case below.
    if let Some(rest) = target.strip_prefix("//") {
        return format!("https://{rest}");
    }
    if target.starts_with('/') {
        return format!("https://www.zhihu.com{target}");
    }
    format!("https://www.zhihu.com/{target}")
}
|
||||
|
||||
/// Derives a stable item id from a URL's path.
///
/// The scheme and host are dropped, the query string and fragment are cut
/// off, surrounding slashes are removed, and the remaining `/` separators
/// become `_` (`https://www.zhihu.com/question/123?x=1` -> `question_123`).
/// A URL with no path yields `"root"`.
///
/// Compared with the earlier version this also strips a `#fragment` (which
/// used to end up inside the id) and removes the scheme prefix exactly once
/// instead of repeatedly.
fn derive_item_id(url: &str) -> String {
    let trimmed = url.trim();
    let without_scheme = trimmed
        .strip_prefix("https://")
        .or_else(|| trimmed.strip_prefix("http://"))
        .unwrap_or(trimmed);

    // Everything after the first '/' is the path (plus query/fragment).
    let path = without_scheme
        .split_once('/')
        .map_or("", |(_host, rest)| rest)
        .split(['?', '#'])
        .next()
        .unwrap_or_default()
        .trim_matches('/');

    if path.is_empty() {
        "root".to_string()
    } else {
        path.replace('/', "_")
    }
}
|
||||
|
||||
fn aggregate_comment_metrics(metrics: &[ZhihuCommentMetricSnapshot]) -> AggregatedCommentMetrics {
|
||||
let mut totals = AggregatedCommentMetrics::default();
|
||||
for metric in metrics {
|
||||
totals.reply_count += metric.reply_count.unwrap_or(0);
|
||||
totals.upvote_count += metric.upvote_count.unwrap_or(0);
|
||||
totals.favorite_count += metric.favorite_count.unwrap_or(0);
|
||||
totals.heart_count += metric.heart_count.unwrap_or(0);
|
||||
}
|
||||
totals
|
||||
}
|
||||
|
||||
/// Converts free-form metric text into a key made of lowercase ASCII letters,
/// digits and underscores.
///
/// Every character that is not ASCII alphanumeric (including each non-ASCII
/// character) becomes a single `_`; leading and trailing underscores are then
/// removed. Text without any ASCII alphanumeric character yields an empty
/// key.
fn sanitize_metric_key(text: &str) -> String {
    let mut key = String::with_capacity(text.len());
    for ch in text.chars() {
        key.push(match ch {
            'a'..='z' | '0'..='9' => ch,
            'A'..='Z' => ch.to_ascii_lowercase(),
            _ => '_',
        });
    }
    key.trim_matches('_').to_string()
}
|
||||
|
||||
fn build_snapshot_id() -> String {
|
||||
format!("{}-{}", now_unix_ms(), Uuid::new_v4())
|
||||
}
|
||||
|
||||
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` when the system clock reports a time before the epoch.
fn now_unix_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_millis() as u64)
}
|
||||
|
||||
fn parse_count_text(text: &str) -> Option<u64> {
|
||||
let compact = text.replace([',', ' '], "");
|
||||
let captures = count_regex().captures(&compact)?;
|
||||
let number = captures.name("number")?.as_str().parse::<f64>().ok()?;
|
||||
let unit = captures
|
||||
.name("unit")
|
||||
.map(|unit| unit.as_str())
|
||||
.unwrap_or_default();
|
||||
let multiplier = match unit {
|
||||
"万" | "w" | "W" => 10_000f64,
|
||||
"亿" => 100_000_000f64,
|
||||
"k" | "K" => 1_000f64,
|
||||
"m" | "M" => 1_000_000f64,
|
||||
_ => 1f64,
|
||||
};
|
||||
Some((number * multiplier).round() as u64)
|
||||
}
|
||||
|
||||
fn count_regex() -> &'static Regex {
|
||||
static REGEX: OnceLock<Regex> = OnceLock::new();
|
||||
REGEX.get_or_init(|| {
|
||||
Regex::new(r"(?P<number>\d+(?:\.\d+)?)\s*(?P<unit>万|亿|[kKmMwW])?").unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
/// Borrowed settings shared by every per-item comment collection run.
///
/// Built once in `execute_collect` and passed by reference to
/// `collect_comment_metrics`.
#[derive(Debug, Clone, Copy)]
struct CommentCollectionContext<'a> {
    /// Domain passed as the expected domain of every browser action.
    zhihu_domain: &'a str,
    /// Selector of the comment list container.
    comment_list_selector: &'a str,
    /// Selector of a single comment inside the list.
    comment_item_selector: &'a str,
    /// Selector of a metric node inside a comment.
    comment_metric_selector: &'a str,
    /// Selector waited for after navigation and used when capturing HTML.
    page_root_selector: &'a str,
    /// Maximum number of comments to record per hot item.
    comments_per_item: usize,
}
|
||||
|
||||
/// One hotlist entry as parsed from the hotlist HTML, before comment metrics
/// are attached.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedHotItem {
    /// 1-based position among the entries kept by the parser.
    rank: usize,
    /// Id derived from the URL path.
    item_id: String,
    /// Normalized absolute URL of the entry.
    url: String,
    /// Compacted text of the title link.
    title: String,
    /// Compacted summary text; empty when the entry has none.
    summary: String,
    /// Compacted heat text as displayed; empty when the entry has none.
    heat_text: String,
}
|
||||
|
||||
/// Per-item totals of the classified comment metrics, used when rendering a
/// report line.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct AggregatedCommentMetrics {
    /// Sum of `reply_count` over all comments.
    reply_count: u64,
    /// Sum of `upvote_count` over all comments.
    upvote_count: u64,
    /// Sum of `favorite_count` over all comments.
    favorite_count: u64,
    /// Sum of `heart_count` over all comments.
    heart_count: u64,
}
|
||||
184
src/skill/zhihu_hotlist_store.rs
Normal file
184
src/skill/zhihu_hotlist_store.rs
Normal file
@@ -0,0 +1,184 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
|
||||
/// Index of persisted snapshots, stored as `index.json` in the store
/// directory.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ZhihuHotlistIndex {
    /// Id of the snapshot written most recently; omitted from the JSON when
    /// unset.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub latest_snapshot_id: Option<String>,
    /// All known snapshots; `persist_snapshot` keeps them sorted by capture
    /// time, oldest first.
    #[serde(default)]
    pub snapshots: Vec<ZhihuHotlistIndexEntry>,
}
|
||||
|
||||
/// One row of the snapshot index.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotlistIndexEntry {
    /// Id of the snapshot this entry describes.
    pub snapshot_id: String,
    /// Capture time in milliseconds since the Unix epoch.
    pub captured_at_ms: u64,
    /// Snapshot file path relative to the store directory
    /// (`snapshots/<id>.json`).
    pub path: String,
    /// Number of hotlist items in the snapshot.
    pub item_count: usize,
}
|
||||
|
||||
/// A complete hotlist capture as written to `snapshots/<id>.json`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotlistSnapshot {
    /// Unique id; also used as the snapshot's file stem.
    pub snapshot_id: String,
    /// Capture time in milliseconds since the Unix epoch.
    pub captured_at_ms: u64,
    /// URL of the page the hotlist was captured from.
    pub page_url: String,
    /// Version string of the collector that produced the snapshot.
    pub collector_version: String,
    /// Captured hotlist items in rank order.
    pub items: Vec<ZhihuHotItemSnapshot>,
    /// Counters describing how complete the collection was.
    pub collection_stats: ZhihuHotlistCollectionStats,
}
|
||||
|
||||
/// One hotlist item inside a snapshot.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotItemSnapshot {
    /// 1-based rank of the item.
    pub rank: usize,
    /// Id derived from the item URL.
    pub item_id: String,
    /// Absolute URL of the item's detail page.
    pub url: String,
    /// Item title.
    pub title: String,
    /// Item summary; may be empty.
    pub summary: String,
    /// Heat text exactly as displayed on the page.
    pub heat_text: String,
    /// Numeric value parsed from `heat_text`; omitted from the JSON when the
    /// text held no number.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub heat_value: Option<u64>,
    /// Metrics of the item's comments; empty when none were collected.
    #[serde(default)]
    pub comment_metrics: Vec<ZhihuCommentMetricSnapshot>,
}
|
||||
|
||||
/// Metrics captured for a single comment.
///
/// Every optional field is left out of the JSON when it is `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuCommentMetricSnapshot {
    /// 1-based position of the comment in the captured list.
    pub position: usize,
    /// Identifier read from the comment element's attributes, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub comment_id: Option<String>,
    /// Reply count, when a reply metric was found.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reply_count: Option<u64>,
    /// Upvote count, when an upvote metric was found.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub upvote_count: Option<u64>,
    /// Favorite/bookmark count, when such a metric was found.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub favorite_count: Option<u64>,
    /// Heart/like count, when such a metric was found.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub heart_count: Option<u64>,
    /// Metrics that matched none of the known classes, keyed by sanitized
    /// label text.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub raw_metrics: Option<BTreeMap<String, u64>>,
}
|
||||
|
||||
/// Counters summarizing one collection run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ZhihuHotlistCollectionStats {
    /// Number of items the request asked for (`top_n`).
    pub requested_items: usize,
    /// Number of items actually stored in the snapshot.
    pub collected_items: usize,
    /// Items for which at least one comment metric record was captured.
    pub items_with_comment_metrics: usize,
    /// Total comment metric records across all items.
    pub total_comment_metric_records: usize,
    /// Items whose comment collection failed and was skipped.
    pub partial_items: usize,
}
|
||||
|
||||
/// Locations of the files written by `persist_snapshot`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PersistedSnapshotPaths {
    /// Path of the snapshot JSON file.
    pub snapshot_path: PathBuf,
    /// Path of the updated index file.
    pub index_path: PathBuf,
}
|
||||
|
||||
/// Errors produced by the snapshot store.
#[derive(Debug, Error)]
pub enum ZhihuHotlistStoreError {
    /// A filesystem operation failed.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
    /// A snapshot or the index could not be serialized or deserialized.
    #[error("json error: {0}")]
    Json(#[from] serde_json::Error),
    /// The index records no latest snapshot.
    #[error("no persisted Zhihu hotlist snapshots found")]
    NoSnapshots,
    /// No snapshot file exists for the given id.
    #[error("snapshot not found: {0}")]
    SnapshotNotFound(String),
}
|
||||
|
||||
/// Returns the default store directory: `data/zhihu_hotlist` under the
/// current working directory, or under `.` when the working directory cannot
/// be determined.
pub fn default_store_dir() -> PathBuf {
    let mut dir = match std::env::current_dir() {
        Ok(cwd) => cwd,
        Err(_) => PathBuf::from("."),
    };
    dir.push("data");
    dir.push("zhihu_hotlist");
    dir
}
|
||||
|
||||
pub fn resolve_store_dir(store_dir: Option<&str>) -> PathBuf {
|
||||
match store_dir {
|
||||
Some(path) if !path.trim().is_empty() => PathBuf::from(path),
|
||||
_ => default_store_dir(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn persist_snapshot(
|
||||
base_dir: &Path,
|
||||
snapshot: &ZhihuHotlistSnapshot,
|
||||
) -> Result<PersistedSnapshotPaths, ZhihuHotlistStoreError> {
|
||||
let snapshot_dir = snapshots_dir(base_dir);
|
||||
fs::create_dir_all(&snapshot_dir)?;
|
||||
|
||||
let snapshot_rel_path = format!("snapshots/{}.json", snapshot.snapshot_id);
|
||||
let snapshot_path = base_dir.join(&snapshot_rel_path);
|
||||
fs::write(&snapshot_path, serde_json::to_vec_pretty(snapshot)?)?;
|
||||
|
||||
let mut index = load_index(base_dir)?;
|
||||
index.latest_snapshot_id = Some(snapshot.snapshot_id.clone());
|
||||
index
|
||||
.snapshots
|
||||
.retain(|entry| entry.snapshot_id != snapshot.snapshot_id);
|
||||
index.snapshots.push(ZhihuHotlistIndexEntry {
|
||||
snapshot_id: snapshot.snapshot_id.clone(),
|
||||
captured_at_ms: snapshot.captured_at_ms,
|
||||
path: snapshot_rel_path,
|
||||
item_count: snapshot.items.len(),
|
||||
});
|
||||
index
|
||||
.snapshots
|
||||
.sort_by(|left, right| left.captured_at_ms.cmp(&right.captured_at_ms));
|
||||
|
||||
let index_path = index_path(base_dir);
|
||||
fs::write(&index_path, serde_json::to_vec_pretty(&index)?)?;
|
||||
|
||||
Ok(PersistedSnapshotPaths {
|
||||
snapshot_path,
|
||||
index_path,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load_index(base_dir: &Path) -> Result<ZhihuHotlistIndex, ZhihuHotlistStoreError> {
|
||||
let path = index_path(base_dir);
|
||||
if !path.exists() {
|
||||
return Ok(ZhihuHotlistIndex::default());
|
||||
}
|
||||
|
||||
let contents = fs::read_to_string(path)?;
|
||||
Ok(serde_json::from_str(&contents)?)
|
||||
}
|
||||
|
||||
pub fn load_snapshot(
|
||||
base_dir: &Path,
|
||||
snapshot_id: &str,
|
||||
) -> Result<ZhihuHotlistSnapshot, ZhihuHotlistStoreError> {
|
||||
let path = base_dir
|
||||
.join("snapshots")
|
||||
.join(format!("{}.json", snapshot_id.trim()));
|
||||
if !path.exists() {
|
||||
return Err(ZhihuHotlistStoreError::SnapshotNotFound(
|
||||
snapshot_id.trim().to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let contents = fs::read_to_string(path)?;
|
||||
Ok(serde_json::from_str(&contents)?)
|
||||
}
|
||||
|
||||
pub fn load_latest_snapshot(
|
||||
base_dir: &Path,
|
||||
) -> Result<ZhihuHotlistSnapshot, ZhihuHotlistStoreError> {
|
||||
let index = load_index(base_dir)?;
|
||||
let snapshot_id = index
|
||||
.latest_snapshot_id
|
||||
.ok_or(ZhihuHotlistStoreError::NoSnapshots)?;
|
||||
load_snapshot(base_dir, &snapshot_id)
|
||||
}
|
||||
|
||||
/// Returns the location of the index file inside `base_dir`.
fn index_path(base_dir: &Path) -> PathBuf {
    let mut path = base_dir.to_path_buf();
    path.push("index.json");
    path
}
|
||||
|
||||
/// Returns the directory inside `base_dir` that holds the snapshot files.
fn snapshots_dir(base_dir: &Path) -> PathBuf {
    let mut path = base_dir.to_path_buf();
    path.push("snapshots");
    path
}
|
||||
890
src/skill/zhihu_navigation.rs
Normal file
890
src/skill/zhihu_navigation.rs
Normal file
@@ -0,0 +1,890 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, CommandOutput, Transport};
|
||||
|
||||
/// Default wait timeout in milliseconds.
// NOTE(review): the code that consumes this constant is outside this view;
// presumably it applies when a route/component/step sets no timeout — confirm.
const DEFAULT_WAIT_TIMEOUT_MS: u64 = 5_000;
|
||||
|
||||
/// Serde default for `ZhihuNavigateRequest::ensure_loaded`: `true`.
fn default_ensure_loaded() -> bool {
    true
}
|
||||
|
||||
/// Serde default for `ZhihuComponentDefinition::capture_url`: `true`.
fn default_capture_url() -> bool {
    true
}
|
||||
|
||||
/// Request to navigate to a named Zhihu target.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct ZhihuNavigateRequest {
    /// Name of the target to open; `try_route_alias` fills this with a key of
    /// the catalog's `targets` map.
    pub page: String,
    /// Defaults to `true` when absent from the input.
    // NOTE(review): presumably controls whether the skill waits for the page
    // to finish loading — the consumer is outside this view; confirm.
    #[serde(default = "default_ensure_loaded")]
    pub ensure_loaded: bool,
}
|
||||
|
||||
/// Serializable outcome of a navigation request.
// NOTE(review): the code that fills these fields is outside this view; the
// descriptions below follow the field names — confirm against the producer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ZhihuNavigateResult {
    /// Human-readable summary of what happened.
    pub summary: String,
    /// The page/target the result refers to.
    pub page: String,
    /// URL the browser ended on.
    pub final_url: String,
}
|
||||
|
||||
/// Navigation catalog deserialized from `zhihu_navigation_pages.json`.
///
/// `domains` and `targets` are required in the JSON; `routes`, `components`
/// and `flows` default to empty maps when absent.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuNavigationCatalog {
    /// Domain strings keyed by name (looked up via `domain_ref` fields).
    pub domains: HashMap<String, String>,
    /// Route definitions keyed by name.
    #[serde(default)]
    pub routes: HashMap<String, ZhihuRouteDefinition>,
    /// Component definitions keyed by name.
    #[serde(default)]
    pub components: HashMap<String, ZhihuComponentDefinition>,
    /// Flow definitions keyed by name.
    #[serde(default)]
    pub flows: HashMap<String, ZhihuFlowDefinition>,
    /// Navigable targets keyed by name; the keys are what a navigate request
    /// refers to.
    pub targets: HashMap<String, ZhihuTargetDefinition>,
}
|
||||
|
||||
/// A catalog entry from the `routes` map: a page reachable by URL.
// NOTE(review): the code that interprets these fields is outside this view;
// field notes describe the apparent intent of each name — confirm.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuRouteDefinition {
    /// Display title of the route.
    pub title: String,
    /// Key into the catalog's `domains` map.
    pub domain_ref: String,
    /// URL of the route.
    pub url: String,
    /// Alternative names for the route; empty when absent.
    #[serde(default)]
    pub aliases: Vec<String>,
    /// Optional selector to wait for.
    pub wait_selector: Option<String>,
    /// Optional wait timeout in milliseconds.
    pub wait_timeout_ms: Option<u64>,
    /// Optional selector expected on the page.
    pub expect_selector: Option<String>,
    /// Optional text expected on the page.
    pub expect_text: Option<String>,
}
|
||||
|
||||
/// A catalog entry from the `components` map: an on-page element addressed
/// by selector.
// NOTE(review): the code that interprets these fields is outside this view;
// field notes describe the apparent intent of each name — confirm.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuComponentDefinition {
    /// Display title of the component.
    pub title: String,
    /// Key into the catalog's `domains` map.
    pub domain_ref: String,
    /// Selector locating the component.
    pub selector: String,
    /// Alternative names for the component; empty when absent.
    #[serde(default)]
    pub aliases: Vec<String>,
    /// Optional key of the route the component lives on.
    pub entry_route_ref: Option<String>,
    /// Optional key of the domain expected after interacting.
    pub result_domain_ref: Option<String>,
    /// Optional selector to wait for.
    pub wait_selector: Option<String>,
    /// Optional wait timeout in milliseconds.
    pub wait_timeout_ms: Option<u64>,
    /// Optional selector expected afterwards.
    pub expect_selector: Option<String>,
    /// Optional text expected afterwards.
    pub expect_text: Option<String>,
    /// Optional delay in milliseconds after the interaction.
    pub wait_after_ms: Option<u64>,
    /// Defaults to `true` when absent from the input.
    #[serde(default = "default_capture_url")]
    pub capture_url: bool,
}
|
||||
|
||||
/// Kind of definition a target resolves to.
///
/// Deserialized from the snake_case strings `route`, `component` and `flow`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ZhihuTargetKind {
    /// The target refers to a route definition.
    Route,
    /// The target refers to a component definition.
    Component,
    /// The target refers to a flow definition.
    Flow,
}
|
||||
|
||||
/// Optional summary category attached to a target.
///
/// Deserialized from the snake_case strings `page`, `entry`, `menu` and
/// `navigation`.
// NOTE(review): how each variant affects the generated summary is decided
// outside this view — confirm before documenting per-variant wording.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ZhihuSummaryKind {
    Page,
    Entry,
    Menu,
    Navigation,
}
|
||||
|
||||
/// A catalog entry from the `targets` map: something a user can ask to
/// navigate to.
// NOTE(review): presumably exactly one of `route_ref` / `component_ref` /
// `flow_ref` must be set, matching `kind`; the validation is outside this
// view — confirm.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuTargetDefinition {
    /// Display title of the target.
    pub title: String,
    /// Which kind of definition the target resolves to.
    pub kind: ZhihuTargetKind,
    /// Optional summary category.
    pub summary_kind: Option<ZhihuSummaryKind>,
    /// Optional key into the catalog's `routes` map.
    pub route_ref: Option<String>,
    /// Optional key into the catalog's `components` map.
    pub component_ref: Option<String>,
    /// Optional key into the catalog's `flows` map.
    pub flow_ref: Option<String>,
    /// Alternative names for the target; empty when absent.
    #[serde(default)]
    pub aliases: Vec<String>,
}
|
||||
|
||||
/// A catalog entry from the `flows` map: a list of steps.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuFlowDefinition {
    /// Steps of the flow, in the order they appear in the catalog.
    pub steps: Vec<ZhihuFlowStep>,
}
|
||||
|
||||
/// One step of a navigation flow.
// NOTE(review): the step executor is outside this view; field notes describe
// the apparent intent of each name — confirm against the executor.
#[derive(Debug, Clone, Deserialize)]
pub struct ZhihuFlowStep {
    /// Name of the step.
    pub name: String,
    /// Action identifier as written in the catalog.
    pub action: String,
    /// Optional key into the catalog's `routes` map.
    pub route_ref: Option<String>,
    /// Optional key into the catalog's `components` map.
    pub component_ref: Option<String>,
    /// Optional expected-domain value for the step.
    pub expected_domain: Option<String>,
    /// Optional timeout in milliseconds.
    pub timeout_ms: Option<u64>,
    /// Optional delay in milliseconds after the step.
    pub wait_after_ms: Option<u64>,
    /// Defaults to `false` when absent from the input.
    #[serde(default)]
    pub capture_url: bool,
    /// Optional text expected after the step.
    pub expect_text: Option<String>,
    /// Message associated with the step for logging.
    pub log_message: String,
}
|
||||
|
||||
/// Errors produced by the Zhihu navigation skill.
#[derive(Debug, Error)]
pub enum ZhihuNavigationError {
    /// The requested page name was empty.
    #[error("page 不能为空")]
    EmptyPage,
    /// The catalog file could not be read or parsed; the payload holds the
    /// cause together with the file path.
    #[error("failed to load zhihu navigation catalog: {0}")]
    CatalogLoad(String),
    /// No target with the given name exists.
    #[error("unknown zhihu target: {0}")]
    UnknownTarget(String),
    /// A referenced domain is missing from the catalog.
    #[error("missing domain in zhihu navigation catalog: {0}")]
    MissingDomain(String),
    /// A referenced route is missing from the catalog.
    #[error("missing route in zhihu navigation catalog: {0}")]
    MissingRoute(String),
    /// A referenced component is missing from the catalog.
    #[error("missing component in zhihu navigation catalog: {0}")]
    MissingComponent(String),
    /// A referenced flow is missing from the catalog.
    #[error("missing flow in zhihu navigation catalog: {0}")]
    MissingFlow(String),
    /// A target definition is inconsistent.
    #[error("invalid target definition in zhihu navigation catalog: {0}")]
    InvalidTargetDefinition(String),
    /// A flow step requires a route reference but has none.
    #[error("missing route ref in zhihu navigation flow step: {0}")]
    MissingRouteRef(String),
    /// A flow step requires a component reference but has none.
    #[error("missing component ref in zhihu navigation flow step: {0}")]
    MissingComponentRef(String),
    /// A flow step names an action that is not recognized.
    #[error("unknown action in zhihu navigation flow: {0}")]
    UnknownAction(String),
    /// A browser action failed at the named step.
    #[error("browser action failed at step {step}: {message}")]
    BrowserActionFailed { step: String, message: String },
    /// The text observed at a step did not contain the expected text.
    #[error("step {step} expected text containing `{expected}`, got `{actual}`")]
    ExpectedTextMissing {
        step: String,
        expected: String,
        actual: String,
    },
}
|
||||
|
||||
/// Mutable state carried through a navigation run.
// NOTE(review): the code that updates this state is outside this view.
#[derive(Debug, Default)]
struct ExecutionState {
    /// Most recent URL recorded during the run, if any; `None` by default.
    final_url: Option<String>,
}
|
||||
|
||||
/// Borrowed description of the verification to run after a browser action.
#[derive(Debug, Clone, Copy)]
struct PostActionChecks<'a> {
    /// Domain value passed along with every verification command.
    expected_domain: &'a str,
    /// Selector to wait for; the wait is skipped when `None`.
    wait_selector: Option<&'a str>,
    /// Timeout for the wait; a module default is used when `None`.
    wait_timeout_ms: Option<u64>,
    /// Selector whose text is read; only used together with `expect_text`.
    expect_selector: Option<&'a str>,
    /// Text the element must contain; only used together with `expect_selector`.
    expect_text: Option<&'a str>,
    /// Clear the recorded URL when a verification result reports none.
    reset_url_when_absent: bool,
}
|
||||
|
||||
pub fn default_catalog_path() -> PathBuf {
|
||||
super::default_skill_resource_path("zhihu_navigation_pages.json")
|
||||
}
|
||||
|
||||
pub fn load_catalog() -> Result<ZhihuNavigationCatalog, ZhihuNavigationError> {
|
||||
let path = default_catalog_path();
|
||||
let contents = fs::read_to_string(&path).map_err(|err| {
|
||||
ZhihuNavigationError::CatalogLoad(format!("{} ({})", err, path.display()))
|
||||
})?;
|
||||
serde_json::from_str(&contents)
|
||||
.map_err(|err| ZhihuNavigationError::CatalogLoad(format!("{} ({})", err, path.display())))
|
||||
}
|
||||
|
||||
pub fn try_route_alias(
|
||||
instruction: &str,
|
||||
) -> Result<Option<ZhihuNavigateRequest>, ZhihuNavigationError> {
|
||||
let trimmed = instruction.trim();
|
||||
if !looks_like_navigation_intent(trimmed) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let catalog = load_catalog()?;
|
||||
let normalized_instruction = normalize_text(trimmed);
|
||||
let mut matches = Vec::new();
|
||||
|
||||
for (target_key, target) in &catalog.targets {
|
||||
let score = best_target_match_score(&catalog, target, &normalized_instruction);
|
||||
if score > 0 {
|
||||
matches.push((target_key.as_str(), score));
|
||||
}
|
||||
}
|
||||
|
||||
if matches.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
matches.sort_by(|left, right| right.1.cmp(&left.1).then_with(|| left.0.cmp(right.0)));
|
||||
if matches.len() > 1 && matches[0].1 == matches[1].1 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(Some(ZhihuNavigateRequest {
|
||||
page: matches[0].0.to_string(),
|
||||
ensure_loaded: true,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn execute<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
req: ZhihuNavigateRequest,
|
||||
) -> Result<ZhihuNavigateResult, ZhihuNavigationError> {
|
||||
validate_request(&req)?;
|
||||
let catalog = load_catalog()?;
|
||||
let target_key = req.page.trim();
|
||||
let target = resolve_target(&catalog, target_key)?;
|
||||
let mut state = ExecutionState::default();
|
||||
|
||||
match target.kind {
|
||||
ZhihuTargetKind::Route => {
|
||||
let route_ref = target.route_ref.as_deref().ok_or_else(|| {
|
||||
ZhihuNavigationError::InvalidTargetDefinition(target_key.to_string())
|
||||
})?;
|
||||
run_route(
|
||||
transport,
|
||||
browser_tool,
|
||||
&catalog,
|
||||
route_ref,
|
||||
req.ensure_loaded,
|
||||
&mut state,
|
||||
)?;
|
||||
}
|
||||
ZhihuTargetKind::Component => {
|
||||
let component_ref = target.component_ref.as_deref().ok_or_else(|| {
|
||||
ZhihuNavigationError::InvalidTargetDefinition(target_key.to_string())
|
||||
})?;
|
||||
run_component_target(
|
||||
transport,
|
||||
browser_tool,
|
||||
&catalog,
|
||||
component_ref,
|
||||
req.ensure_loaded,
|
||||
&mut state,
|
||||
)?;
|
||||
}
|
||||
ZhihuTargetKind::Flow => {
|
||||
let flow_ref = target.flow_ref.as_deref().ok_or_else(|| {
|
||||
ZhihuNavigationError::InvalidTargetDefinition(target_key.to_string())
|
||||
})?;
|
||||
run_flow(transport, browser_tool, &catalog, flow_ref, &mut state)?;
|
||||
}
|
||||
}
|
||||
|
||||
let final_url = state.final_url.unwrap_or_default();
|
||||
Ok(ZhihuNavigateResult {
|
||||
summary: build_summary(target, &final_url),
|
||||
page: target_key.to_string(),
|
||||
final_url,
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_request(req: &ZhihuNavigateRequest) -> Result<(), ZhihuNavigationError> {
|
||||
if req.page.trim().is_empty() {
|
||||
return Err(ZhihuNavigationError::EmptyPage);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_route<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
route_ref: &str,
|
||||
ensure_loaded: bool,
|
||||
state: &mut ExecutionState,
|
||||
) -> Result<(), ZhihuNavigationError> {
|
||||
let route = resolve_route(catalog, route_ref)?;
|
||||
let expected_domain = resolve_domain(catalog, &route.domain_ref)?;
|
||||
|
||||
send_log(transport, &format!("navigate {}", route.url), "navigate")?;
|
||||
let navigate_result = invoke_browser_action(
|
||||
browser_tool,
|
||||
Action::Navigate,
|
||||
json!({ "url": route.url }),
|
||||
expected_domain.as_str(),
|
||||
"navigate",
|
||||
)?;
|
||||
state.final_url = Some(extract_url(&navigate_result.data).unwrap_or_else(|| route.url.clone()));
|
||||
|
||||
if ensure_loaded {
|
||||
run_post_action_checks(
|
||||
transport,
|
||||
browser_tool,
|
||||
PostActionChecks {
|
||||
expected_domain: expected_domain.as_str(),
|
||||
wait_selector: route.wait_selector.as_deref(),
|
||||
wait_timeout_ms: route.wait_timeout_ms,
|
||||
expect_selector: route.expect_selector.as_deref(),
|
||||
expect_text: route.expect_text.as_deref(),
|
||||
reset_url_when_absent: false,
|
||||
},
|
||||
state,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_component_target<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
component_ref: &str,
|
||||
ensure_loaded: bool,
|
||||
state: &mut ExecutionState,
|
||||
) -> Result<(), ZhihuNavigationError> {
|
||||
let component = resolve_component(catalog, component_ref)?;
|
||||
|
||||
if let Some(entry_route_ref) = component.entry_route_ref.as_deref() {
|
||||
run_route(
|
||||
transport,
|
||||
browser_tool,
|
||||
catalog,
|
||||
entry_route_ref,
|
||||
false,
|
||||
state,
|
||||
)?;
|
||||
}
|
||||
|
||||
let expected_domain = resolve_domain(catalog, &component.domain_ref)?;
|
||||
send_log(
|
||||
transport,
|
||||
&format!("click {}", component.title),
|
||||
component_ref,
|
||||
)?;
|
||||
let click_result = invoke_browser_action(
|
||||
browser_tool,
|
||||
Action::Click,
|
||||
build_click_params(component.selector.as_str(), component.wait_after_ms),
|
||||
expected_domain.as_str(),
|
||||
component_ref,
|
||||
)?;
|
||||
|
||||
if component.capture_url {
|
||||
if let Some(url) = extract_url(&click_result.data) {
|
||||
state.final_url = Some(url);
|
||||
}
|
||||
}
|
||||
|
||||
if ensure_loaded {
|
||||
let result_domain_ref = component
|
||||
.result_domain_ref
|
||||
.as_deref()
|
||||
.unwrap_or(component.domain_ref.as_str());
|
||||
let result_domain = resolve_domain(catalog, result_domain_ref)?;
|
||||
run_post_action_checks(
|
||||
transport,
|
||||
browser_tool,
|
||||
PostActionChecks {
|
||||
expected_domain: result_domain.as_str(),
|
||||
wait_selector: component.wait_selector.as_deref(),
|
||||
wait_timeout_ms: component.wait_timeout_ms,
|
||||
expect_selector: component.expect_selector.as_deref(),
|
||||
expect_text: component.expect_text.as_deref(),
|
||||
reset_url_when_absent: !component.capture_url,
|
||||
},
|
||||
state,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_flow<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
flow_ref: &str,
|
||||
state: &mut ExecutionState,
|
||||
) -> Result<(), ZhihuNavigationError> {
|
||||
let flow = resolve_flow(catalog, flow_ref)?;
|
||||
|
||||
for step in &flow.steps {
|
||||
send_log(transport, &step.log_message, step.name.as_str())?;
|
||||
let action = parse_action(&step.action)?;
|
||||
let is_navigate = matches!(action, Action::Navigate);
|
||||
let (expected_domain, params, fallback_url) =
|
||||
build_flow_step_action(catalog, step, &action)?;
|
||||
let result = invoke_browser_action(
|
||||
browser_tool,
|
||||
action,
|
||||
params,
|
||||
expected_domain.as_str(),
|
||||
step.name.as_str(),
|
||||
)?;
|
||||
|
||||
if is_navigate {
|
||||
state.final_url = Some(
|
||||
extract_url(&result.data)
|
||||
.or(fallback_url.clone())
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
} else if step.capture_url {
|
||||
if let Some(url) = extract_url(&result.data) {
|
||||
state.final_url = Some(url);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(expected_text) = step.expect_text.as_deref() {
|
||||
let actual = extract_content(&result.data);
|
||||
if !actual.contains(expected_text) {
|
||||
return Err(ZhihuNavigationError::ExpectedTextMissing {
|
||||
step: step.name.clone(),
|
||||
expected: expected_text.to_string(),
|
||||
actual,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_flow_step_action(
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
step: &ZhihuFlowStep,
|
||||
action: &Action,
|
||||
) -> Result<(String, Value, Option<String>), ZhihuNavigationError> {
|
||||
match action {
|
||||
Action::Navigate => {
|
||||
let route_ref = step
|
||||
.route_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingRouteRef(step.name.clone()))?;
|
||||
let route = resolve_route(catalog, route_ref)?;
|
||||
let domain_key = step
|
||||
.expected_domain
|
||||
.as_deref()
|
||||
.unwrap_or(route.domain_ref.as_str());
|
||||
let expected_domain = resolve_domain(catalog, domain_key)?;
|
||||
Ok((
|
||||
expected_domain,
|
||||
json!({ "url": route.url }),
|
||||
Some(route.url.clone()),
|
||||
))
|
||||
}
|
||||
Action::Click => {
|
||||
let component_ref = step
|
||||
.component_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
|
||||
let component = resolve_component(catalog, component_ref)?;
|
||||
let domain_key = step
|
||||
.expected_domain
|
||||
.as_deref()
|
||||
.unwrap_or(component.domain_ref.as_str());
|
||||
let expected_domain = resolve_domain(catalog, domain_key)?;
|
||||
let wait_after_ms = step.wait_after_ms.or(component.wait_after_ms);
|
||||
Ok((
|
||||
expected_domain,
|
||||
build_click_params(component.selector.as_str(), wait_after_ms),
|
||||
None,
|
||||
))
|
||||
}
|
||||
Action::WaitForSelector => {
|
||||
let component_ref = step
|
||||
.component_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
|
||||
let component = resolve_component(catalog, component_ref)?;
|
||||
let domain_key = step
|
||||
.expected_domain
|
||||
.as_deref()
|
||||
.unwrap_or(component.domain_ref.as_str());
|
||||
let expected_domain = resolve_domain(catalog, domain_key)?;
|
||||
Ok((
|
||||
expected_domain,
|
||||
json!({
|
||||
"selector": component.selector,
|
||||
"timeout_ms": step.timeout_ms.unwrap_or(component.wait_timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS)),
|
||||
}),
|
||||
None,
|
||||
))
|
||||
}
|
||||
Action::GetText => {
|
||||
let component_ref = step
|
||||
.component_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
|
||||
let component = resolve_component(catalog, component_ref)?;
|
||||
let domain_key = step
|
||||
.expected_domain
|
||||
.as_deref()
|
||||
.unwrap_or(component.domain_ref.as_str());
|
||||
let expected_domain = resolve_domain(catalog, domain_key)?;
|
||||
Ok((
|
||||
expected_domain,
|
||||
json!({ "selector": component.selector }),
|
||||
None,
|
||||
))
|
||||
}
|
||||
Action::GetHtml => {
|
||||
let component_ref = step
|
||||
.component_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
|
||||
let component = resolve_component(catalog, component_ref)?;
|
||||
let domain_key = step
|
||||
.expected_domain
|
||||
.as_deref()
|
||||
.unwrap_or(component.domain_ref.as_str());
|
||||
let expected_domain = resolve_domain(catalog, domain_key)?;
|
||||
Ok((
|
||||
expected_domain,
|
||||
json!({ "selector": component.selector, "outer": true }),
|
||||
None,
|
||||
))
|
||||
}
|
||||
Action::ScrollTo => {
|
||||
let component_ref = step
|
||||
.component_ref
|
||||
.as_deref()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingComponentRef(step.name.clone()))?;
|
||||
let component = resolve_component(catalog, component_ref)?;
|
||||
let domain_key = step
|
||||
.expected_domain
|
||||
.as_deref()
|
||||
.unwrap_or(component.domain_ref.as_str());
|
||||
let expected_domain = resolve_domain(catalog, domain_key)?;
|
||||
Ok((
|
||||
expected_domain,
|
||||
json!({ "selector": component.selector }),
|
||||
None,
|
||||
))
|
||||
}
|
||||
other => Err(ZhihuNavigationError::UnknownAction(
|
||||
other.as_str().to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn run_post_action_checks<T: Transport>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
checks: PostActionChecks<'_>,
|
||||
state: &mut ExecutionState,
|
||||
) -> Result<(), ZhihuNavigationError> {
|
||||
if let Some(selector) = checks.wait_selector {
|
||||
send_log(
|
||||
transport,
|
||||
&format!("wait for {selector}"),
|
||||
"wait_for_selector",
|
||||
)?;
|
||||
let wait_result = invoke_browser_action(
|
||||
browser_tool,
|
||||
Action::WaitForSelector,
|
||||
json!({
|
||||
"selector": selector,
|
||||
"timeout_ms": checks.wait_timeout_ms.unwrap_or(DEFAULT_WAIT_TIMEOUT_MS),
|
||||
}),
|
||||
checks.expected_domain,
|
||||
"wait_for_selector",
|
||||
)?;
|
||||
if let Some(url) = extract_url(&wait_result.data) {
|
||||
state.final_url = Some(url);
|
||||
} else if checks.reset_url_when_absent {
|
||||
state.final_url = None;
|
||||
}
|
||||
}
|
||||
|
||||
if let (Some(selector), Some(expected_text)) = (checks.expect_selector, checks.expect_text) {
|
||||
send_log(transport, &format!("verify {selector}"), "verify_text")?;
|
||||
let text_result = invoke_browser_action(
|
||||
browser_tool,
|
||||
Action::GetText,
|
||||
json!({ "selector": selector }),
|
||||
checks.expected_domain,
|
||||
"verify_text",
|
||||
)?;
|
||||
if let Some(url) = extract_url(&text_result.data) {
|
||||
state.final_url = Some(url);
|
||||
} else if checks.reset_url_when_absent {
|
||||
state.final_url = None;
|
||||
}
|
||||
let actual = extract_content(&text_result.data);
|
||||
if !actual.contains(expected_text) {
|
||||
return Err(ZhihuNavigationError::ExpectedTextMissing {
|
||||
step: "verify_text".to_string(),
|
||||
expected: expected_text.to_string(),
|
||||
actual,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn send_log<T: Transport>(
|
||||
transport: &T,
|
||||
message: &str,
|
||||
step: &str,
|
||||
) -> Result<(), ZhihuNavigationError> {
|
||||
transport
|
||||
.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: message.to_string(),
|
||||
})
|
||||
.map_err(|err| ZhihuNavigationError::BrowserActionFailed {
|
||||
step: step.to_string(),
|
||||
message: err.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn invoke_browser_action<T: Transport>(
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
action: Action,
|
||||
params: Value,
|
||||
expected_domain: &str,
|
||||
step: &str,
|
||||
) -> Result<CommandOutput, ZhihuNavigationError> {
|
||||
let result = browser_tool
|
||||
.invoke(action, params, expected_domain)
|
||||
.map_err(|err| ZhihuNavigationError::BrowserActionFailed {
|
||||
step: step.to_string(),
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
if !result.success {
|
||||
return Err(ZhihuNavigationError::BrowserActionFailed {
|
||||
step: step.to_string(),
|
||||
message: result.data.to_string(),
|
||||
});
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn build_click_params(selector: &str, wait_after_ms: Option<u64>) -> Value {
|
||||
let mut params = serde_json::Map::new();
|
||||
params.insert("selector".to_string(), Value::String(selector.to_string()));
|
||||
if let Some(wait_after_ms) = wait_after_ms {
|
||||
params.insert("wait_after".to_string(), Value::from(wait_after_ms));
|
||||
}
|
||||
Value::Object(params)
|
||||
}
|
||||
|
||||
fn parse_action(name: &str) -> Result<Action, ZhihuNavigationError> {
|
||||
match name {
|
||||
"click" => Ok(Action::Click),
|
||||
"navigate" => Ok(Action::Navigate),
|
||||
"getText" => Ok(Action::GetText),
|
||||
"getHtml" => Ok(Action::GetHtml),
|
||||
"waitForSelector" => Ok(Action::WaitForSelector),
|
||||
"scrollTo" => Ok(Action::ScrollTo),
|
||||
other => Err(ZhihuNavigationError::UnknownAction(other.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_target<'a>(
|
||||
catalog: &'a ZhihuNavigationCatalog,
|
||||
target_key: &str,
|
||||
) -> Result<&'a ZhihuTargetDefinition, ZhihuNavigationError> {
|
||||
catalog
|
||||
.targets
|
||||
.get(target_key)
|
||||
.ok_or_else(|| ZhihuNavigationError::UnknownTarget(target_key.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_route<'a>(
|
||||
catalog: &'a ZhihuNavigationCatalog,
|
||||
route_ref: &str,
|
||||
) -> Result<&'a ZhihuRouteDefinition, ZhihuNavigationError> {
|
||||
catalog
|
||||
.routes
|
||||
.get(route_ref)
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingRoute(route_ref.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_component<'a>(
|
||||
catalog: &'a ZhihuNavigationCatalog,
|
||||
component_ref: &str,
|
||||
) -> Result<&'a ZhihuComponentDefinition, ZhihuNavigationError> {
|
||||
catalog
|
||||
.components
|
||||
.get(component_ref)
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingComponent(component_ref.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_flow<'a>(
|
||||
catalog: &'a ZhihuNavigationCatalog,
|
||||
flow_ref: &str,
|
||||
) -> Result<&'a ZhihuFlowDefinition, ZhihuNavigationError> {
|
||||
catalog
|
||||
.flows
|
||||
.get(flow_ref)
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingFlow(flow_ref.to_string()))
|
||||
}
|
||||
|
||||
fn resolve_domain(
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
key: &str,
|
||||
) -> Result<String, ZhihuNavigationError> {
|
||||
catalog
|
||||
.domains
|
||||
.get(key)
|
||||
.cloned()
|
||||
.ok_or_else(|| ZhihuNavigationError::MissingDomain(key.to_string()))
|
||||
}
|
||||
|
||||
fn best_target_match_score(
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
target: &ZhihuTargetDefinition,
|
||||
normalized_instruction: &str,
|
||||
) -> usize {
|
||||
let best_len = collect_target_aliases(catalog, target)
|
||||
.into_iter()
|
||||
.map(|alias| normalize_text(alias.as_str()))
|
||||
.filter(|alias| !alias.is_empty() && normalized_instruction.contains(alias))
|
||||
.map(|alias| alias.len())
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
|
||||
if best_len == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
best_len * 100 + match_bonus(target, normalized_instruction)
|
||||
}
|
||||
|
||||
fn collect_target_aliases(
|
||||
catalog: &ZhihuNavigationCatalog,
|
||||
target: &ZhihuTargetDefinition,
|
||||
) -> Vec<String> {
|
||||
let mut aliases = Vec::new();
|
||||
aliases.push(target.title.clone());
|
||||
aliases.extend(target.aliases.iter().cloned());
|
||||
|
||||
if let Some(route_ref) = target.route_ref.as_deref() {
|
||||
if let Some(route) = catalog.routes.get(route_ref) {
|
||||
aliases.push(route.title.clone());
|
||||
aliases.extend(route.aliases.iter().cloned());
|
||||
}
|
||||
}
|
||||
if let Some(component_ref) = target.component_ref.as_deref() {
|
||||
if let Some(component) = catalog.components.get(component_ref) {
|
||||
aliases.push(component.title.clone());
|
||||
aliases.extend(component.aliases.iter().cloned());
|
||||
}
|
||||
}
|
||||
|
||||
aliases.retain(|alias| !alias.trim().is_empty());
|
||||
aliases.sort();
|
||||
aliases.dedup();
|
||||
aliases
|
||||
}
|
||||
|
||||
fn match_bonus(target: &ZhihuTargetDefinition, normalized_instruction: &str) -> usize {
|
||||
let mut bonus = 0;
|
||||
let summary_kind = target_summary_kind(target);
|
||||
|
||||
if normalized_instruction.contains("页面") && summary_kind == ZhihuSummaryKind::Page {
|
||||
bonus += 20;
|
||||
}
|
||||
if ["按钮", "入口"]
|
||||
.iter()
|
||||
.any(|token| normalized_instruction.contains(token))
|
||||
&& summary_kind == ZhihuSummaryKind::Entry
|
||||
{
|
||||
bonus += 20;
|
||||
}
|
||||
if ["菜单", "下拉"]
|
||||
.iter()
|
||||
.any(|token| normalized_instruction.contains(token))
|
||||
&& summary_kind == ZhihuSummaryKind::Menu
|
||||
{
|
||||
bonus += 20;
|
||||
}
|
||||
|
||||
bonus
|
||||
}
|
||||
|
||||
fn target_summary_kind(target: &ZhihuTargetDefinition) -> ZhihuSummaryKind {
|
||||
target.summary_kind.unwrap_or(match target.kind {
|
||||
ZhihuTargetKind::Route => ZhihuSummaryKind::Page,
|
||||
ZhihuTargetKind::Component => ZhihuSummaryKind::Entry,
|
||||
ZhihuTargetKind::Flow => ZhihuSummaryKind::Navigation,
|
||||
})
|
||||
}
|
||||
|
||||
fn build_summary(target: &ZhihuTargetDefinition, final_url: &str) -> String {
|
||||
match target_summary_kind(target) {
|
||||
ZhihuSummaryKind::Page => {
|
||||
format!("知乎页面已打开:{} ({final_url})", target.title)
|
||||
}
|
||||
ZhihuSummaryKind::Entry => {
|
||||
if final_url.is_empty() {
|
||||
format!("知乎入口已打开:{}", target.title)
|
||||
} else {
|
||||
format!("知乎入口已打开:{} ({final_url})", target.title)
|
||||
}
|
||||
}
|
||||
ZhihuSummaryKind::Menu => format!("知乎菜单已打开:{}", target.title),
|
||||
ZhihuSummaryKind::Navigation => {
|
||||
if final_url.is_empty() {
|
||||
format!("知乎导航已完成:{}", target.title)
|
||||
} else {
|
||||
format!("知乎导航已完成:{} ({final_url})", target.title)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn looks_like_navigation_intent(instruction: &str) -> bool {
|
||||
let normalized = normalize_text(instruction);
|
||||
let has_platform = ["知乎", "专栏", "创作中心", "创作者中心"]
|
||||
.iter()
|
||||
.any(|token| normalized.contains(token));
|
||||
let has_verb = ["打开", "进入", "跳转", "前往", "去", "点开", "展开", "切到"]
|
||||
.iter()
|
||||
.any(|token| normalized.contains(token));
|
||||
has_platform && has_verb
|
||||
}
|
||||
|
||||
/// Canonicalizes text for alias matching: drops all whitespace and a fixed
/// set of CJK / ASCII punctuation, and lowercases what remains.
fn normalize_text(text: &str) -> String {
    const IGNORED_PUNCTUATION: [char; 27] = [
        ',', '。', ':', ';', '!', '?', '、', '(', ')', '【', '】', ',', '.', ':', ';', '!',
        '?', '(', ')', '[', ']', '"', '\'', '/', '\\', '-', '_',
    ];

    let mut normalized = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch.is_whitespace() || IGNORED_PUNCTUATION.contains(&ch) {
            continue;
        }
        // Lowercasing may expand one char into several.
        normalized.extend(ch.to_lowercase());
    }
    normalized
}
|
||||
|
||||
fn extract_content(data: &Value) -> String {
|
||||
data.get("text")
|
||||
.and_then(Value::as_str)
|
||||
.or_else(|| data.get("html").and_then(Value::as_str))
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn extract_url(data: &Value) -> Option<String> {
|
||||
data.get("url")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::trim)
|
||||
.filter(|url| !url.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
}
|
||||
@@ -17,6 +17,7 @@ impl MockTransport {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn sent_messages(&self) -> Vec<AgentMessage> {
|
||||
self.sent.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
@@ -26,7 +26,9 @@ fn test_policy() -> MacPolicy {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn build_adapter(messages: Vec<BrowserMessage>) -> (Arc<MockTransport>, ZeroClawBrowserTool<MockTransport>) {
|
||||
fn build_adapter(
|
||||
messages: Vec<BrowserMessage>,
|
||||
) -> (Arc<MockTransport>, ZeroClawBrowserTool<MockTransport>) {
|
||||
let transport = Arc::new(MockTransport::new(messages));
|
||||
let browser_tool = BrowserPipeTool::new(
|
||||
transport.clone(),
|
||||
@@ -193,13 +195,11 @@ async fn zeroclaw_browser_tool_keeps_domain_validation_in_mac_policy() {
|
||||
assert!(!result.success);
|
||||
assert!(result.output.is_empty());
|
||||
assert_eq!(transport.sent_messages().len(), 0);
|
||||
assert!(
|
||||
result
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("domain is not allowed")
|
||||
);
|
||||
assert!(result
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("domain is not allowed"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -232,25 +232,19 @@ async fn zeroclaw_browser_tool_rejects_missing_required_action_parameters() {
|
||||
assert!(!missing_text_selector.success);
|
||||
assert!(!missing_navigate_url.success);
|
||||
assert_eq!(transport.sent_messages().len(), 0);
|
||||
assert!(
|
||||
missing_click_selector
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("click requires selector")
|
||||
);
|
||||
assert!(
|
||||
missing_text_selector
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("getText requires selector")
|
||||
);
|
||||
assert!(
|
||||
missing_navigate_url
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("navigate requires url")
|
||||
);
|
||||
assert!(missing_click_selector
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("click requires selector"));
|
||||
assert!(missing_text_selector
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("getText requires selector"));
|
||||
assert!(missing_navigate_url
|
||||
.error
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.contains("navigate requires url"));
|
||||
}
|
||||
|
||||
@@ -3,9 +3,7 @@ use std::path::Path;
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
|
||||
use sgclaw::compat::config_adapter::{
|
||||
build_zeroclaw_config,
|
||||
build_zeroclaw_config_from_settings,
|
||||
zeroclaw_workspace_dir,
|
||||
build_zeroclaw_config, build_zeroclaw_config_from_settings, zeroclaw_workspace_dir,
|
||||
};
|
||||
use sgclaw::config::DeepSeekSettings;
|
||||
use uuid::Uuid;
|
||||
@@ -49,11 +47,17 @@ fn zeroclaw_config_adapter_uses_deterministic_workspace_dir() {
|
||||
let workspace_dir = zeroclaw_workspace_dir(Path::new("/var/lib/sgclaw"));
|
||||
let config = build_zeroclaw_config_from_settings(Path::new("/var/lib/sgclaw"), &settings);
|
||||
|
||||
assert_eq!(workspace_dir, Path::new("/var/lib/sgclaw/.sgclaw-zeroclaw-workspace"));
|
||||
assert_eq!(
|
||||
workspace_dir,
|
||||
Path::new("/var/lib/sgclaw/.sgclaw-zeroclaw-workspace")
|
||||
);
|
||||
assert_eq!(config.workspace_dir, workspace_dir);
|
||||
assert_eq!(config.default_provider.as_deref(), Some("deepseek"));
|
||||
assert_eq!(config.default_model.as_deref(), Some("deepseek-reasoner"));
|
||||
assert_eq!(config.api_url.as_deref(), Some("https://proxy.example.com/v1"));
|
||||
assert_eq!(
|
||||
config.api_url.as_deref(),
|
||||
Some("https://proxy.example.com/v1")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -3,7 +3,7 @@ mod common;
|
||||
use std::fs;
|
||||
use std::io::{Read, Write};
|
||||
use std::net::TcpListener;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
@@ -11,9 +11,7 @@ use std::time::Duration;
|
||||
use common::MockTransport;
|
||||
use serde_json::{json, Value};
|
||||
use sgclaw::agent::{
|
||||
handle_browser_message,
|
||||
handle_browser_message_with_context,
|
||||
AgentRuntimeContext,
|
||||
handle_browser_message, handle_browser_message_with_context, AgentRuntimeContext,
|
||||
};
|
||||
use sgclaw::compat::runtime::{execute_task, CompatTaskContext};
|
||||
use sgclaw::config::DeepSeekSettings;
|
||||
@@ -48,7 +46,7 @@ fn temp_workspace_root() -> PathBuf {
|
||||
root
|
||||
}
|
||||
|
||||
fn write_deepseek_config(root: &PathBuf, api_key: &str, base_url: &str, model: &str) -> PathBuf {
|
||||
fn write_deepseek_config(root: &Path, api_key: &str, base_url: &str, model: &str) -> PathBuf {
|
||||
let config_path = root.join("sgclaw_config.json");
|
||||
fs::write(
|
||||
&config_path,
|
||||
@@ -94,7 +92,7 @@ fn start_fake_deepseek_server(
|
||||
let payload = response.to_string();
|
||||
let reply = format!(
|
||||
"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
|
||||
payload.as_bytes().len(),
|
||||
payload.len(),
|
||||
payload
|
||||
);
|
||||
stream.write_all(reply.as_bytes()).unwrap();
|
||||
@@ -281,7 +279,8 @@ fn compat_runtime_uses_zeroclaw_provider_path_and_executes_browser_actions() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handle_browser_message_prefers_compat_runtime_for_supported_instruction_when_deepseek_is_configured() {
|
||||
fn handle_browser_message_prefers_compat_runtime_for_supported_instruction_when_deepseek_is_configured(
|
||||
) {
|
||||
let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner());
|
||||
|
||||
let first_response = json!({
|
||||
@@ -643,11 +642,9 @@ fn compat_runtime_includes_prior_turns_in_follow_up_provider_request() {
|
||||
|
||||
assert_eq!(summary, "已在知乎搜索天气");
|
||||
assert!(first_request_messages.iter().any(|message| {
|
||||
message["role"] == json!("user")
|
||||
&& message["content"] == json!("打开百度搜索天气")
|
||||
message["role"] == json!("user") && message["content"] == json!("打开百度搜索天气")
|
||||
}));
|
||||
assert!(first_request_messages.iter().any(|message| {
|
||||
message["role"] == json!("assistant")
|
||||
&& message["content"] == json!("已在百度搜索天气")
|
||||
message["role"] == json!("assistant") && message["content"] == json!("已在百度搜索天气")
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -60,8 +60,5 @@ fn deepseek_request_shape_matches_openai_compatible_chat_format() {
|
||||
assert_eq!(serialized["messages"][0]["role"], "system");
|
||||
assert_eq!(serialized["messages"][1]["content"], "打开百度搜索天气");
|
||||
assert_eq!(serialized["tools"][0]["type"], "function");
|
||||
assert_eq!(
|
||||
serialized["tools"][0]["function"]["name"],
|
||||
"browser_action"
|
||||
);
|
||||
assert_eq!(serialized["tools"][0]["function"]["name"], "browser_action");
|
||||
}
|
||||
|
||||
@@ -12,9 +12,9 @@ fn test_policy() -> MacPolicy {
|
||||
MacPolicy::from_json_str(
|
||||
r#"{
|
||||
"version": "1.0",
|
||||
"domains": { "allowed": ["oa.example.com", "www.baidu.com"] },
|
||||
"domains": { "allowed": ["oa.example.com", "www.baidu.com", "www.zhihu.com", "zhuanlan.zhihu.com"] },
|
||||
"pipe_actions": {
|
||||
"allowed": ["click", "type", "navigate", "getText"],
|
||||
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
|
||||
"blocked": ["eval", "executeJsInPage"]
|
||||
}
|
||||
}"#,
|
||||
@@ -120,3 +120,116 @@ fn submit_task_sends_three_commands_and_finishes_with_task_complete() {
|
||||
if *success && summary == "已在百度搜索天气"
|
||||
));
|
||||
}
|
||||
|
||||
/// An explicit `skill:zhihu_navigate` instruction is handled directly: one
/// log entry, one navigate command, then task completion.
#[test]
fn explicit_zhihu_skill_short_circuits_before_planner_fallback() {
    let canned_response = BrowserMessage::Response {
        seq: 1,
        success: true,
        data: serde_json::json!({ "url": "https://www.zhihu.com/creator/analytics/work/all" }),
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 20,
        },
    };
    let transport = Arc::new(MockTransport::new(vec![canned_response]));
    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let task = BrowserMessage::SubmitTask {
        instruction: r#"skill:zhihu_navigate {"page":"content_analysis","ensure_loaded":false}"#
            .to_string(),
        conversation_id: String::new(),
        messages: vec![],
        page_url: String::new(),
        page_title: String::new(),
    };
    handle_browser_message(transport.as_ref(), &tool, task).unwrap();

    let sent = transport.sent_messages();
    assert_eq!(sent.len(), 3);
    let (log, command, done) = (&sent[0], &sent[1], &sent[2]);

    assert!(matches!(
        log,
        AgentMessage::LogEntry { level, message }
            if level == "info"
                && message == "navigate https://www.zhihu.com/creator/analytics/work/all"
    ));
    assert!(matches!(
        command,
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 1
                && action == &Action::Navigate
                && security.expected_domain == "www.zhihu.com"
    ));
    assert!(matches!(
        done,
        AgentMessage::TaskComplete { success, summary }
            if *success
                && summary
                    == "知乎页面已打开:内容分析 (https://www.zhihu.com/creator/analytics/work/all)"
    ));
}
|
||||
|
||||
/// A natural-language request to open the Zhihu home page is routed to the
/// navigation skill: one log entry, one navigate command, then completion.
#[test]
fn natural_language_zhihu_navigation_short_circuits_before_planner_fallback() {
    let canned_response = BrowserMessage::Response {
        seq: 1,
        success: true,
        data: serde_json::json!({ "url": "https://www.zhihu.com/" }),
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 20,
        },
    };
    let transport = Arc::new(MockTransport::new(vec![canned_response]));
    let tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let task = BrowserMessage::SubmitTask {
        instruction: "打开知乎首页".to_string(),
        conversation_id: String::new(),
        messages: vec![],
        page_url: String::new(),
        page_title: String::new(),
    };
    handle_browser_message(transport.as_ref(), &tool, task).unwrap();

    let sent = transport.sent_messages();
    assert_eq!(sent.len(), 3);
    let (log, command, done) = (&sent[0], &sent[1], &sent[2]);

    assert!(matches!(
        log,
        AgentMessage::LogEntry { level, message }
            if level == "info" && message == "navigate https://www.zhihu.com/"
    ));
    assert!(matches!(
        command,
        AgentMessage::Command { seq, action, security, .. }
            if *seq == 1
                && action == &Action::Navigate
                && security.expected_domain == "www.zhihu.com"
    ));
    assert!(matches!(
        done,
        AgentMessage::TaskComplete { success, summary }
            if *success && summary == "知乎页面已打开:首页 (https://www.zhihu.com/)"
    ));
}
|
||||
|
||||
441
tests/skill_router_test.rs
Normal file
441
tests/skill_router_test.rs
Normal file
@@ -0,0 +1,441 @@
|
||||
use sgclaw::skill::router::{route_instruction, RoutedSkill, RouterError};
|
||||
|
||||
/// An explicit `skill:zhihu_write` instruction must route to `ZhihuWrite`,
/// with title, body (JSON `\n` escapes decoded to newlines) and `publish`
/// taken from the JSON payload.
#[test]
fn route_instruction_parses_explicit_zhihu_skill() {
    let instruction = r#"skill:zhihu_write {"title":"自动发文能力测试","body":"第一段\n\n第二段","publish":false}"#;

    match route_instruction(instruction).unwrap() {
        Some(RoutedSkill::ZhihuWrite(request)) => {
            assert!(request.title == "自动发文能力测试");
            assert!(request.body == "第一段\n\n第二段");
            assert!(!request.publish);
        }
        _ => panic!("expected a ZhihuWrite route"),
    }
}
|
||||
|
||||
/// An explicit `skill:zhihu_hotlist_collect` instruction must route to
/// `ZhihuHotlistCollect` with `top_n`, `comments_per_item` and `store_dir`
/// taken from the JSON payload.
#[test]
fn route_instruction_parses_explicit_zhihu_hotlist_collect_skill() {
    let instruction = r#"skill:zhihu_hotlist_collect {"top_n":5,"comments_per_item":8,"store_dir":"data/zhihu_hotlist"}"#;

    match route_instruction(instruction).unwrap() {
        Some(RoutedSkill::ZhihuHotlistCollect(request)) => {
            assert!(request.top_n == 5);
            assert!(request.comments_per_item == 8);
            assert!(request.store_dir.as_deref() == Some("data/zhihu_hotlist"));
        }
        _ => panic!("expected a ZhihuHotlistCollect route"),
    }
}
|
||||
|
||||
/// An explicit `skill:zhihu_hotlist_report` instruction must route to
/// `ZhihuHotlistReport` with `snapshot_id` and `top_n` taken from the payload.
#[test]
fn route_instruction_parses_explicit_zhihu_hotlist_report_skill() {
    let instruction = r#"skill:zhihu_hotlist_report {"snapshot_id":"snap-1","top_n":3}"#;

    match route_instruction(instruction).unwrap() {
        Some(RoutedSkill::ZhihuHotlistReport(request)) => {
            assert!(request.snapshot_id.as_deref() == Some("snap-1"));
            assert!(request.top_n == 3);
        }
        _ => panic!("expected a ZhihuHotlistReport route"),
    }
}
|
||||
|
||||
/// An explicit `skill:zhihu_navigate` instruction must route to
/// `ZhihuNavigate` with `page` and `ensure_loaded` taken from the payload.
#[test]
fn route_instruction_parses_explicit_zhihu_navigation_skill() {
    let instruction = r#"skill:zhihu_navigate {"page":"content_analysis","ensure_loaded":true}"#;

    match route_instruction(instruction).unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "content_analysis");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "帮我打开知乎中的内容分析页面" must route to the `content_analysis`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_content_analysis_natural_language() {
    match route_instruction("帮我打开知乎中的内容分析页面").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "content_analysis");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// The short phrase "打开知乎内容分析" must route to the `content_analysis`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_short_zhihu_content_analysis_phrase() {
    match route_instruction("打开知乎内容分析").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "content_analysis");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎收益分析页面" must route to the `income_analysis` navigation
/// target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_income_analysis_natural_language() {
    match route_instruction("打开知乎收益分析页面").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "income_analysis");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎首页" must route to the `home` navigation target with
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_home_natural_language() {
    match route_instruction("打开知乎首页").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "home");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎热榜页面" must route to the `hot_list` navigation target with
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_hot_list_natural_language() {
    match route_instruction("打开知乎热榜页面").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "hot_list");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎专栏页" must route to the `column_home` navigation target with
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_column_home_natural_language() {
    match route_instruction("打开知乎专栏页").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "column_home");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎问题页" must route to the `question_page` navigation target with
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_question_page_natural_language() {
    match route_instruction("打开知乎问题页").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "question_page");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎消息分栏" must route to the `messages_page` navigation target
/// with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_page_natural_language() {
    match route_instruction("打开知乎消息分栏").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "messages_page");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎消息分栏全部私信" must route to the `messages_all_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_all_tab_natural_language() {
    match route_instruction("打开知乎消息分栏全部私信").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "messages_all_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎消息分栏未读消息" must route to the `messages_unread_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_unread_tab_natural_language() {
    match route_instruction("打开知乎消息分栏未读消息").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "messages_unread_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎消息分栏陌生人消息" must route to the `messages_strangers_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_strangers_tab_natural_language() {
    match route_instruction("打开知乎消息分栏陌生人消息").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "messages_strangers_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎消息设置菜单" must route to the `messages_settings_menu`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_messages_settings_menu_natural_language() {
    match route_instruction("打开知乎消息设置菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "messages_settings_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知分栏" must route to the `notifications_page` navigation
/// target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_page_natural_language() {
    match route_instruction("打开知乎通知分栏").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_page");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知分栏回复我的" must route to the `notifications_replies_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_replies_tab_natural_language() {
    match route_instruction("打开知乎通知分栏回复我的").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_replies_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知分栏赞同与收藏" must route to the
/// `notifications_votes_favorites_tab` navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_votes_favorites_tab_natural_language() {
    match route_instruction("打开知乎通知分栏赞同与收藏").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_votes_favorites_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知分栏关注我的" must route to the `notifications_follows_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_follows_tab_natural_language() {
    match route_instruction("打开知乎通知分栏关注我的").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_follows_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知分栏系统通知" must route to the `notifications_system_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_system_tab_natural_language() {
    match route_instruction("打开知乎通知分栏系统通知").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_system_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知设置菜单" must route to the `notifications_settings_menu`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_settings_menu_natural_language() {
    match route_instruction("打开知乎通知设置菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_settings_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎个人主页" must route to the `profile_page` navigation target
/// with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_profile_page_natural_language() {
    match route_instruction("打开知乎个人主页").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "profile_page");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎个人主页回答分栏" must route to the `profile_answers_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_profile_answers_tab_natural_language() {
    match route_instruction("打开知乎个人主页回答分栏").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "profile_answers_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎个人主页粉丝分栏" must route to the `profile_followers_tab`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_profile_followers_tab_natural_language() {
    match route_instruction("打开知乎个人主页粉丝分栏").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "profile_followers_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎账号设置菜单" must route to the `settings_account_menu`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_account_settings_natural_language() {
    match route_instruction("打开知乎账号设置菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "settings_account_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎隐私设置菜单" must route to the `settings_privacy_menu`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_privacy_settings_natural_language() {
    match route_instruction("打开知乎隐私设置菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "settings_privacy_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎安全设置菜单" must route to the `settings_security_menu`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_security_settings_natural_language() {
    match route_instruction("打开知乎安全设置菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "settings_security_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎搜索筛选菜单" must route to the `search_filter_menu`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_search_filter_menu_natural_language() {
    match route_instruction("打开知乎搜索筛选菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "search_filter_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎更多菜单" must route to the `context_more_menu` navigation
/// target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_context_more_menu_natural_language() {
    match route_instruction("打开知乎更多菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "context_more_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知菜单" must route to the `notifications_menu` navigation
/// target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_menu_natural_language() {
    match route_instruction("打开知乎通知菜单").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_menu");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎通知按钮" must route to the `notifications_entry` navigation
/// target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_notifications_entry_natural_language() {
    match route_instruction("打开知乎通知按钮").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "notifications_entry");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎搜索框" must route to the `search_box` navigation target with
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_search_box_natural_language() {
    match route_instruction("打开知乎搜索框").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "search_box");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "打开知乎创作中心写文章按钮" must route to the `creator_write_button`
/// navigation target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_creator_write_button_natural_language() {
    match route_instruction("打开知乎创作中心写文章按钮").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "creator_write_button");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// "从知乎首页进入热榜" must route to the `open_hot_from_home` navigation
/// target with `ensure_loaded` set.
#[test]
fn route_instruction_routes_open_hot_from_home_flow_natural_language() {
    match route_instruction("从知乎首页进入热榜").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "open_hot_from_home");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// A non-Zhihu instruction ("打开百度搜索天气") must not be routed to any
/// skill: the router returns `Ok(None)`.
#[test]
fn route_instruction_returns_none_for_non_skill_text() {
    let outcome = route_instruction("打开百度搜索天气");

    assert!(outcome.unwrap().is_none());
}
|
||||
|
||||
/// The bare phrase "打开知乎" names no specific target, so the router must
/// return `Ok(None)`.
#[test]
fn route_instruction_returns_none_for_vague_zhihu_navigation_text() {
    let outcome = route_instruction("打开知乎");

    assert!(outcome.unwrap().is_none());
}
|
||||
|
||||
/// "打开知乎通知" must not be routed to any skill: the router returns
/// `Ok(None)` for this phrase.
#[test]
fn route_instruction_returns_none_for_ambiguous_zhihu_notification_phrase() {
    let outcome = route_instruction("打开知乎通知");

    assert!(outcome.unwrap().is_none());
}
|
||||
|
||||
/// "打开知乎热榜按钮" must route to the `hot_tab` navigation target with
/// `ensure_loaded` set.
#[test]
fn route_instruction_routes_zhihu_hot_button_phrase_to_hot_tab() {
    match route_instruction("打开知乎热榜按钮").unwrap() {
        Some(RoutedSkill::ZhihuNavigate(request)) => {
            assert!(request.page == "hot_tab");
            assert!(request.ensure_loaded);
        }
        _ => panic!("expected a ZhihuNavigate route"),
    }
}
|
||||
|
||||
/// An explicit `skill:` prefix with an unregistered name must fail with
/// `RouterError::UnknownSkill` carrying that name.
#[test]
fn route_instruction_rejects_unknown_skill_name() {
    let failure = route_instruction(r#"skill:unknown {"x":1}"#).unwrap_err();

    match failure {
        RouterError::UnknownSkill(name) => assert!(name == "unknown"),
        _ => panic!("expected RouterError::UnknownSkill"),
    }
}
|
||||
403
tests/zhihu_hotlist_skill_test.rs
Normal file
403
tests/zhihu_hotlist_skill_test.rs
Normal file
@@ -0,0 +1,403 @@
|
||||
mod common;
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use common::MockTransport;
|
||||
use sgclaw::pipe::{BrowserMessage, BrowserPipeTool, Timing};
|
||||
use sgclaw::security::MacPolicy;
|
||||
use sgclaw::skill::zhihu_hotlist::{
|
||||
execute_collect, execute_report, load_flow, ZhihuHotlistCollectRequest,
|
||||
ZhihuHotlistReportRequest,
|
||||
};
|
||||
use sgclaw::skill::zhihu_hotlist_store::load_latest_snapshot;
|
||||
|
||||
fn test_policy() -> MacPolicy {
|
||||
MacPolicy::from_json_str(
|
||||
r#"{
|
||||
"version": "1.0",
|
||||
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
|
||||
"pipe_actions": {
|
||||
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
|
||||
"blocked": []
|
||||
}
|
||||
}"#,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Returns a path under the system temp directory named
/// `sgclaw-<label>-<nanoseconds since the Unix epoch>`.
///
/// The directory is not created here. Panics if the system clock reports a
/// time before the Unix epoch.
fn temp_store_dir(label: &str) -> PathBuf {
    let nanos_since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let dir_name = format!("sgclaw-{label}-{nanos_since_epoch}");

    let mut path = std::env::temp_dir();
    path.push(dir_name);
    path
}
|
||||
|
||||
/// HTML fixture standing in for the Zhihu hot-list page.
///
/// Contains two `<section data-hot-item>` entries:
/// - `/question/123`, title "第一条热榜", summary "第一条摘要", heat "1234 热度"
/// - `/question/456`, title "第二条热榜", summary "第二条摘要", heat "5.6 万热度"
fn hotlist_html() -> String {
    r#"
<html>
<body>
<main>
<section data-hot-item>
<h2><a href="/question/123">第一条热榜</a></h2>
<div class="HotItem-content">第一条摘要</div>
<div class="HotItem-hot">1234 热度</div>
</section>
<section data-hot-item>
<h2><a href="/question/456">第二条热榜</a></h2>
<div class="HotItem-content">第二条摘要</div>
<div class="HotItem-hot">5.6 万热度</div>
</section>
</main>
</body>
</html>
"#
    .to_string()
}
|
||||
|
||||
/// HTML fixture standing in for a question page's comment list.
///
/// Renders two `CommentItemV2` entries (`comment-1`, `comment-2`). The reply
/// ("回复") and upvote ("赞同") button counts of each entry come from the
/// arguments; the favorite ("收藏") and heart ("红心") counts are fixed at
/// 2/1 for the first entry and 4/3 for the second.
fn comment_html(
    first_reply: u64,
    first_upvote: u64,
    second_reply: u64,
    second_upvote: u64,
) -> String {
    format!(
        r#"
<html>
<body>
<div class="CommentListV2">
<div class="CommentItemV2" data-comment-id="comment-1">
<button>回复 {first_reply}</button>
<button>赞同 {first_upvote}</button>
<button>收藏 2</button>
<button>红心 1</button>
</div>
<div class="CommentItemV2" data-comment-id="comment-2">
<button>回复 {second_reply}</button>
<button>赞同 {second_upvote}</button>
<button>收藏 4</button>
<button>红心 3</button>
</div>
</div>
</body>
</html>
"#
    )
}
|
||||
|
||||
/// The hot-list flow returned by `load_flow` must keep the hot-list URL, the
/// `zhihu` domain mapping, and the substrings the selectors are expected to
/// contain.
#[test]
fn load_hotlist_flow_preserves_expected_selectors() {
    let flow = load_flow().unwrap();
    let selectors = &flow.selectors;

    assert_eq!(flow.hotlist_url, "https://www.zhihu.com/hot");
    assert_eq!(flow.domains["zhihu"], "www.zhihu.com");

    let item_selector_ok = selectors["hotlist_item"].contains("HotList-item");
    assert!(item_selector_ok);
    let metric_selector_ok = selectors["comment_metric"].contains("button");
    assert!(metric_selector_ok);
}
|
||||
|
||||
/// End-to-end happy path: collecting two hot-list items with two comments
/// each must persist a snapshot with the parsed titles, summaries, heat
/// values and comment metrics, and a report over that snapshot must mention
/// both items and the per-item totals of the first item.
#[test]
fn zhihu_hotlist_collect_persists_snapshot_and_report_reads_latest() {
    // Every scripted browser reply is a success with the same timing; only
    // the sequence number and payload vary.
    let ok = |seq: u64, data: serde_json::Value| BrowserMessage::Response {
        seq,
        success: true,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let store_dir = temp_store_dir("hotlist-collect");
    let scripted_replies = vec![
        // Hot-list page.
        ok(
            1,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" }),
        ),
        ok(2, serde_json::json!({ "ready": true })),
        ok(
            3,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" }),
        ),
        // First question page.
        ok(
            4,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" }),
        ),
        ok(5, serde_json::json!({ "ready": true })),
        ok(6, serde_json::json!({ "scrolled": true })),
        ok(7, serde_json::json!({ "ready": true })),
        ok(8, serde_json::json!({ "scrolled": true })),
        ok(
            9,
            serde_json::json!({ "html": comment_html(3, 15, 1, 8), "url": "https://www.zhihu.com/question/123" }),
        ),
        // Second question page.
        ok(
            10,
            serde_json::json!({ "url": "https://www.zhihu.com/question/456" }),
        ),
        ok(11, serde_json::json!({ "ready": true })),
        ok(12, serde_json::json!({ "scrolled": true })),
        ok(13, serde_json::json!({ "ready": true })),
        ok(14, serde_json::json!({ "scrolled": true })),
        ok(
            15,
            serde_json::json!({ "html": comment_html(5, 20, 4, 16), "url": "https://www.zhihu.com/question/456" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted_replies));
    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let collect_request = ZhihuHotlistCollectRequest {
        top_n: 2,
        comments_per_item: 2,
        store_dir: Some(store_dir.display().to_string()),
    };
    let result = execute_collect(transport.as_ref(), &browser_tool, collect_request).unwrap();

    assert_eq!(result.item_count, 2);
    assert!(result.summary.contains("知乎热榜快照已保存"));

    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    assert_eq!(snapshot.items.len(), 2);

    let first_item = &snapshot.items[0];
    assert_eq!(first_item.title, "第一条热榜");
    assert_eq!(first_item.summary, "第一条摘要");
    assert_eq!(first_item.heat_value, Some(1234));
    assert_eq!(first_item.comment_metrics.len(), 2);
    assert_eq!(first_item.comment_metrics[0].reply_count, Some(3));
    assert_eq!(first_item.comment_metrics[0].upvote_count, Some(15));
    // "5.6 万热度" is expected to be stored as 56 000.
    assert_eq!(snapshot.items[1].heat_value, Some(56_000));
    assert_eq!(snapshot.collection_stats.total_comment_metric_records, 4);

    let report_request = ZhihuHotlistReportRequest {
        snapshot_id: Some(result.snapshot_id.clone()),
        store_dir: Some(store_dir.display().to_string()),
        top_n: 2,
    };
    let report = execute_report(report_request).unwrap();

    // "回复 4" / "赞同 23" are the first item's per-comment counts summed
    // (3 + 1 replies, 15 + 8 upvotes).
    for expected in ["第一条热榜", "第二条热榜", "回复 4", "赞同 23"] {
        assert!(report.summary.contains(expected));
    }

    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&store_dir);
}
|
||||
|
||||
/// When the seventh browser reply fails ("comment list missing"), collection
/// must still succeed and persist a snapshot in which the single item is
/// counted as partial and carries no comment metrics.
#[test]
fn zhihu_hotlist_collect_persists_partial_snapshot_when_comment_capture_fails() {
    // Scripted browser replies share the same timing; sequence number,
    // success flag and payload vary.
    let reply = |seq: u64, success: bool, data: serde_json::Value| BrowserMessage::Response {
        seq,
        success,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let store_dir = temp_store_dir("hotlist-partial");
    let scripted_replies = vec![
        reply(
            1,
            true,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" }),
        ),
        reply(2, true, serde_json::json!({ "ready": true })),
        reply(
            3,
            true,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" }),
        ),
        reply(
            4,
            true,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" }),
        ),
        reply(5, true, serde_json::json!({ "ready": true })),
        reply(6, true, serde_json::json!({ "scrolled": true })),
        // The only failing reply in the script.
        reply(
            7,
            false,
            serde_json::json!({ "error": "comment list missing" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted_replies));
    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let collect_request = ZhihuHotlistCollectRequest {
        top_n: 1,
        comments_per_item: 2,
        store_dir: Some(store_dir.display().to_string()),
    };
    let result = execute_collect(transport.as_ref(), &browser_tool, collect_request).unwrap();

    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    let stats = &snapshot.collection_stats;
    assert_eq!(result.item_count, 1);
    assert_eq!(stats.partial_items, 1);
    assert_eq!(stats.total_comment_metric_records, 0);
    assert!(snapshot.items[0].comment_metrics.is_empty());

    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&store_dir);
}
|
||||
661
tests/zhihu_navigation_skill_test.rs
Normal file
661
tests/zhihu_navigation_skill_test.rs
Normal file
@@ -0,0 +1,661 @@
|
||||
mod common;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common::MockTransport;
|
||||
use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing};
|
||||
use sgclaw::security::MacPolicy;
|
||||
use sgclaw::skill::zhihu_navigation::{execute, load_catalog, ZhihuNavigateRequest};
|
||||
|
||||
fn test_policy() -> MacPolicy {
|
||||
MacPolicy::from_json_str(
|
||||
r#"{
|
||||
"version": "1.0",
|
||||
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
|
||||
"pipe_actions": {
|
||||
"allowed": ["click", "navigate", "getText", "waitForSelector"],
|
||||
"blocked": []
|
||||
}
|
||||
}"#,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn response(seq: u64, data: serde_json::Value) -> BrowserMessage {
|
||||
BrowserMessage::Response {
|
||||
seq,
|
||||
success: true,
|
||||
data,
|
||||
aom_snapshot: vec![],
|
||||
timing: Timing {
|
||||
queue_ms: 1,
|
||||
exec_ms: 10,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// The catalog must keep the `content_analysis` route: its creator domain,
/// its URL, the target that points at it, and the "知乎内容分析页面" alias.
#[test]
fn load_catalog_preserves_confirmed_content_analysis_route() {
    let catalog = load_catalog().unwrap();
    let route = &catalog.routes["content_analysis"];
    let target = &catalog.targets["content_analysis"];

    assert_eq!(catalog.domains["creator"], "www.zhihu.com");
    assert_eq!(
        route.url,
        "https://www.zhihu.com/creator/analytics/work/all"
    );
    assert_eq!(target.route_ref.as_deref(), Some("content_analysis"));

    let has_page_alias = route
        .aliases
        .iter()
        .any(|alias| alias == "知乎内容分析页面");
    assert!(has_page_alias);
}
|
||||
|
||||
/// The catalog must expose the top-level routes with their URLs, the
/// component-backed and flow-backed targets with their references, and the
/// `creator_write_button` component's result domain.
#[test]
fn load_catalog_includes_top_level_navigation_targets() {
    let catalog = load_catalog().unwrap();

    // Route key -> expected URL.
    let expected_route_urls = [
        ("home", "https://www.zhihu.com/"),
        ("hot_list", "https://www.zhihu.com/hot"),
        ("column_home", "https://zhuanlan.zhihu.com/"),
        ("messages_page", "https://www.zhihu.com/messages"),
        ("notifications_page", "https://www.zhihu.com/notifications"),
    ];
    for (route, url) in expected_route_urls {
        assert_eq!(catalog.routes[route].url, url, "route {}", route);
    }

    // Target key -> expected component reference.
    let expected_component_refs = [
        ("messages_unread_tab", "messages_tab_unread"),
        ("notifications_replies_tab", "notifications_tab_replies"),
        ("notifications_settings_menu", "notifications_settings_menu"),
        ("search_box", "top_nav_search"),
    ];
    for (target, component) in expected_component_refs {
        assert_eq!(
            catalog.targets[target].component_ref.as_deref(),
            Some(component),
            "target {}",
            target
        );
    }

    // Target key -> expected flow reference.
    let expected_flow_refs = [
        ("profile_page", "open_profile_from_avatar_menu"),
        ("notifications_menu", "open_notifications_menu"),
    ];
    for (target, flow) in expected_flow_refs {
        assert_eq!(
            catalog.targets[target].flow_ref.as_deref(),
            Some(flow),
            "target {}",
            target
        );
    }

    let write_button = &catalog.components["creator_write_button"];
    assert_eq!(write_button.result_domain_ref.as_deref(), Some("editor"));
}
|
||||
|
||||
/// Each profile-tab and settings-menu target in the catalog must reference
/// its expected flow.
#[test]
fn load_catalog_includes_expanded_profile_and_settings_flows() {
    let catalog = load_catalog().unwrap();

    // Target key -> expected flow reference.
    let expected_flow_refs = [
        ("profile_answers_tab", "open_profile_answers_tab"),
        ("profile_followers_tab", "open_profile_followers_tab"),
        (
            "settings_account_menu",
            "open_account_settings_from_avatar_menu",
        ),
        (
            "settings_privacy_menu",
            "open_privacy_settings_from_avatar_menu",
        ),
        (
            "settings_security_menu",
            "open_security_settings_from_avatar_menu",
        ),
    ];
    for (target, flow) in expected_flow_refs {
        assert_eq!(
            catalog.targets[target].flow_ref.as_deref(),
            Some(flow),
            "target {}",
            target
        );
    }
}
|
||||
|
||||
/// Executing the navigation skill for `content_analysis` must report the
/// page, final URL and summary, and must send exactly two messages: an info
/// log entry followed by a `Navigate` command with sequence number 1.
#[test]
fn zhihu_navigation_skill_opens_content_analysis_page() {
    let navigate_ok = response(
        1,
        serde_json::json!({ "url": "https://www.zhihu.com/creator/analytics/work/all" }),
    );
    let transport = Arc::new(MockTransport::new(vec![navigate_ok]));
    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let request = ZhihuNavigateRequest {
        page: String::from("content_analysis"),
        ensure_loaded: true,
    };
    let result = execute(transport.as_ref(), &browser_tool, request).unwrap();
    let outbound = transport.sent_messages();

    assert_eq!(
        result.summary,
        "知乎页面已打开:内容分析 (https://www.zhihu.com/creator/analytics/work/all)"
    );
    assert_eq!(result.page, "content_analysis");
    assert_eq!(
        result.final_url,
        "https://www.zhihu.com/creator/analytics/work/all"
    );
    assert_eq!(outbound.len(), 2);

    let logged_navigation = matches!(
        &outbound[0],
        AgentMessage::LogEntry { level, message }
            if level == "info"
                && message == "navigate https://www.zhihu.com/creator/analytics/work/all"
    );
    assert!(logged_navigation);

    let issued_navigate = matches!(
        &outbound[1],
        AgentMessage::Command { seq, action, .. }
            if *seq == 1 && action == &Action::Navigate
    );
    assert!(issued_navigate);
}
|
||||
|
||||
/// The `creator_write_button` target should navigate to the creator page,
/// click the write entry, then wait for the editor on the zhuanlan domain.
#[test]
fn zhihu_navigation_skill_clicks_creator_write_button() {
    let creator_url = "https://www.zhihu.com/creator";
    let editor_url = "https://zhuanlan.zhihu.com/write";
    let scripted = vec![
        response(1, serde_json::json!({ "url": creator_url })),
        response(2, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(3, serde_json::json!({ "ready": true, "url": editor_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "creator_write_button".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(
        outcome.summary,
        "知乎入口已打开:写文章入口按钮 (https://zhuanlan.zhihu.com/write)"
    );
    assert_eq!(outcome.page, "creator_write_button");
    assert_eq!(outcome.final_url, editor_url);
    assert_eq!(outbound.len(), 6);

    // Step 1: navigate to the creator page.
    assert!(is_info_log(
        &outbound[0],
        "navigate https://www.zhihu.com/creator"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));

    // Step 2: click the write entry while still on www.zhihu.com.
    assert!(is_info_log(&outbound[2], "click 写文章入口按钮"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));

    // Step 3: wait for the editor, now expected on zhuanlan.zhihu.com.
    assert!(matches!(
        &outbound[4],
        AgentMessage::LogEntry { level, message }
            if level == "info" && message.contains("wait for textarea")
    ));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command {
            seq: 3,
            action: Action::WaitForSelector,
            security,
            ..
        } if security.expected_domain == "zhuanlan.zhihu.com"
    ));
}
|
||||
|
||||
/// The `notifications_menu` target should navigate to the home page and then
/// click the notifications menu, leaving the final URL at the home page.
#[test]
fn zhihu_navigation_skill_opens_notifications_menu_flow() {
    let home_url = "https://www.zhihu.com/";
    let scripted = vec![
        response(1, serde_json::json!({ "url": home_url })),
        response(2, serde_json::json!({ "clicked": true })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "notifications_menu".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(outcome.summary, "知乎菜单已打开:通知菜单");
    assert_eq!(outcome.page, "notifications_menu");
    assert_eq!(outcome.final_url, home_url);
    assert_eq!(outbound.len(), 4);
    assert!(is_info_log(&outbound[0], "navigate https://www.zhihu.com/"));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));
    assert!(is_info_log(&outbound[2], "click 通知菜单"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));
}
|
||||
|
||||
/// The `profile_page` target should navigate home, open the avatar menu, and
/// click the profile entry, reporting the URL returned by the last click.
#[test]
fn zhihu_navigation_skill_opens_profile_page_from_avatar_menu() {
    let home_url = "https://www.zhihu.com/";
    let profile_url = "https://www.zhihu.com/people/test-user";
    let scripted = vec![
        response(1, serde_json::json!({ "url": home_url })),
        response(2, serde_json::json!({ "clicked": true })),
        response(3, serde_json::json!({ "clicked": true, "url": profile_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "profile_page".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(
        outcome.summary,
        "知乎导航已完成:个人主页 (https://www.zhihu.com/people/test-user)"
    );
    assert_eq!(outcome.page, "profile_page");
    assert_eq!(outcome.final_url, profile_url);
    assert_eq!(outbound.len(), 6);

    assert!(is_info_log(&outbound[0], "navigate https://www.zhihu.com/"));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));

    assert!(is_info_log(&outbound[2], "click 头像菜单"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            ..
        }
    ));

    assert!(is_info_log(&outbound[4], "click 个人主页入口"));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command {
            seq: 3,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));
}
|
||||
|
||||
/// The `profile_answers_tab` target should navigate home, open the avatar
/// menu, enter the profile page, and finally click the answers tab.
#[test]
fn zhihu_navigation_skill_opens_profile_answers_tab_from_avatar_menu() {
    let home_url = "https://www.zhihu.com/";
    let profile_url = "https://www.zhihu.com/people/test-user";
    let answers_url = "https://www.zhihu.com/people/test-user/answers";
    let scripted = vec![
        response(1, serde_json::json!({ "url": home_url })),
        response(2, serde_json::json!({ "clicked": true })),
        response(3, serde_json::json!({ "clicked": true, "url": profile_url })),
        response(4, serde_json::json!({ "clicked": true, "url": answers_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "profile_answers_tab".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(
        outcome.summary,
        "知乎导航已完成:回答分栏 (https://www.zhihu.com/people/test-user/answers)"
    );
    assert_eq!(outcome.page, "profile_answers_tab");
    assert_eq!(outcome.final_url, answers_url);
    assert_eq!(outbound.len(), 8);

    assert!(is_info_log(&outbound[0], "navigate https://www.zhihu.com/"));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));

    assert!(is_info_log(&outbound[2], "click 头像菜单"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            ..
        }
    ));

    assert!(is_info_log(&outbound[4], "click 个人主页入口"));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command {
            seq: 3,
            action: Action::Click,
            ..
        }
    ));

    assert!(is_info_log(&outbound[6], "click 回答分栏"));
    assert!(matches!(
        &outbound[7],
        AgentMessage::Command {
            seq: 4,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));
}
|
||||
|
||||
/// The `settings_account_menu` target should navigate home, open the avatar
/// menu, and click the account settings entry.
#[test]
fn zhihu_navigation_skill_opens_account_settings_from_avatar_menu() {
    let home_url = "https://www.zhihu.com/";
    let settings_url = "https://www.zhihu.com/settings/account";
    let scripted = vec![
        response(1, serde_json::json!({ "url": home_url })),
        response(2, serde_json::json!({ "clicked": true })),
        response(3, serde_json::json!({ "clicked": true, "url": settings_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "settings_account_menu".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(
        outcome.summary,
        "知乎导航已完成:账号设置菜单 (https://www.zhihu.com/settings/account)"
    );
    assert_eq!(outcome.page, "settings_account_menu");
    assert_eq!(outcome.final_url, settings_url);
    assert_eq!(outbound.len(), 6);

    assert!(is_info_log(&outbound[0], "navigate https://www.zhihu.com/"));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));

    assert!(is_info_log(&outbound[2], "click 头像菜单"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            ..
        }
    ));

    assert!(is_info_log(&outbound[4], "click 账号设置菜单"));
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command {
            seq: 3,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));
}
|
||||
|
||||
/// The `notifications_replies_tab` target should navigate to the notifications
/// page and then click the replies tab.
#[test]
fn zhihu_navigation_skill_opens_notifications_replies_tab() {
    let notifications_url = "https://www.zhihu.com/notifications";
    let replies_url = "https://www.zhihu.com/notifications/replies";
    let scripted = vec![
        response(1, serde_json::json!({ "url": notifications_url })),
        response(2, serde_json::json!({ "clicked": true, "url": replies_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "notifications_replies_tab".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(
        outcome.summary,
        "知乎入口已打开:回复我的 (https://www.zhihu.com/notifications/replies)"
    );
    assert_eq!(outbound.len(), 4);
    assert!(is_info_log(
        &outbound[0],
        "navigate https://www.zhihu.com/notifications"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));
    assert!(is_info_log(&outbound[2], "click 回复我的"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));
}
|
||||
|
||||
/// The `messages_settings_menu` target should navigate to the messages page
/// and click the settings menu, leaving the final URL at the messages page.
#[test]
fn zhihu_navigation_skill_opens_messages_settings_menu() {
    let messages_url = "https://www.zhihu.com/messages";
    let scripted = vec![
        response(1, serde_json::json!({ "url": messages_url })),
        response(2, serde_json::json!({ "clicked": true })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "messages_settings_menu".to_string(),
        ensure_loaded: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    // True when `msg` is an info-level log entry carrying exactly `text`.
    let is_info_log = |msg: &AgentMessage, text: &str| {
        matches!(
            msg,
            AgentMessage::LogEntry { level, message } if level == "info" && message == text
        )
    };

    assert_eq!(outcome.summary, "知乎菜单已打开:消息设置菜单");
    assert_eq!(outcome.final_url, messages_url);
    assert_eq!(outbound.len(), 4);
    assert!(is_info_log(
        &outbound[0],
        "navigate https://www.zhihu.com/messages"
    ));
    assert!(matches!(
        &outbound[1],
        AgentMessage::Command {
            seq: 1,
            action: Action::Navigate,
            ..
        }
    ));
    assert!(is_info_log(&outbound[2], "click 消息设置菜单"));
    assert!(matches!(
        &outbound[3],
        AgentMessage::Command {
            seq: 2,
            action: Action::Click,
            security,
            ..
        } if security.expected_domain == "www.zhihu.com"
    ));
}
|
||||
|
||||
/// Requesting a page key the skill does not know must fail with an error
/// that names the offending target.
#[test]
fn zhihu_navigation_skill_rejects_unknown_target() {
    // No scripted responses: the mock transport is created with an empty list.
    let pipe = Arc::new(MockTransport::new(vec![]));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuNavigateRequest {
        page: "unknown_target".to_string(),
        ensure_loaded: true,
    };

    let failure = execute(pipe.as_ref(), &tool, request).unwrap_err();
    let rendered = failure.to_string();

    assert!(rendered.contains("unknown zhihu target: unknown_target"));
}
|
||||
357
tests/zhihu_skill_test.rs
Normal file
357
tests/zhihu_skill_test.rs
Normal file
@@ -0,0 +1,357 @@
|
||||
mod common;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common::MockTransport;
|
||||
use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing};
|
||||
use sgclaw::security::MacPolicy;
|
||||
use sgclaw::skill::zhihu::{execute, load_flow, ZhihuWriteRequest};
|
||||
|
||||
fn test_policy() -> MacPolicy {
|
||||
MacPolicy::from_json_str(
|
||||
r#"{
|
||||
"version": "1.0",
|
||||
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
|
||||
"pipe_actions": {
|
||||
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
|
||||
"blocked": []
|
||||
}
|
||||
}"#,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn response(seq: u64, data: serde_json::Value) -> BrowserMessage {
|
||||
BrowserMessage::Response {
|
||||
seq,
|
||||
success: true,
|
||||
data,
|
||||
aom_snapshot: vec![],
|
||||
timing: Timing {
|
||||
queue_ms: 1,
|
||||
exec_ms: 10,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that the loaded flow still carries the expected URLs, UI literals,
/// and CSS selectors.
#[test]
fn load_flow_preserves_validated_zhihu_literals() {
    let flow = load_flow().unwrap();

    assert_eq!(flow.entry_url, "https://www.zhihu.com/creator");
    assert_eq!(flow.editor_url, "https://zhuanlan.zhihu.com/write");

    // (literal key, expected text) pairs.
    let expected_literals = [
        ("write_entry_text", "写文章"),
        ("publish_confirm_text", "确认发布"),
        ("title_placeholder", "请输入标题(最多 100 个字)"),
    ];
    for &(key, text) in &expected_literals {
        assert_eq!(flow.literals[key], text);
    }

    // (selector key, expected CSS selector) pairs.
    let expected_selectors = [
        ("creator_write_entry", "div.css-1q62b6s > div.css-byu4by"),
        (
            "publish_confirm_button",
            "div[role='dialog'] button.Button--primary.Button--blue",
        ),
    ];
    for &(key, selector) in &expected_selectors {
        assert_eq!(flow.selectors[key], selector);
    }
}
|
||||
|
||||
/// With `publish: false` the skill should stop after the fifth command (a
/// `Type`) and report that the draft was filled.
#[test]
fn zhihu_skill_stops_before_publish_when_publish_is_false() {
    let creator_url = "https://www.zhihu.com/creator";
    let editor_url = "https://zhuanlan.zhihu.com/write";
    let scripted = vec![
        response(1, serde_json::json!({ "url": creator_url })),
        response(2, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(3, serde_json::json!({ "ready": true, "url": editor_url })),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuWriteRequest {
        title: "自动发文能力测试".to_string(),
        body: "第一段\n\n第二段".to_string(),
        publish: false,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    assert_eq!(outcome.summary, "知乎文章草稿已填充:自动发文能力测试");
    assert_eq!(outbound.len(), 10);
    assert!(matches!(
        &outbound[5],
        AgentMessage::Command {
            seq: 3,
            action: Action::WaitForSelector,
            ..
        }
    ));
    assert!(matches!(
        &outbound[9],
        AgentMessage::Command {
            seq: 5,
            action: Action::Type,
            ..
        }
    ));
}
|
||||
|
||||
/// With `publish: true` and a fully scripted happy path, the skill should run
/// all eleven commands and report the published article URL.
#[test]
fn zhihu_skill_publishes_only_after_confirming_dialog_title_and_final_url() {
    let creator_url = "https://www.zhihu.com/creator";
    let editor_url = "https://zhuanlan.zhihu.com/write";
    let article_url = "https://zhuanlan.zhihu.com/p/123456";
    let title = "自动发文能力测试";
    let scripted = vec![
        response(1, serde_json::json!({ "url": creator_url })),
        response(2, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(3, serde_json::json!({ "ready": true, "url": editor_url })),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(7, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(8, serde_json::json!({ "ready": true })),
        response(9, serde_json::json!({ "clicked": true, "url": article_url })),
        response(10, serde_json::json!({ "ready": true, "url": article_url })),
        response(11, serde_json::json!({ "text": title, "url": article_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuWriteRequest {
        title: title.to_string(),
        body: "第一段\n\n第二段".to_string(),
        publish: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();
    let outbound = pipe.sent_messages();

    assert_eq!(
        outcome.summary,
        "知乎文章已发布:自动发文能力测试 (https://zhuanlan.zhihu.com/p/123456)"
    );
    assert_eq!(outcome.final_url.as_deref(), Some(article_url));
    assert!(outcome.published);
    assert_eq!(outbound.len(), 22);
    assert!(matches!(
        &outbound[11],
        AgentMessage::Command {
            seq: 6,
            action: Action::ScrollTo,
            ..
        }
    ));
    assert!(matches!(
        &outbound[21],
        AgentMessage::Command {
            seq: 11,
            action: Action::GetText,
            ..
        }
    ));
}
|
||||
|
||||
/// When the browser reports a `/p/<id>/edit` URL after publishing, the skill
/// should still succeed and report the URL without the `/edit` suffix.
#[test]
fn zhihu_skill_accepts_edit_url_as_published_article_url() {
    let creator_url = "https://www.zhihu.com/creator";
    let editor_url = "https://zhuanlan.zhihu.com/write";
    let edit_url = "https://zhuanlan.zhihu.com/p/123456/edit";
    let title = "自动发文能力测试";
    let scripted = vec![
        response(1, serde_json::json!({ "url": creator_url })),
        response(2, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(3, serde_json::json!({ "ready": true, "url": editor_url })),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(7, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(8, serde_json::json!({ "ready": true })),
        response(9, serde_json::json!({ "clicked": true, "url": edit_url })),
        response(10, serde_json::json!({ "ready": true, "url": edit_url })),
        response(11, serde_json::json!({ "text": title, "url": edit_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuWriteRequest {
        title: title.to_string(),
        body: "第一段\n\n第二段".to_string(),
        publish: true,
    };

    let outcome = execute(pipe.as_ref(), &tool, request).unwrap();

    assert_eq!(
        outcome.final_url.as_deref(),
        Some("https://zhuanlan.zhihu.com/p/123456")
    );
    assert_eq!(
        outcome.summary,
        "知乎文章已发布:自动发文能力测试 (https://zhuanlan.zhihu.com/p/123456)"
    );
}
|
||||
|
||||
/// If none of the post-publish responses carries a URL, the skill must fail
/// with an error mentioning the missing article URL.
#[test]
fn zhihu_skill_fails_when_publish_confirmation_never_returns_article_url() {
    let creator_url = "https://www.zhihu.com/creator";
    let editor_url = "https://zhuanlan.zhihu.com/write";
    let title = "自动发文能力测试";
    let scripted = vec![
        response(1, serde_json::json!({ "url": creator_url })),
        response(2, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(3, serde_json::json!({ "ready": true, "url": editor_url })),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(7, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(8, serde_json::json!({ "ready": true })),
        // Responses 9-11 deliberately omit the "url" field.
        response(9, serde_json::json!({ "clicked": true })),
        response(10, serde_json::json!({ "ready": true })),
        response(11, serde_json::json!({ "text": title })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuWriteRequest {
        title: title.to_string(),
        body: "第一段\n\n第二段".to_string(),
        publish: true,
    };

    let failure = execute(pipe.as_ref(), &tool, request).unwrap_err();
    let rendered = failure.to_string();

    assert!(rendered.contains("did not return article url"));
}
|
||||
|
||||
/// If the text read back after publishing differs from the requested title,
/// the skill must fail and report both the expected and the actual text.
#[test]
fn zhihu_skill_fails_when_published_title_does_not_match_request_title() {
    let creator_url = "https://www.zhihu.com/creator";
    let editor_url = "https://zhuanlan.zhihu.com/write";
    let article_url = "https://zhuanlan.zhihu.com/p/123456";
    let scripted = vec![
        response(1, serde_json::json!({ "url": creator_url })),
        response(2, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(3, serde_json::json!({ "ready": true, "url": editor_url })),
        response(4, serde_json::json!({ "typed": true })),
        response(5, serde_json::json!({ "typed": true })),
        response(6, serde_json::json!({ "scrolled": true })),
        response(7, serde_json::json!({ "clicked": true, "url": editor_url })),
        response(8, serde_json::json!({ "ready": true })),
        response(9, serde_json::json!({ "clicked": true, "url": article_url })),
        response(10, serde_json::json!({ "ready": true, "url": article_url })),
        // The text read back differs from the requested title.
        response(11, serde_json::json!({ "text": "别的标题", "url": article_url })),
    ];
    let pipe = Arc::new(MockTransport::new(scripted));
    let tool = BrowserPipeTool::new(pipe.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
        .with_response_timeout(Duration::from_secs(1));
    let request = ZhihuWriteRequest {
        title: "自动发文能力测试".to_string(),
        body: "第一段\n\n第二段".to_string(),
        publish: true,
    };

    let failure = execute(pipe.as_ref(), &tool, request).unwrap_err();
    let rendered = failure.to_string();

    assert!(rendered.contains("expected text `自动发文能力测试`, got `别的标题`"));
}
|
||||
Reference in New Issue
Block a user