Reconnect the recovered Zhihu skill flows to the live browser runtime and resolve their resources relative to the executable so they work outside the repo root. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
404 lines
12 KiB
Rust
404 lines
12 KiB
Rust
mod common;
|
|
|
|
use std::fs;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|
|
|
use common::MockTransport;
|
|
use sgclaw::pipe::{BrowserMessage, BrowserPipeTool, Timing};
|
|
use sgclaw::security::MacPolicy;
|
|
use sgclaw::skill::zhihu_hotlist::{
|
|
execute_collect, execute_report, load_flow, ZhihuHotlistCollectRequest,
|
|
ZhihuHotlistReportRequest,
|
|
};
|
|
use sgclaw::skill::zhihu_hotlist_store::load_latest_snapshot;
|
|
|
|
fn test_policy() -> MacPolicy {
|
|
MacPolicy::from_json_str(
|
|
r#"{
|
|
"version": "1.0",
|
|
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
|
|
"pipe_actions": {
|
|
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
|
|
"blocked": []
|
|
}
|
|
}"#,
|
|
)
|
|
.unwrap()
|
|
}
|
|
|
|
/// Returns a path of the form `<system temp dir>/sgclaw-{label}-{nanos}`.
///
/// `nanos` is the number of nanoseconds since the Unix epoch at the time of the call.
/// Only the path is computed; the directory is not created here.
///
/// # Panics
///
/// Panics if the system clock reports a time earlier than the Unix epoch.
fn temp_store_dir(label: &str) -> PathBuf {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let dir_name = format!("sgclaw-{label}-{}", since_epoch.as_nanos());

    let mut path = std::env::temp_dir();
    path.push(dir_name);
    path
}
|
|
|
|
/// Returns a fixed HTML page that stands in for the Zhihu hot list.
///
/// The page holds two `<section data-hot-item>` entries. Each has a title link
/// (`/question/123`, `/question/456`), a `HotItem-content` summary and a
/// `HotItem-hot` heat label (`1234 热度` and `5.6 万热度`).
fn hotlist_html() -> String {
    String::from(
        r#"
        <html>
          <body>
            <main>
              <section data-hot-item>
                <h2><a href="/question/123">第一条热榜</a></h2>
                <div class="HotItem-content">第一条摘要</div>
                <div class="HotItem-hot">1234 热度</div>
              </section>
              <section data-hot-item>
                <h2><a href="/question/456">第二条热榜</a></h2>
                <div class="HotItem-content">第二条摘要</div>
                <div class="HotItem-hot">5.6 万热度</div>
              </section>
            </main>
          </body>
        </html>
        "#,
    )
}
|
|
|
|
/// Renders a fixed HTML page that stands in for a question's comment list.
///
/// The page holds two `CommentItemV2` entries (`comment-1`, `comment-2`), each with four
/// buttons. The reply (`回复`) and upvote (`赞同`) counts come from the arguments; the
/// favourite (`收藏`) and heart (`红心`) counts are fixed at 2/1 and 4/3.
///
/// * `first_reply`, `first_upvote` — counts shown on `comment-1`.
/// * `second_reply`, `second_upvote` — counts shown on `comment-2`.
fn comment_html(
    first_reply: u64,
    first_upvote: u64,
    second_reply: u64,
    second_upvote: u64,
) -> String {
    format!(
        r#"
        <html>
          <body>
            <div class="CommentListV2">
              <div class="CommentItemV2" data-comment-id="comment-1">
                <button>回复 {first_reply}</button>
                <button>赞同 {first_upvote}</button>
                <button>收藏 2</button>
                <button>红心 1</button>
              </div>
              <div class="CommentItemV2" data-comment-id="comment-2">
                <button>回复 {second_reply}</button>
                <button>赞同 {second_upvote}</button>
                <button>收藏 4</button>
                <button>红心 3</button>
              </div>
            </div>
          </body>
        </html>
        "#
    )
}
|
|
|
|
/// Checks that `load_flow()` yields the expected hot-list URL, the `zhihu` domain entry,
/// and selectors that still mention `HotList-item` and `button`.
#[test]
fn load_hotlist_flow_preserves_expected_selectors() {
    let flow = load_flow().unwrap();

    // Entry point and domain mapping.
    assert_eq!(flow.hotlist_url, "https://www.zhihu.com/hot");
    assert_eq!(flow.domains["zhihu"], "www.zhihu.com");

    // Selector fragments; each lookup happens right before its own assertion.
    let hotlist_item_selector = &flow.selectors["hotlist_item"];
    assert!(hotlist_item_selector.contains("HotList-item"));

    let comment_metric_selector = &flow.selectors["comment_metric"];
    assert!(comment_metric_selector.contains("button"));
}
|
|
|
|
/// Runs `execute_collect` against a scripted transport, then checks the persisted
/// snapshot and the report produced from it by `execute_report`.
///
/// The transport is loaded with fifteen successful replies (seq 1..=15): three for the
/// hot-list page and six for each of the two question pages. The snapshot is written to
/// a fresh temp directory, which is removed when the test ends, whether it passes or
/// panics.
#[test]
fn zhihu_hotlist_collect_persists_snapshot_and_report_reads_latest() {
    // Removes the snapshot directory when dropped, so a failing `unwrap` or assertion
    // does not leave it behind in the system temp dir.
    struct RemoveDirOnDrop<'a>(&'a std::path::Path);

    impl Drop for RemoveDirOnDrop<'_> {
        fn drop(&mut self) {
            // Best effort: the directory may never have been created.
            let _ = fs::remove_dir_all(self.0);
        }
    }

    let store_dir = temp_store_dir("hotlist-collect");
    // Declared right after `store_dir` so it is dropped after every later local.
    let _cleanup = RemoveDirOnDrop(&store_dir);

    // Every scripted reply is successful with an empty AOM snapshot and the same timing;
    // only the sequence number and the payload vary.
    let ok = |seq, data| BrowserMessage::Response {
        seq,
        success: true,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let transport = Arc::new(MockTransport::new(vec![
        // Hot-list page.
        ok(
            1,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" }),
        ),
        ok(2, serde_json::json!({ "ready": true })),
        ok(
            3,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" }),
        ),
        // First question page.
        ok(
            4,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" }),
        ),
        ok(5, serde_json::json!({ "ready": true })),
        ok(6, serde_json::json!({ "scrolled": true })),
        ok(7, serde_json::json!({ "ready": true })),
        ok(8, serde_json::json!({ "scrolled": true })),
        ok(
            9,
            serde_json::json!({ "html": comment_html(3, 15, 1, 8), "url": "https://www.zhihu.com/question/123" }),
        ),
        // Second question page.
        ok(
            10,
            serde_json::json!({ "url": "https://www.zhihu.com/question/456" }),
        ),
        ok(11, serde_json::json!({ "ready": true })),
        ok(12, serde_json::json!({ "scrolled": true })),
        ok(13, serde_json::json!({ "ready": true })),
        ok(14, serde_json::json!({ "scrolled": true })),
        ok(
            15,
            serde_json::json!({ "html": comment_html(5, 20, 4, 16), "url": "https://www.zhihu.com/question/456" }),
        ),
    ]));
    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let result = execute_collect(
        transport.as_ref(),
        &browser_tool,
        ZhihuHotlistCollectRequest {
            top_n: 2,
            comments_per_item: 2,
            store_dir: Some(store_dir.display().to_string()),
        },
    )
    .unwrap();

    assert_eq!(result.item_count, 2);
    assert!(result.summary.contains("知乎热榜快照已保存"));

    // The snapshot on disk must reflect both fixture items and their comment metrics.
    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    assert_eq!(snapshot.items.len(), 2);
    assert_eq!(snapshot.items[0].title, "第一条热榜");
    assert_eq!(snapshot.items[0].summary, "第一条摘要");
    assert_eq!(snapshot.items[0].heat_value, Some(1234));
    assert_eq!(snapshot.items[0].comment_metrics.len(), 2);
    assert_eq!(snapshot.items[0].comment_metrics[0].reply_count, Some(3));
    assert_eq!(snapshot.items[0].comment_metrics[0].upvote_count, Some(15));
    // "5.6 万热度" in the fixture is expected to be stored as 56_000.
    assert_eq!(snapshot.items[1].heat_value, Some(56_000));
    assert_eq!(snapshot.collection_stats.total_comment_metric_records, 4);

    let report = execute_report(ZhihuHotlistReportRequest {
        snapshot_id: Some(result.snapshot_id.clone()),
        store_dir: Some(store_dir.display().to_string()),
        top_n: 2,
    })
    .unwrap();

    assert!(report.summary.contains("第一条热榜"));
    assert!(report.summary.contains("第二条热榜"));
    // 4 = 3 + 1 replies and 23 = 15 + 8 upvotes from the first question's fixture.
    assert!(report.summary.contains("回复 4"));
    assert!(report.summary.contains("赞同 23"));
}
|
|
|
|
/// Checks that `execute_collect` still persists a snapshot when a reply during comment
/// capture reports failure.
///
/// The transport is loaded with six successful replies followed by a seventh with
/// `success: false` and the error `comment list missing`. The test expects one item,
/// counted as partial, with no comment metrics. The temp snapshot directory is removed
/// when the test ends, whether it passes or panics.
#[test]
fn zhihu_hotlist_collect_persists_partial_snapshot_when_comment_capture_fails() {
    // Removes the snapshot directory when dropped, so a failing `unwrap` or assertion
    // does not leave it behind in the system temp dir.
    struct RemoveDirOnDrop<'a>(&'a std::path::Path);

    impl Drop for RemoveDirOnDrop<'_> {
        fn drop(&mut self) {
            // Best effort: the directory may never have been created.
            let _ = fs::remove_dir_all(self.0);
        }
    }

    let store_dir = temp_store_dir("hotlist-partial");
    // Declared right after `store_dir` so it is dropped after every later local.
    let _cleanup = RemoveDirOnDrop(&store_dir);

    // Every scripted reply has an empty AOM snapshot and the same timing; only the
    // sequence number, success flag and payload vary.
    let reply = |seq, success, data| BrowserMessage::Response {
        seq,
        success,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let transport = Arc::new(MockTransport::new(vec![
        // Hot-list page.
        reply(
            1,
            true,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" }),
        ),
        reply(2, true, serde_json::json!({ "ready": true })),
        reply(
            3,
            true,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" }),
        ),
        // First question page; the last reply is the injected failure.
        reply(
            4,
            true,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" }),
        ),
        reply(5, true, serde_json::json!({ "ready": true })),
        reply(6, true, serde_json::json!({ "scrolled": true })),
        reply(
            7,
            false,
            serde_json::json!({ "error": "comment list missing" }),
        ),
    ]));
    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let result = execute_collect(
        transport.as_ref(),
        &browser_tool,
        ZhihuHotlistCollectRequest {
            top_n: 1,
            comments_per_item: 2,
            store_dir: Some(store_dir.display().to_string()),
        },
    )
    .unwrap();

    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    assert_eq!(result.item_count, 1);
    assert_eq!(snapshot.collection_stats.partial_items, 1);
    assert_eq!(snapshot.collection_stats.total_comment_metric_records, 0);
    assert!(snapshot.items[0].comment_metrics.is_empty());
}
|