Files
skill-lib/tests/zhihu_hotlist_skill_test.rs
木炎 6aad2ce48e feat: restore zhihu browser skills
Reconnect the recovered Zhihu skill flows to the live browser runtime and resolve their resources relative to the executable so they work outside the repo root.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 14:29:38 +08:00

404 lines
12 KiB
Rust

mod common;
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use common::MockTransport;
use sgclaw::pipe::{BrowserMessage, BrowserPipeTool, Timing};
use sgclaw::security::MacPolicy;
use sgclaw::skill::zhihu_hotlist::{
execute_collect, execute_report, load_flow, ZhihuHotlistCollectRequest,
ZhihuHotlistReportRequest,
};
use sgclaw::skill::zhihu_hotlist_store::load_latest_snapshot;
/// Builds the MAC policy shared by the tests in this file.
///
/// The policy allows the `www.zhihu.com` and `zhuanlan.zhihu.com` domains,
/// permits the listed pipe actions, and blocks none.
///
/// # Panics
///
/// Panics if `MacPolicy::from_json_str` rejects the embedded JSON.
fn test_policy() -> MacPolicy {
    let policy_json = r#"{
"version": "1.0",
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
"pipe_actions": {
"allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"],
"blocked": []
}
}"#;
    MacPolicy::from_json_str(policy_json).unwrap()
}
/// Returns a scratch directory path of the form
/// `<system temp dir>/sgclaw-<label>-<nanoseconds since the Unix epoch>`.
///
/// Only the path is computed here; nothing is created on disk.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn temp_store_dir(label: &str) -> PathBuf {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let nanos = since_epoch.as_nanos();

    let mut dir = std::env::temp_dir();
    dir.push(format!("sgclaw-{label}-{nanos}"));
    dir
}
/// Returns a fixed HTML page with two `data-hot-item` sections.
///
/// Each section carries a question link, a `HotItem-content` summary and a
/// `HotItem-hot` heat label ("1234 热度" and "5.6 万热度" respectively).
fn hotlist_html() -> String {
    let page: &str = r#"
<html>
<body>
<main>
<section data-hot-item>
<h2><a href="/question/123">第一条热榜</a></h2>
<div class="HotItem-content">第一条摘要</div>
<div class="HotItem-hot">1234 热度</div>
</section>
<section data-hot-item>
<h2><a href="/question/456">第二条热榜</a></h2>
<div class="HotItem-content">第二条摘要</div>
<div class="HotItem-hot">5.6 万热度</div>
</section>
</main>
</body>
</html>
"#;
    String::from(page)
}
/// Renders a fixed comment-list page containing two `CommentItemV2` entries.
///
/// The reply ("回复") and upvote ("赞同") counts of each entry come from the
/// arguments; the favourite ("收藏") and heart ("红心") counts are hard-coded
/// in the template.
fn comment_html(
    first_reply: u64,
    first_upvote: u64,
    second_reply: u64,
    second_upvote: u64,
) -> String {
    // The template captures the four parameters by name.
    let rendered = format!(
        r#"
<html>
<body>
<div class="CommentListV2">
<div class="CommentItemV2" data-comment-id="comment-1">
<button>回复 {first_reply}</button>
<button>赞同 {first_upvote}</button>
<button>收藏 2</button>
<button>红心 1</button>
</div>
<div class="CommentItemV2" data-comment-id="comment-2">
<button>回复 {second_reply}</button>
<button>赞同 {second_upvote}</button>
<button>收藏 4</button>
<button>红心 3</button>
</div>
</div>
</body>
</html>
"#
    );
    rendered
}
/// Checks that the flow returned by `load_flow()` still exposes the hot-list
/// URL, the `zhihu` domain entry and the selector fragments asserted below.
#[test]
fn load_hotlist_flow_preserves_expected_selectors() {
    let flow = load_flow().unwrap();

    // Entry point and domain mapping.
    assert_eq!(flow.hotlist_url, "https://www.zhihu.com/hot");
    assert_eq!(flow.domains["zhihu"], "www.zhihu.com");

    // Selector strings only need to contain the expected fragment.
    let hotlist_item = &flow.selectors["hotlist_item"];
    let comment_metric = &flow.selectors["comment_metric"];
    assert!(hotlist_item.contains("HotList-item"));
    assert!(comment_metric.contains("button"));
}
/// Runs `execute_collect` against a scripted transport, then verifies the
/// persisted snapshot and the report generated from it.
///
/// The mock transport is loaded with fifteen successful replies (seq 1..=15):
/// three whose payloads refer to the hot-list page, then six per question
/// page. NOTE(review): which browser action each reply answers is decided by
/// the skill implementation, not by this file — confirm there before
/// reordering the script.
#[test]
fn zhihu_hotlist_collect_persists_snapshot_and_report_reads_latest() {
    const HOT_URL: &str = "https://www.zhihu.com/hot";
    const FIRST_QUESTION_URL: &str = "https://www.zhihu.com/question/123";
    const SECOND_QUESTION_URL: &str = "https://www.zhihu.com/question/456";

    // Every scripted reply shares the same envelope; only the sequence number
    // and the JSON payload differ.
    let ok = |seq, data| BrowserMessage::Response {
        seq,
        success: true,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };
    let ready = || serde_json::json!({ "ready": true });
    let scrolled = || serde_json::json!({ "scrolled": true });

    let store_dir = temp_store_dir("hotlist-collect");
    let scripted_replies = vec![
        // Hot-list page.
        ok(1, serde_json::json!({ "text": "知乎热榜 当前页", "url": HOT_URL })),
        ok(2, ready()),
        ok(3, serde_json::json!({ "html": hotlist_html(), "url": HOT_URL })),
        // First question page.
        ok(4, serde_json::json!({ "url": FIRST_QUESTION_URL })),
        ok(5, ready()),
        ok(6, scrolled()),
        ok(7, ready()),
        ok(8, scrolled()),
        ok(
            9,
            serde_json::json!({ "html": comment_html(3, 15, 1, 8), "url": FIRST_QUESTION_URL }),
        ),
        // Second question page.
        ok(10, serde_json::json!({ "url": SECOND_QUESTION_URL })),
        ok(11, ready()),
        ok(12, scrolled()),
        ok(13, ready()),
        ok(14, scrolled()),
        ok(
            15,
            serde_json::json!({ "html": comment_html(5, 20, 4, 16), "url": SECOND_QUESTION_URL }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted_replies));

    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let store_dir_arg = store_dir.display().to_string();
    let collect_request = ZhihuHotlistCollectRequest {
        top_n: 2,
        comments_per_item: 2,
        store_dir: Some(store_dir_arg.clone()),
    };
    let result = execute_collect(transport.as_ref(), &browser_tool, collect_request).unwrap();

    assert_eq!(result.item_count, 2);
    assert!(result.summary.contains("知乎热榜快照已保存"));

    // The snapshot on disk must mirror the two fixture entries.
    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    assert_eq!(snapshot.items.len(), 2);
    assert_eq!(snapshot.items[0].title, "第一条热榜");
    assert_eq!(snapshot.items[0].summary, "第一条摘要");
    assert_eq!(snapshot.items[0].heat_value, Some(1234));
    assert_eq!(snapshot.items[0].comment_metrics.len(), 2);
    assert_eq!(snapshot.items[0].comment_metrics[0].reply_count, Some(3));
    assert_eq!(snapshot.items[0].comment_metrics[0].upvote_count, Some(15));
    // The fixture label is "5.6 万热度"; the expectation is 5.6 x 10,000.
    assert_eq!(snapshot.items[1].heat_value, Some(56_000));
    assert_eq!(snapshot.collection_stats.total_comment_metric_records, 4);

    let report_request = ZhihuHotlistReportRequest {
        snapshot_id: Some(result.snapshot_id.clone()),
        store_dir: Some(store_dir_arg),
        top_n: 2,
    };
    let report = execute_report(report_request).unwrap();

    assert!(report.summary.contains("第一条热榜"));
    assert!(report.summary.contains("第二条热榜"));
    // 4 = 3 + 1 replies and 23 = 15 + 8 upvotes in the first question's
    // fixture; presumably the report totals the metrics per item.
    assert!(report.summary.contains("回复 4"));
    assert!(report.summary.contains("赞同 23"));

    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&store_dir);
}
/// Verifies that `execute_collect` still succeeds and stores a snapshot when
/// the seventh scripted reply reports a failure ("comment list missing").
///
/// The stored snapshot is expected to count one partial item and to hold no
/// comment metrics for it.
#[test]
fn zhihu_hotlist_collect_persists_partial_snapshot_when_comment_capture_fails() {
    const HOT_URL: &str = "https://www.zhihu.com/hot";
    const QUESTION_URL: &str = "https://www.zhihu.com/question/123";

    // Shared reply envelope; sequence number, outcome and payload vary.
    let reply = |seq, success, data| BrowserMessage::Response {
        seq,
        success,
        data,
        aom_snapshot: vec![],
        timing: Timing {
            queue_ms: 1,
            exec_ms: 10,
        },
    };

    let store_dir = temp_store_dir("hotlist-partial");
    let scripted_replies = vec![
        // Hot-list page.
        reply(
            1,
            true,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": HOT_URL }),
        ),
        reply(2, true, serde_json::json!({ "ready": true })),
        reply(
            3,
            true,
            serde_json::json!({ "html": hotlist_html(), "url": HOT_URL }),
        ),
        // Question page: the script ends with a failed reply.
        reply(4, true, serde_json::json!({ "url": QUESTION_URL })),
        reply(5, true, serde_json::json!({ "ready": true })),
        reply(6, true, serde_json::json!({ "scrolled": true })),
        reply(
            7,
            false,
            serde_json::json!({ "error": "comment list missing" }),
        ),
    ];
    let transport = Arc::new(MockTransport::new(scripted_replies));

    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let collect_request = ZhihuHotlistCollectRequest {
        top_n: 1,
        comments_per_item: 2,
        store_dir: Some(store_dir.display().to_string()),
    };
    let result = execute_collect(transport.as_ref(), &browser_tool, collect_request).unwrap();
    let snapshot = load_latest_snapshot(&store_dir).unwrap();

    assert_eq!(result.item_count, 1);
    let stats = &snapshot.collection_stats;
    assert_eq!(stats.partial_items, 1);
    assert_eq!(stats.total_comment_metric_records, 0);
    assert!(snapshot.items[0].comment_metrics.is_empty());

    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&store_dir);
}