mod common;

use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use common::MockTransport;
use sgclaw::pipe::{BrowserMessage, BrowserPipeTool, Timing};
use sgclaw::security::MacPolicy;
use sgclaw::skill::zhihu_hotlist::{
    execute_collect, execute_report, load_flow, ZhihuHotlistCollectRequest,
    ZhihuHotlistReportRequest,
};
use sgclaw::skill::zhihu_hotlist_store::load_latest_snapshot;

// Tests for the Zhihu hot-list skill. Each collection test feeds a fixed,
// ordered list of `BrowserMessage::Response` values to a `MockTransport`,
// runs `execute_collect`, and inspects the snapshot written to a temporary
// store directory.

/// Expands to one scripted `BrowserMessage::Response`.
///
/// Arguments are the sequence number, the success flag and the JSON payload.
/// Every response shares an empty `aom_snapshot` and the same `Timing`
/// (`queue_ms: 1`, `exec_ms: 10`). This is a macro rather than a function so
/// the field types declared in `sgclaw::pipe` stay inferred.
macro_rules! scripted_response {
    ($seq:expr, $success:expr, $data:expr) => {
        BrowserMessage::Response {
            seq: $seq,
            success: $success,
            data: $data,
            aom_snapshot: vec![],
            timing: Timing {
                queue_ms: 1,
                exec_ms: 10,
            },
        }
    };
}

/// Parses the MAC policy used by every test: two Zhihu domains are allowed,
/// seven pipe actions are allowed and none are blocked.
///
/// Panics if `MacPolicy::from_json_str` rejects the embedded JSON.
fn test_policy() -> MacPolicy {
    let policy_json = r#"{ "version": "1.0", "domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] }, "pipe_actions": { "allowed": ["click", "type", "navigate", "getText", "getHtml", "waitForSelector", "scrollTo"], "blocked": [] } }"#;
    MacPolicy::from_json_str(policy_json).unwrap()
}

/// Returns a path under the system temp directory named
/// `sgclaw-<label>-<nanoseconds since the Unix epoch>`.
///
/// The directory itself is not created here.
fn temp_store_dir(label: &str) -> PathBuf {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let unique = since_epoch.as_nanos();
    let dir_name = format!("sgclaw-{label}-{unique}");
    std::env::temp_dir().join(dir_name)
}

/// Fixture returned as the `html` payload of the hot-list page: two entries,
/// each with a title, a summary and a heat label.
fn hotlist_html() -> String {
    // NOTE(review): only text lines are visible in this fixture; if the
    // original contained HTML markup it is not present here — confirm.
    String::from(
        r#"

第一条热榜

第一条摘要
1234 热度

第二条热榜

第二条摘要
5.6 万热度
"#,
    )
}

/// Fixture returned as the `html` payload of a question page.
///
/// NOTE(review): as visible here the template has no placeholders, so the
/// four count arguments are never interpolated — confirm the fixture markup
/// was not lost before relying on this helper.
fn comment_html(
    first_reply: u64,
    first_upvote: u64,
    second_reply: u64,
    second_upvote: u64,
) -> String {
    format!(
        r#"
"#
    )
}

/// The bundled flow definition exposes the hot-list URL, the Zhihu domain and
/// the selectors the collector relies on.
#[test]
fn load_hotlist_flow_preserves_expected_selectors() {
    let flow = load_flow().unwrap();

    assert_eq!(flow.hotlist_url, "https://www.zhihu.com/hot");
    assert_eq!(flow.domains["zhihu"], "www.zhihu.com");

    let hotlist_item_selector = &flow.selectors["hotlist_item"];
    assert!(hotlist_item_selector.contains("HotList-item"));

    let comment_metric_selector = &flow.selectors["comment_metric"];
    assert!(comment_metric_selector.contains("button"));
}

/// Collecting two hot-list items with two comments each stores a snapshot
/// that `load_latest_snapshot` and `execute_report` can read back.
#[test]
fn zhihu_hotlist_collect_persists_snapshot_and_report_reads_latest() {
    let store_dir = temp_store_dir("hotlist-collect");
    let store_path = store_dir.display().to_string();

    // Fifteen scripted responses, in sequence order. Which pipe action each
    // one answers is decided by the collector — presumably page probe, wait,
    // HTML fetch, then navigate / wait / scroll per question; confirm
    // against the flow.
    let script = vec![
        // Responses 1-3: the hot-list page itself.
        scripted_response!(
            1,
            true,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" })
        ),
        scripted_response!(2, true, serde_json::json!({ "ready": true })),
        scripted_response!(
            3,
            true,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" })
        ),
        // Responses 4-9: question 123.
        scripted_response!(
            4,
            true,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" })
        ),
        scripted_response!(5, true, serde_json::json!({ "ready": true })),
        scripted_response!(6, true, serde_json::json!({ "scrolled": true })),
        scripted_response!(7, true, serde_json::json!({ "ready": true })),
        scripted_response!(8, true, serde_json::json!({ "scrolled": true })),
        scripted_response!(
            9,
            true,
            serde_json::json!({ "html": comment_html(3, 15, 1, 8), "url": "https://www.zhihu.com/question/123" })
        ),
        // Responses 10-15: question 456.
        scripted_response!(
            10,
            true,
            serde_json::json!({ "url": "https://www.zhihu.com/question/456" })
        ),
        scripted_response!(11, true, serde_json::json!({ "ready": true })),
        scripted_response!(12, true, serde_json::json!({ "scrolled": true })),
        scripted_response!(13, true, serde_json::json!({ "ready": true })),
        scripted_response!(14, true, serde_json::json!({ "scrolled": true })),
        scripted_response!(
            15,
            true,
            serde_json::json!({ "html": comment_html(5, 20, 4, 16), "url": "https://www.zhihu.com/question/456" })
        ),
    ];
    let transport = Arc::new(MockTransport::new(script));

    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let request = ZhihuHotlistCollectRequest {
        top_n: 2,
        comments_per_item: 2,
        store_dir: Some(store_path.clone()),
    };
    let result = execute_collect(transport.as_ref(), &browser_tool, request).unwrap();

    assert_eq!(result.item_count, 2);
    assert!(result.summary.contains("知乎热榜快照已保存"));

    // The persisted snapshot must mirror the fixtures.
    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    assert_eq!(snapshot.items.len(), 2);

    let first_item = &snapshot.items[0];
    assert_eq!(first_item.title, "第一条热榜");
    assert_eq!(first_item.summary, "第一条摘要");
    assert_eq!(first_item.heat_value, Some(1234));
    assert_eq!(first_item.comment_metrics.len(), 2);
    assert_eq!(first_item.comment_metrics[0].reply_count, Some(3));
    assert_eq!(first_item.comment_metrics[0].upvote_count, Some(15));

    // "5.6 万热度" is expected to be stored as 56_000.
    assert_eq!(snapshot.items[1].heat_value, Some(56_000));
    assert_eq!(snapshot.collection_stats.total_comment_metric_records, 4);

    let report_request = ZhihuHotlistReportRequest {
        snapshot_id: Some(result.snapshot_id.clone()),
        store_dir: Some(store_path),
        top_n: 2,
    };
    let report = execute_report(report_request).unwrap();

    // 4 and 23 equal 3 + 1 and 15 + 8 from the first question's fixture
    // arguments; presumably the report totals per-item metrics — confirm.
    for expected in ["第一条热榜", "第二条热榜", "回复 4", "赞同 23"] {
        assert!(report.summary.contains(expected));
    }

    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&store_dir);
}

/// When the seventh scripted response reports failure, the collect call still
/// succeeds and stores the item without any comment metrics.
#[test]
fn zhihu_hotlist_collect_persists_partial_snapshot_when_comment_capture_fails() {
    let store_dir = temp_store_dir("hotlist-partial");

    let script = vec![
        // Responses 1-3: the hot-list page itself.
        scripted_response!(
            1,
            true,
            serde_json::json!({ "text": "知乎热榜 当前页", "url": "https://www.zhihu.com/hot" })
        ),
        scripted_response!(2, true, serde_json::json!({ "ready": true })),
        scripted_response!(
            3,
            true,
            serde_json::json!({ "html": hotlist_html(), "url": "https://www.zhihu.com/hot" })
        ),
        // Responses 4-6: question 123 up to the first scroll.
        scripted_response!(
            4,
            true,
            serde_json::json!({ "url": "https://www.zhihu.com/question/123" })
        ),
        scripted_response!(5, true, serde_json::json!({ "ready": true })),
        scripted_response!(6, true, serde_json::json!({ "scrolled": true })),
        // Response 7: the only failing reply in the script.
        scripted_response!(
            7,
            false,
            serde_json::json!({ "error": "comment list missing" })
        ),
    ];
    let transport = Arc::new(MockTransport::new(script));

    let browser_tool = BrowserPipeTool::new(
        transport.clone(),
        test_policy(),
        vec![1, 2, 3, 4, 5, 6, 7, 8],
    )
    .with_response_timeout(Duration::from_secs(1));

    let request = ZhihuHotlistCollectRequest {
        top_n: 1,
        comments_per_item: 2,
        store_dir: Some(store_dir.display().to_string()),
    };
    let result = execute_collect(transport.as_ref(), &browser_tool, request).unwrap();

    let snapshot = load_latest_snapshot(&store_dir).unwrap();
    let stats = &snapshot.collection_stats;

    assert_eq!(result.item_count, 1);
    assert_eq!(stats.partial_items, 1);
    assert_eq!(stats.total_comment_metric_records, 0);
    assert!(snapshot.items[0].comment_metrics.is_empty());

    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&store_dir);
}