From 2ae71fb1c982d309d74711a8d2af5ef60719e099 Mon Sep 17 00:00:00 2001 From: zyl Date: Mon, 30 Mar 2026 18:07:19 +0800 Subject: [PATCH] compat: probe zhihu extractor before renavigate --- src/compat/workflow_executor.rs | 120 ++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 7 deletions(-) diff --git a/src/compat/workflow_executor.rs b/src/compat/workflow_executor.rs index 918f4c8..6993908 100644 --- a/src/compat/workflow_executor.rs +++ b/src/compat/workflow_executor.rs @@ -158,7 +158,9 @@ fn collect_hotlist_items( top_n: usize, task_context: &CompatTaskContext, ) -> Result, PipeError> { - ensure_hotlist_page_ready(transport, browser_tool, task_context)?; + if let Some(items) = ensure_hotlist_page_ready(transport, browser_tool, top_n, task_context)? { + return Ok(items); + } transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: "call zhihu-hotlist.extract_hotlist".to_string(), @@ -186,8 +188,9 @@ fn collect_hotlist_items( fn ensure_hotlist_page_ready( transport: &T, browser_tool: &BrowserPipeTool, + top_n: usize, task_context: &CompatTaskContext, -) -> Result<(), PipeError> { +) -> Result>, PipeError> { let starts_on_hotlist = task_context .page_url .as_deref() @@ -198,14 +201,22 @@ fn ensure_hotlist_page_ready( .is_some_and(|title| title.contains("热榜")); if starts_on_hotlist && poll_for_hotlist_readiness(browser_tool)? { - return Ok(()); + return Ok(None); + } + if starts_on_hotlist { + if let Some(items) = probe_hotlist_extractor(transport, browser_tool, top_n)? { + return Ok(Some(items)); + } } let mut last_error = None; for attempt in 0..2 { navigate_hotlist_page(transport, browser_tool)?; if poll_for_hotlist_readiness(browser_tool)? { - return Ok(()); + return Ok(None); + } + if let Some(items) = probe_hotlist_extractor(transport, browser_tool, top_n)? { + return Ok(Some(items)); } last_error = Some(PipeError::Protocol(format!( "知乎热榜页面已打开,但在短轮询窗口内仍未出现可读热榜内容(attempt={})", @@ -216,6 +227,31 @@ fn ensure_hotlist_page_ready( Err(last_error.unwrap_or_else(|| PipeError::Protocol("知乎热榜页面未就绪".to_string()))) } +fn probe_hotlist_extractor( + transport: &T, + browser_tool: &BrowserPipeTool, + top_n: usize, +) -> Result>, PipeError> { + transport.send(&AgentMessage::LogEntry { + level: "info".to_string(), + message: "call zhihu-hotlist.extract_hotlist".to_string(), + })?; + let response = browser_tool.invoke( + Action::Eval, + json!({ "script": load_hotlist_extractor_script(top_n)? }), + ZHIHU_DOMAIN, + )?; + if !response.success { + return Ok(None); + } + + match parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data)) { + Ok(items) if !items.is_empty() => Ok(Some(items)), + Ok(_) => Ok(None), + Err(_) => Ok(None), + } +} + fn navigate_hotlist_page( transport: &T, browser_tool: &BrowserPipeTool, @@ -826,10 +862,14 @@ mod tests { success_browser_response(9, json!({ "text": "" })), success_browser_response(10, json!({ "text": "" })), success_browser_response(11, json!({ "text": "" })), - success_browser_response(12, json!({ "navigated": true })), - success_browser_response(13, json!({ "text": "知乎热榜\n1 问题一 344万热度" })), + success_browser_response(12, json!({ "text": { "rows": [] } })), + success_browser_response(13, json!({ "navigated": true })), success_browser_response( 14, + json!({ "text": "知乎热榜\n1 问题一 344万热度" }), + ), + success_browser_response( + 15, json!({ "text": { "source": "https://www.zhihu.com/hot", @@ -843,7 +883,7 @@ mod tests { let browser_tool = BrowserPipeTool::new( transport.clone(), zhihu_test_policy(), - vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], ) .with_response_timeout(Duration::from_secs(1)); let task_context = CompatTaskContext { @@ -868,6 +908,72 @@ mod tests { .count(); assert_eq!(navigate_count, 2); } + + #[test] + fn collect_hotlist_items_uses_extractor_probe_before_second_navigation() { + let transport = Arc::new(MockWorkflowTransport::new(vec![ + success_browser_response(1, json!({ "navigated": true })), + success_browser_response(2, json!({ "text": "知乎热榜" })), + success_browser_response(3, json!({ "text": "知乎热榜" })), + success_browser_response(4, json!({ "text": "知乎热榜" })), + success_browser_response(5, json!({ "text": "知乎热榜" })), + success_browser_response(6, json!({ "text": "知乎热榜" })), + success_browser_response(7, json!({ "text": "知乎热榜" })), + success_browser_response(8, json!({ "text": "知乎热榜" })), + success_browser_response(9, json!({ "text": "知乎热榜" })), + success_browser_response(10, json!({ "text": "知乎热榜" })), + success_browser_response(11, json!({ "text": "知乎热榜" })), + success_browser_response( + 12, + json!({ + "text": { + "source": "https://www.zhihu.com/hot", + "sheet_name": "知乎热榜", + "columns": ["rank", "title", "heat"], + "rows": [[1, "问题一", "344万"]] + } + }), + ), + success_browser_response( + 13, + json!({ + "text": { + "source": "https://www.zhihu.com/hot", + "sheet_name": "知乎热榜", + "columns": ["rank", "title", "heat"], + "rows": [[1, "问题一", "344万"]] + } + }), + ), + ])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + zhihu_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + ) + .with_response_timeout(Duration::from_secs(1)); + let task_context = CompatTaskContext { + page_url: Some("https://www.zhihu.com/".to_string()), + page_title: Some("知乎".to_string()), + ..CompatTaskContext::default() + }; + + let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context) + .expect("hotlist collection should succeed via extractor probe"); + + assert_eq!(items.len(), 1); + let sent = transport.sent_messages(); + let navigate_count = sent + .iter() + .filter(|message| { + matches!( + message, + AgentMessage::Command { action, .. } if action == &Action::Navigate + ) + }) + .count(); + assert_eq!(navigate_count, 1); + } } fn load_browser_skill_script(