compat: probe zhihu extractor before renavigate

This commit is contained in:
zyl
2026-03-30 18:07:19 +08:00
parent 5bccd02d6f
commit 2ae71fb1c9

View File

@@ -158,7 +158,9 @@ fn collect_hotlist_items<T: Transport + 'static>(
top_n: usize,
task_context: &CompatTaskContext,
) -> Result<Vec<HotlistItem>, PipeError> {
ensure_hotlist_page_ready(transport, browser_tool, task_context)?;
if let Some(items) = ensure_hotlist_page_ready(transport, browser_tool, top_n, task_context)? {
return Ok(items);
}
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-hotlist.extract_hotlist".to_string(),
@@ -186,8 +188,9 @@ fn collect_hotlist_items<T: Transport + 'static>(
fn ensure_hotlist_page_ready<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
top_n: usize,
task_context: &CompatTaskContext,
) -> Result<(), PipeError> {
) -> Result<Option<Vec<HotlistItem>>, PipeError> {
let starts_on_hotlist = task_context
.page_url
.as_deref()
@@ -198,14 +201,22 @@ fn ensure_hotlist_page_ready<T: Transport + 'static>(
.is_some_and(|title| title.contains("热榜"));
if starts_on_hotlist && poll_for_hotlist_readiness(browser_tool)? {
return Ok(());
return Ok(None);
}
if starts_on_hotlist {
if let Some(items) = probe_hotlist_extractor(transport, browser_tool, top_n)? {
return Ok(Some(items));
}
}
let mut last_error = None;
for attempt in 0..2 {
navigate_hotlist_page(transport, browser_tool)?;
if poll_for_hotlist_readiness(browser_tool)? {
return Ok(());
return Ok(None);
}
if let Some(items) = probe_hotlist_extractor(transport, browser_tool, top_n)? {
return Ok(Some(items));
}
last_error = Some(PipeError::Protocol(format!(
"知乎热榜页面已打开但在短轮询窗口内仍未出现可读热榜内容attempt={}",
@@ -216,6 +227,31 @@ fn ensure_hotlist_page_ready<T: Transport + 'static>(
Err(last_error.unwrap_or_else(|| PipeError::Protocol("知乎热榜页面未就绪".to_string())))
}
fn probe_hotlist_extractor<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
top_n: usize,
) -> Result<Option<Vec<HotlistItem>>, PipeError> {
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-hotlist.extract_hotlist".to_string(),
})?;
let response = browser_tool.invoke(
Action::Eval,
json!({ "script": load_hotlist_extractor_script(top_n)? }),
ZHIHU_DOMAIN,
)?;
if !response.success {
return Ok(None);
}
match parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data)) {
Ok(items) if !items.is_empty() => Ok(Some(items)),
Ok(_) => Ok(None),
Err(_) => Ok(None),
}
}
fn navigate_hotlist_page<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
@@ -826,10 +862,14 @@ mod tests {
success_browser_response(9, json!({ "text": "" })),
success_browser_response(10, json!({ "text": "" })),
success_browser_response(11, json!({ "text": "" })),
success_browser_response(12, json!({ "navigated": true })),
success_browser_response(13, json!({ "text": "知乎热榜\n1 问题一 344万热度" })),
success_browser_response(12, json!({ "text": { "rows": [] } })),
success_browser_response(13, json!({ "navigated": true })),
success_browser_response(
14,
json!({ "text": "知乎热榜\n1 问题一 344万热度" }),
),
success_browser_response(
15,
json!({
"text": {
"source": "https://www.zhihu.com/hot",
@@ -843,7 +883,7 @@ mod tests {
let browser_tool = BrowserPipeTool::new(
transport.clone(),
zhihu_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
)
.with_response_timeout(Duration::from_secs(1));
let task_context = CompatTaskContext {
@@ -868,6 +908,72 @@ mod tests {
.count();
assert_eq!(navigate_count, 2);
}
/// Scripts one navigation response, ten plain-text "知乎热榜" responses and two structured
/// extractor payloads, then checks that collection yields one item with exactly one
/// `Navigate` command sent.
///
/// NOTE(review): responses 2-11 are presumably consumed by the readiness polls and reported
/// as "not ready" — confirm against `poll_for_hotlist_readiness`.
#[test]
fn collect_hotlist_items_uses_extractor_probe_before_second_navigation() {
    // Structured payload carried by both response 12 and response 13.
    let extractor_payload = json!({
        "text": {
            "source": "https://www.zhihu.com/hot",
            "sheet_name": "知乎热榜",
            "columns": ["rank", "title", "heat"],
            "rows": [[1, "问题一", "344万"]]
        }
    });

    let mut scripted = vec![success_browser_response(1, json!({ "navigated": true }))];
    for seq in 2..=11 {
        scripted.push(success_browser_response(seq, json!({ "text": "知乎热榜" })));
    }
    scripted.push(success_browser_response(12, extractor_payload.clone()));
    scripted.push(success_browser_response(13, extractor_payload));

    let transport = Arc::new(MockWorkflowTransport::new(scripted));
    let expected_ids = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
    let browser_tool = BrowserPipeTool::new(transport.clone(), zhihu_test_policy(), expected_ids)
        .with_response_timeout(Duration::from_secs(1));

    // The task starts on the Zhihu home page rather than on the hotlist page.
    let task_context = CompatTaskContext {
        page_url: Some("https://www.zhihu.com/".to_string()),
        page_title: Some("知乎".to_string()),
        ..CompatTaskContext::default()
    };

    let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
        .expect("hotlist collection should succeed via extractor probe");
    assert_eq!(items.len(), 1);

    // Only a single Navigate command may have been sent.
    let is_navigate = |message: &&AgentMessage| {
        matches!(
            message,
            AgentMessage::Command { action, .. } if action == &Action::Navigate
        )
    };
    let navigate_count = transport
        .sent_messages()
        .iter()
        .filter(is_navigate)
        .count();
    assert_eq!(navigate_count, 1);
}
}
fn load_browser_skill_script(