use std::fs; use std::path::Path; use regex::Regex; use serde_json::{json, Value}; use zeroclaw::tools::Tool; use crate::compat::openxml_office_tool::OpenXmlOfficeTool; use crate::compat::screen_html_export_tool::ScreenHtmlExportTool; use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport}; const ZHIHU_DOMAIN: &str = "www.zhihu.com"; const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot"; #[derive(Debug, Clone, PartialEq, Eq)] pub enum WorkflowRoute { ZhihuHotlistExportXlsx, ZhihuHotlistScreen, } #[derive(Debug, Clone, PartialEq, Eq)] struct HotlistItem { rank: u64, title: String, heat: String, } pub fn detect_route( instruction: &str, page_url: Option<&str>, page_title: Option<&str>, ) -> Option { if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) { return None; } let normalized = instruction.to_ascii_lowercase(); if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") { return Some(WorkflowRoute::ZhihuHotlistScreen); } if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") { return Some(WorkflowRoute::ZhihuHotlistExportXlsx); } None } pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool { let normalized = summary.to_ascii_lowercase(); if normalized.contains(".xlsx") || normalized.contains(".html") { return false; } let looks_like_denial = summary.contains("拒绝") || normalized.contains("denied") || normalized.contains("failed") || summary.contains("失败") || summary.contains("无法"); looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen) } pub fn execute_route( transport: &T, browser_tool: &BrowserPipeTool, workspace_root: &Path, instruction: &str, route: WorkflowRoute, ) -> Result { let top_n = extract_top_n(instruction); let items = collect_hotlist_items(transport, browser_tool, top_n)?; if items.is_empty() { return Err(PipeError::Protocol( "知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(), )); } match route { WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items), WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items), } } fn collect_hotlist_items( transport: &T, browser_tool: &BrowserPipeTool, top_n: usize, ) -> Result, PipeError> { navigate_hotlist_with_retry(transport, browser_tool)?; transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: "call zhihu-hotlist.extract_hotlist".to_string(), })?; let response = browser_tool.invoke( Action::Eval, json!({ "script": load_hotlist_extractor_script(top_n)? }), ZHIHU_DOMAIN, )?; if !response.success { return Err(PipeError::Protocol(format!( "知乎热榜采集失败:{}", response .data .get("error") .and_then(|value| value.get("message")) .and_then(Value::as_str) .unwrap_or("browser script execution failed") ))); } parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data)) } fn navigate_hotlist_with_retry( transport: &T, browser_tool: &BrowserPipeTool, ) -> Result<(), PipeError> { let mut last_error = None; for _ in 0..2 { transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: format!("navigate {ZHIHU_HOT_URL}"), })?; match browser_tool.invoke( Action::Navigate, json!({ "url": ZHIHU_HOT_URL }), ZHIHU_DOMAIN, ) { Ok(response) if response.success => return Ok(()), Ok(response) => { last_error = Some(PipeError::Protocol(format!( "navigate failed: {}", response.data ))); } Err(err) => last_error = Some(err), } } Err(last_error.unwrap_or_else(|| { PipeError::Protocol("navigate failed without detailed error".to_string()) })) } fn export_xlsx( transport: &T, workspace_root: &Path, items: &[HotlistItem], ) -> Result { transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: "call openxml_office".to_string(), })?; let tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf()); let rows = items .iter() .map(|item| json!([item.rank, item.title, item.heat])) .collect::>(); let runtime = tokio::runtime::Runtime::new() .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?; let result = runtime .block_on(tool.execute(json!({ "sheet_name": "知乎热榜", "columns": ["rank", "title", "heat"], "rows": rows, }))) .map_err(|err| PipeError::Protocol(err.to_string()))?; if !result.success { return Err(PipeError::Protocol( result.error.unwrap_or_else(|| "openxml_office failed".to_string()), )); } let payload: Value = serde_json::from_str(&result.output) .map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?; let output_path = payload["output_path"] .as_str() .ok_or_else(|| PipeError::Protocol("openxml_office did not return output_path".to_string()))?; Ok(format!("已导出知乎热榜 Excel {output_path}")) } fn export_screen( transport: &T, workspace_root: &Path, items: &[HotlistItem], ) -> Result { transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: "call screen_html_export".to_string(), })?; let tool = ScreenHtmlExportTool::new(workspace_root.to_path_buf()); let rows = items .iter() .map(|item| json!([item.rank, item.title, item.heat])) .collect::>(); let runtime = tokio::runtime::Runtime::new() .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?; let result = runtime .block_on(tool.execute(json!({ "rows": rows }))) .map_err(|err| PipeError::Protocol(err.to_string()))?; if !result.success { return Err(PipeError::Protocol( result.error.unwrap_or_else(|| "screen_html_export failed".to_string()), )); } let payload: Value = serde_json::from_str(&result.output) .map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?; let output_path = payload["output_path"] .as_str() .ok_or_else(|| PipeError::Protocol("screen_html_export did not return output_path".to_string()))?; Ok(format!("已生成知乎热榜大屏 {output_path}")) } fn load_hotlist_extractor_script(top_n: usize) -> Result { let script_path = Path::new(env!("CARGO_MANIFEST_DIR")) .parent() .unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR"))) .join("skill_lib") .join("skills") .join("zhihu-hotlist") .join("scripts") .join("extract_hotlist.js"); let script = fs::read_to_string(&script_path).map_err(|err| { PipeError::Protocol(format!( "failed to read zhihu hotlist extractor script {}: {err}", script_path.display() )) })?; Ok(format!( "(function() {{\nconst args = {};\n{}\n}})()", json!({ "top_n": top_n.to_string() }), script )) } fn parse_hotlist_items_payload(payload: &Value) -> Result, PipeError> { let normalized_payload = if let Some(text) = payload.as_str() { serde_json::from_str::(text).unwrap_or_else(|_| Value::String(text.to_string())) } else { payload.clone() }; let rows = normalized_payload .get("rows") .and_then(Value::as_array) .ok_or_else(|| { PipeError::Protocol("知乎热榜采集失败:浏览器脚本未返回 rows".to_string()) })?; let mut items = Vec::new(); for row in rows { let Some(cells) = row.as_array() else { continue; }; if cells.len() != 3 { continue; } let rank = cells[0] .as_u64() .or_else(|| cells[0].as_str().and_then(|value| value.parse::().ok())) .unwrap_or((items.len() + 1) as u64); let title = cells[1].as_str().unwrap_or_default().trim().to_string(); let heat = cells[2].as_str().unwrap_or_default().trim().to_string(); if title.is_empty() || heat.is_empty() { continue; } items.push(HotlistItem { rank, title, heat }); } if items.is_empty() { return Err(PipeError::Protocol( "知乎热榜采集失败:浏览器脚本未返回有效热榜条目".to_string(), )); } Ok(items) } fn extract_top_n(instruction: &str) -> usize { let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex"); re.captures(&instruction.to_ascii_lowercase()) .and_then(|capture| capture.get(1)) .and_then(|value| value.as_str().parse::().ok()) .filter(|value| *value > 0) .unwrap_or(10) }