286 lines
9.6 KiB
Rust
286 lines
9.6 KiB
Rust
use std::fs;
|
|
use std::path::Path;
|
|
|
|
use regex::Regex;
|
|
use serde_json::{json, Value};
|
|
use zeroclaw::tools::Tool;
|
|
|
|
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
|
|
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
|
|
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
|
|
|
|
/// Zhihu host name, passed as the trailing domain argument of the `browser_tool.invoke` calls below.
/// NOTE(review): presumably the domain the browser action is expected/allowed to run against —
/// confirm against `BrowserPipeTool::invoke`.
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
|
|
/// URL of the Zhihu hot-list page; used as the navigation target and echoed in the navigation log line.
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
|
|
/// Deterministic workflows that can be selected for a Zhihu hot-list instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
    /// Collect the hot list and export it as an Excel workbook.
    ZhihuHotlistExportXlsx,
    /// Collect the hot list and render it as an HTML screen ("大屏") page.
    ZhihuHotlistScreen,
}
|
|
|
|
/// One hot-list entry parsed from the rows returned by the browser extractor script.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HotlistItem {
    /// Rank reported by the script, or the 1-based position among accepted rows when absent.
    rank: u64,
    /// Trimmed, non-empty entry title.
    title: String,
    /// Trimmed, non-empty heat text, kept as the string the script returned.
    heat: String,
}
|
|
|
|
pub fn detect_route(
|
|
instruction: &str,
|
|
page_url: Option<&str>,
|
|
page_title: Option<&str>,
|
|
) -> Option<WorkflowRoute> {
|
|
if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
|
|
return None;
|
|
}
|
|
|
|
let normalized = instruction.to_ascii_lowercase();
|
|
if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") {
|
|
return Some(WorkflowRoute::ZhihuHotlistScreen);
|
|
}
|
|
if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") {
|
|
return Some(WorkflowRoute::ZhihuHotlistExportXlsx);
|
|
}
|
|
None
|
|
}
|
|
|
|
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
|
|
let normalized = summary.to_ascii_lowercase();
|
|
if normalized.contains(".xlsx") || normalized.contains(".html") {
|
|
return false;
|
|
}
|
|
|
|
let looks_like_denial = summary.contains("拒绝") ||
|
|
normalized.contains("denied") ||
|
|
normalized.contains("failed") ||
|
|
summary.contains("失败") ||
|
|
summary.contains("无法");
|
|
|
|
looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen)
|
|
}
|
|
|
|
pub fn execute_route<T: Transport + 'static>(
|
|
transport: &T,
|
|
browser_tool: &BrowserPipeTool<T>,
|
|
workspace_root: &Path,
|
|
instruction: &str,
|
|
route: WorkflowRoute,
|
|
) -> Result<String, PipeError> {
|
|
let top_n = extract_top_n(instruction);
|
|
let items = collect_hotlist_items(transport, browser_tool, top_n)?;
|
|
if items.is_empty() {
|
|
return Err(PipeError::Protocol(
|
|
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
|
|
));
|
|
}
|
|
|
|
match route {
|
|
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
|
|
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
|
|
}
|
|
}
|
|
|
|
fn collect_hotlist_items<T: Transport + 'static>(
|
|
transport: &T,
|
|
browser_tool: &BrowserPipeTool<T>,
|
|
top_n: usize,
|
|
) -> Result<Vec<HotlistItem>, PipeError> {
|
|
navigate_hotlist_with_retry(transport, browser_tool)?;
|
|
transport.send(&AgentMessage::LogEntry {
|
|
level: "info".to_string(),
|
|
message: "call zhihu-hotlist.extract_hotlist".to_string(),
|
|
})?;
|
|
let response = browser_tool.invoke(
|
|
Action::Eval,
|
|
json!({ "script": load_hotlist_extractor_script(top_n)? }),
|
|
ZHIHU_DOMAIN,
|
|
)?;
|
|
if !response.success {
|
|
return Err(PipeError::Protocol(format!(
|
|
"知乎热榜采集失败:{}",
|
|
response
|
|
.data
|
|
.get("error")
|
|
.and_then(|value| value.get("message"))
|
|
.and_then(Value::as_str)
|
|
.unwrap_or("browser script execution failed")
|
|
)));
|
|
}
|
|
|
|
parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data))
|
|
}
|
|
|
|
fn navigate_hotlist_with_retry<T: Transport + 'static>(
|
|
transport: &T,
|
|
browser_tool: &BrowserPipeTool<T>,
|
|
) -> Result<(), PipeError> {
|
|
let mut last_error = None;
|
|
for _ in 0..2 {
|
|
transport.send(&AgentMessage::LogEntry {
|
|
level: "info".to_string(),
|
|
message: format!("navigate {ZHIHU_HOT_URL}"),
|
|
})?;
|
|
match browser_tool.invoke(
|
|
Action::Navigate,
|
|
json!({ "url": ZHIHU_HOT_URL }),
|
|
ZHIHU_DOMAIN,
|
|
) {
|
|
Ok(response) if response.success => return Ok(()),
|
|
Ok(response) => {
|
|
last_error = Some(PipeError::Protocol(format!(
|
|
"navigate failed: {}",
|
|
response.data
|
|
)));
|
|
}
|
|
Err(err) => last_error = Some(err),
|
|
}
|
|
}
|
|
|
|
Err(last_error.unwrap_or_else(|| {
|
|
PipeError::Protocol("navigate failed without detailed error".to_string())
|
|
}))
|
|
}
|
|
|
|
fn export_xlsx<T: Transport>(
|
|
transport: &T,
|
|
workspace_root: &Path,
|
|
items: &[HotlistItem],
|
|
) -> Result<String, PipeError> {
|
|
transport.send(&AgentMessage::LogEntry {
|
|
level: "info".to_string(),
|
|
message: "call openxml_office".to_string(),
|
|
})?;
|
|
let tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf());
|
|
let rows = items
|
|
.iter()
|
|
.map(|item| json!([item.rank, item.title, item.heat]))
|
|
.collect::<Vec<_>>();
|
|
let runtime = tokio::runtime::Runtime::new()
|
|
.map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
|
|
let result = runtime
|
|
.block_on(tool.execute(json!({
|
|
"sheet_name": "知乎热榜",
|
|
"columns": ["rank", "title", "heat"],
|
|
"rows": rows,
|
|
})))
|
|
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
|
if !result.success {
|
|
return Err(PipeError::Protocol(
|
|
result.error.unwrap_or_else(|| "openxml_office failed".to_string()),
|
|
));
|
|
}
|
|
|
|
let payload: Value = serde_json::from_str(&result.output)
|
|
.map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?;
|
|
let output_path = payload["output_path"]
|
|
.as_str()
|
|
.ok_or_else(|| PipeError::Protocol("openxml_office did not return output_path".to_string()))?;
|
|
Ok(format!("已导出知乎热榜 Excel {output_path}"))
|
|
}
|
|
|
|
fn export_screen<T: Transport>(
|
|
transport: &T,
|
|
workspace_root: &Path,
|
|
items: &[HotlistItem],
|
|
) -> Result<String, PipeError> {
|
|
transport.send(&AgentMessage::LogEntry {
|
|
level: "info".to_string(),
|
|
message: "call screen_html_export".to_string(),
|
|
})?;
|
|
let tool = ScreenHtmlExportTool::new(workspace_root.to_path_buf());
|
|
let rows = items
|
|
.iter()
|
|
.map(|item| json!([item.rank, item.title, item.heat]))
|
|
.collect::<Vec<_>>();
|
|
let runtime = tokio::runtime::Runtime::new()
|
|
.map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
|
|
let result = runtime
|
|
.block_on(tool.execute(json!({ "rows": rows })))
|
|
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
|
if !result.success {
|
|
return Err(PipeError::Protocol(
|
|
result.error.unwrap_or_else(|| "screen_html_export failed".to_string()),
|
|
));
|
|
}
|
|
|
|
let payload: Value = serde_json::from_str(&result.output)
|
|
.map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?;
|
|
let output_path = payload["output_path"]
|
|
.as_str()
|
|
.ok_or_else(|| PipeError::Protocol("screen_html_export did not return output_path".to_string()))?;
|
|
Ok(format!("已生成知乎热榜大屏 {output_path}"))
|
|
}
|
|
|
|
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
|
|
let script_path = Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
.parent()
|
|
.unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR")))
|
|
.join("skill_lib")
|
|
.join("skills")
|
|
.join("zhihu-hotlist")
|
|
.join("scripts")
|
|
.join("extract_hotlist.js");
|
|
let script = fs::read_to_string(&script_path).map_err(|err| {
|
|
PipeError::Protocol(format!(
|
|
"failed to read zhihu hotlist extractor script {}: {err}",
|
|
script_path.display()
|
|
))
|
|
})?;
|
|
Ok(format!(
|
|
"(function() {{\nconst args = {};\n{}\n}})()",
|
|
json!({ "top_n": top_n.to_string() }),
|
|
script
|
|
))
|
|
}
|
|
|
|
fn parse_hotlist_items_payload(payload: &Value) -> Result<Vec<HotlistItem>, PipeError> {
|
|
let normalized_payload = if let Some(text) = payload.as_str() {
|
|
serde_json::from_str::<Value>(text).unwrap_or_else(|_| Value::String(text.to_string()))
|
|
} else {
|
|
payload.clone()
|
|
};
|
|
|
|
let rows = normalized_payload
|
|
.get("rows")
|
|
.and_then(Value::as_array)
|
|
.ok_or_else(|| {
|
|
PipeError::Protocol("知乎热榜采集失败:浏览器脚本未返回 rows".to_string())
|
|
})?;
|
|
|
|
let mut items = Vec::new();
|
|
for row in rows {
|
|
let Some(cells) = row.as_array() else {
|
|
continue;
|
|
};
|
|
if cells.len() != 3 {
|
|
continue;
|
|
}
|
|
|
|
let rank = cells[0]
|
|
.as_u64()
|
|
.or_else(|| cells[0].as_str().and_then(|value| value.parse::<u64>().ok()))
|
|
.unwrap_or((items.len() + 1) as u64);
|
|
let title = cells[1].as_str().unwrap_or_default().trim().to_string();
|
|
let heat = cells[2].as_str().unwrap_or_default().trim().to_string();
|
|
if title.is_empty() || heat.is_empty() {
|
|
continue;
|
|
}
|
|
items.push(HotlistItem { rank, title, heat });
|
|
}
|
|
|
|
if items.is_empty() {
|
|
return Err(PipeError::Protocol(
|
|
"知乎热榜采集失败:浏览器脚本未返回有效热榜条目".to_string(),
|
|
));
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
fn extract_top_n(instruction: &str) -> usize {
|
|
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
|
|
re.captures(&instruction.to_ascii_lowercase())
|
|
.and_then(|capture| capture.get(1))
|
|
.and_then(|value| value.as_str().parse::<usize>().ok())
|
|
.filter(|value| *value > 0)
|
|
.unwrap_or(10)
|
|
}
|