// File: claw/src/compat/workflow_executor.rs
// Snapshot: 2026-03-30 08:29:44 +08:00 (286 lines, 9.6 KiB, Rust)

use std::fs;
use std::path::Path;
use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
/// Domain string handed to `BrowserPipeTool::invoke` as its third argument by
/// both the navigate and the eval calls in this file.
/// NOTE(review): presumably the domain the browser action is scoped to —
/// confirm against `BrowserPipeTool::invoke`.
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
/// URL of the Zhihu hot-list page opened before the extractor script runs.
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
/// Workflow chosen for a Zhihu hot-list instruction.
///
/// Produced by `detect_route` and consumed by `execute_route`, which picks
/// the matching exporter.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum WorkflowRoute {
    /// Export the hot list to a spreadsheet (dispatched to `export_xlsx`).
    ZhihuHotlistExportXlsx,
    /// Render the hot list as an HTML screen (dispatched to `export_screen`).
    ZhihuHotlistScreen,
}
/// One parsed hot-list row, built by `parse_hotlist_items_payload` from a
/// three-cell row returned by the browser extractor script.
#[derive(Clone, Debug, Eq, PartialEq)]
struct HotlistItem {
    /// Rank taken from the first cell (number or numeric string); when the
    /// cell is neither, the parser substitutes the 1-based position among
    /// the rows accepted so far.
    rank: u64,
    /// Trimmed text of the second cell; never empty for an accepted row.
    title: String,
    /// Trimmed text of the third cell; never empty for an accepted row.
    heat: String,
}
/// Decides which deterministic workflow, if any, should handle `instruction`.
///
/// Returns `None` when `crate::runtime::is_zhihu_hotlist_task` does not
/// recognise the request as a Zhihu hot-list task, or when the instruction
/// mentions neither a screen keyword nor an export keyword.
///
/// Screen keywords are checked first, so an instruction that mentions both a
/// dashboard and an Excel export resolves to
/// [`WorkflowRoute::ZhihuHotlistScreen`].
pub fn detect_route(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> Option<WorkflowRoute> {
    let is_hotlist_task =
        crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title);
    if !is_hotlist_task {
        return None;
    }

    // ASCII keywords are matched case-insensitively against the lowered copy;
    // the Chinese keywords are matched against the instruction as given.
    let lowered = instruction.to_ascii_lowercase();
    let mentions = |ascii_keywords: &[&str], literal_keywords: &[&str]| -> bool {
        ascii_keywords.iter().any(|keyword| lowered.contains(*keyword))
            || literal_keywords
                .iter()
                .any(|keyword| instruction.contains(*keyword))
    };

    if mentions(&["dashboard"], &["大屏", "新标签页"]) {
        Some(WorkflowRoute::ZhihuHotlistScreen)
    } else if mentions(&["excel", "xlsx"], &["导出"]) {
        Some(WorkflowRoute::ZhihuHotlistExportXlsx)
    } else {
        None
    }
}
/// Reports whether the deterministic workflow should run after the agent has
/// already produced `summary`.
///
/// Returns `false` as soon as the summary mentions a `.xlsx` or `.html`
/// artifact (ASCII case-insensitive). Otherwise returns `true` when the
/// summary contains a denial/failure marker or when `route` is one of the
/// hot-list routes. With the two variants `WorkflowRoute` currently defines,
/// the route check always holds, so any summary without an artifact
/// extension yields `true`.
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
    let lowered = summary.to_ascii_lowercase();

    let mentions_artifact = [".xlsx", ".html"]
        .iter()
        .any(|extension| lowered.contains(*extension));
    if mentions_artifact {
        return false;
    }

    // English markers are matched on the lowered text, Chinese markers on the
    // summary as given.
    let denied_in_english = ["denied", "failed"]
        .iter()
        .any(|marker| lowered.contains(*marker));
    let denied_in_chinese = ["拒绝", "失败", "无法"]
        .iter()
        .any(|marker| summary.contains(*marker));
    let is_hotlist_route = matches!(
        route,
        WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen
    );

    denied_in_english || denied_in_chinese || is_hotlist_route
}
/// Runs the deterministic hot-list workflow selected by `route`.
///
/// The requested item count is parsed from `instruction`, the hot list is
/// collected through `browser_tool`, and the rows are passed to the exporter
/// that matches `route`. On success the exporter's summary string is
/// returned.
///
/// # Errors
///
/// Propagates any `PipeError` from collection or export, and returns
/// `PipeError::Protocol` if collection yields no items.
pub fn execute_route<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    workspace_root: &Path,
    instruction: &str,
    route: WorkflowRoute,
) -> Result<String, PipeError> {
    let requested_count = extract_top_n(instruction);
    let hotlist = collect_hotlist_items(transport, browser_tool, requested_count)?;

    // Guard against handing an empty list to the exporters.
    if hotlist.is_empty() {
        let reason = "知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string();
        return Err(PipeError::Protocol(reason));
    }

    match route {
        WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &hotlist),
        WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &hotlist),
    }
}
fn collect_hotlist_items<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
top_n: usize,
) -> Result<Vec<HotlistItem>, PipeError> {
navigate_hotlist_with_retry(transport, browser_tool)?;
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-hotlist.extract_hotlist".to_string(),
})?;
let response = browser_tool.invoke(
Action::Eval,
json!({ "script": load_hotlist_extractor_script(top_n)? }),
ZHIHU_DOMAIN,
)?;
if !response.success {
return Err(PipeError::Protocol(format!(
"知乎热榜采集失败:{}",
response
.data
.get("error")
.and_then(|value| value.get("message"))
.and_then(Value::as_str)
.unwrap_or("browser script execution failed")
)));
}
parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data))
}
/// Navigates the browser to `ZHIHU_HOT_URL`, making at most two attempts.
///
/// Each attempt first sends an info log entry through `transport`; a failure
/// to send that entry aborts immediately. Returns `Ok(())` on the first
/// attempt whose response reports success.
///
/// # Errors
///
/// Returns the error from the last failed attempt: either the `PipeError`
/// raised by the browser call, or a `PipeError::Protocol` describing the
/// unsuccessful response data.
fn navigate_hotlist_with_retry<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
) -> Result<(), PipeError> {
    let mut failure: Option<PipeError> = None;

    for _attempt in 1..=2 {
        let log_entry = AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("navigate {ZHIHU_HOT_URL}"),
        };
        transport.send(&log_entry)?;

        let outcome = browser_tool.invoke(
            Action::Navigate,
            json!({ "url": ZHIHU_HOT_URL }),
            ZHIHU_DOMAIN,
        );
        let error = match outcome {
            Err(err) => err,
            Ok(response) => {
                if response.success {
                    return Ok(());
                }
                PipeError::Protocol(format!("navigate failed: {}", response.data))
            }
        };
        // Remember only the most recent failure.
        failure = Some(error);
    }

    match failure {
        Some(error) => Err(error),
        None => Err(PipeError::Protocol(
            "navigate failed without detailed error".to_string(),
        )),
    }
}
/// Exports the hot-list rows to a spreadsheet through `OpenXmlOfficeTool`.
///
/// Sends an info log entry, builds one `[rank, title, heat]` row per item and
/// drives the tool's asynchronous `execute` to completion on a freshly
/// created tokio runtime. The tool output is parsed as JSON and its
/// `output_path` string is embedded in the returned summary.
///
/// # Errors
///
/// Returns `PipeError::Protocol` when the runtime cannot be created, the tool
/// call errors or reports failure, the output is not valid JSON, or
/// `output_path` is missing or not a string. A failure to send the log entry
/// is propagated unchanged.
fn export_xlsx<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    let log_entry = AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call openxml_office".to_string(),
    };
    transport.send(&log_entry)?;

    let tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf());

    let mut rows = Vec::with_capacity(items.len());
    for item in items {
        rows.push(json!([item.rank, item.title, item.heat]));
    }

    let runtime = match tokio::runtime::Runtime::new() {
        Ok(runtime) => runtime,
        Err(err) => {
            return Err(PipeError::Protocol(format!(
                "failed to create tokio runtime: {err}"
            )));
        }
    };

    let request = json!({
        "sheet_name": "知乎热榜",
        "columns": ["rank", "title", "heat"],
        "rows": rows,
    });
    let result = match runtime.block_on(tool.execute(request)) {
        Ok(result) => result,
        Err(err) => return Err(PipeError::Protocol(err.to_string())),
    };

    if !result.success {
        let reason = result
            .error
            .unwrap_or_else(|| "openxml_office failed".to_string());
        return Err(PipeError::Protocol(reason));
    }

    let payload: Value = match serde_json::from_str(&result.output) {
        Ok(payload) => payload,
        Err(err) => {
            return Err(PipeError::Protocol(format!(
                "invalid openxml_office output: {err}"
            )));
        }
    };
    let Some(output_path) = payload["output_path"].as_str() else {
        return Err(PipeError::Protocol(
            "openxml_office did not return output_path".to_string(),
        ));
    };

    Ok(format!("已导出知乎热榜 Excel {output_path}"))
}
/// Renders the hot-list rows as an HTML screen through `ScreenHtmlExportTool`.
///
/// Sends an info log entry, builds one `[rank, title, heat]` row per item and
/// drives the tool's asynchronous `execute` to completion on a freshly
/// created tokio runtime. The tool output is parsed as JSON and its
/// `output_path` string is embedded in the returned summary.
///
/// # Errors
///
/// Returns `PipeError::Protocol` when the runtime cannot be created, the tool
/// call errors or reports failure, the output is not valid JSON, or
/// `output_path` is missing or not a string. A failure to send the log entry
/// is propagated unchanged.
fn export_screen<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    let log_entry = AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call screen_html_export".to_string(),
    };
    transport.send(&log_entry)?;

    let tool = ScreenHtmlExportTool::new(workspace_root.to_path_buf());

    let mut rows = Vec::with_capacity(items.len());
    for item in items {
        rows.push(json!([item.rank, item.title, item.heat]));
    }

    let runtime = match tokio::runtime::Runtime::new() {
        Ok(runtime) => runtime,
        Err(err) => {
            return Err(PipeError::Protocol(format!(
                "failed to create tokio runtime: {err}"
            )));
        }
    };

    let request = json!({ "rows": rows });
    let result = match runtime.block_on(tool.execute(request)) {
        Ok(result) => result,
        Err(err) => return Err(PipeError::Protocol(err.to_string())),
    };

    if !result.success {
        let reason = result
            .error
            .unwrap_or_else(|| "screen_html_export failed".to_string());
        return Err(PipeError::Protocol(reason));
    }

    let payload: Value = match serde_json::from_str(&result.output) {
        Ok(payload) => payload,
        Err(err) => {
            return Err(PipeError::Protocol(format!(
                "invalid screen_html_export output: {err}"
            )));
        }
    };
    let Some(output_path) = payload["output_path"].as_str() else {
        return Err(PipeError::Protocol(
            "screen_html_export did not return output_path".to_string(),
        ));
    };

    Ok(format!("已生成知乎热榜大屏 {output_path}"))
}
/// Reads the hot-list extractor script from disk and wraps it in an
/// immediately-invoked function that defines an `args` object.
///
/// The script is looked up at
/// `skill_lib/skills/zhihu-hotlist/scripts/extract_hotlist.js` under the
/// parent of the compile-time `CARGO_MANIFEST_DIR` (or under that directory
/// itself when it has no parent). `top_n` is passed to the script as a
/// string-valued `top_n` entry of `args`.
///
/// # Errors
///
/// Returns `PipeError::Protocol` naming the path when the file cannot be
/// read.
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let base_dir = manifest_dir.parent().unwrap_or(manifest_dir);

    let mut script_path = base_dir.to_path_buf();
    script_path.extend([
        "skill_lib",
        "skills",
        "zhihu-hotlist",
        "scripts",
        "extract_hotlist.js",
    ]);

    let script = match fs::read_to_string(&script_path) {
        Ok(contents) => contents,
        Err(err) => {
            return Err(PipeError::Protocol(format!(
                "failed to read zhihu hotlist extractor script {}: {err}",
                script_path.display()
            )));
        }
    };

    // The count is serialised as a JSON string, not a number.
    let args = json!({ "top_n": top_n.to_string() });
    Ok(format!(
        "(function() {{\nconst args = {};\n{}\n}})()",
        args, script
    ))
}
/// Converts the extractor script's result into a list of `HotlistItem`s.
///
/// `payload` may be a JSON value or a string containing JSON; a string that
/// does not parse as JSON is kept as a plain string value (and then fails the
/// `rows` lookup). The decoded value must have a `rows` array. Each row is
/// accepted only if it is an array of exactly three cells whose second and
/// third cells are non-blank strings; every other row is skipped silently.
///
/// The rank comes from the first cell when it is an unsigned number or a
/// string that parses as one; otherwise the 1-based position among the rows
/// accepted so far is used.
///
/// # Errors
///
/// Returns `PipeError::Protocol` when `rows` is missing or not an array, or
/// when no row survives the filtering.
fn parse_hotlist_items_payload(payload: &Value) -> Result<Vec<HotlistItem>, PipeError> {
    let decoded = match payload.as_str() {
        Some(text) => match serde_json::from_str::<Value>(text) {
            Ok(value) => value,
            Err(_) => Value::String(text.to_string()),
        },
        None => payload.clone(),
    };

    let Some(rows) = decoded.get("rows").and_then(Value::as_array) else {
        return Err(PipeError::Protocol(
            "知乎热榜采集失败:浏览器脚本未返回 rows".to_string(),
        ));
    };

    let mut items: Vec<HotlistItem> = Vec::new();
    for row in rows {
        // Only rows that are arrays of exactly three cells are considered.
        let Some([rank_cell, title_cell, heat_cell]) = row.as_array().map(Vec::as_slice) else {
            continue;
        };

        let title = title_cell.as_str().unwrap_or_default().trim();
        let heat = heat_cell.as_str().unwrap_or_default().trim();
        if title.is_empty() || heat.is_empty() {
            continue;
        }

        let rank = match rank_cell.as_u64() {
            Some(number) => number,
            None => rank_cell
                .as_str()
                .and_then(|text| text.parse::<u64>().ok())
                .unwrap_or((items.len() + 1) as u64),
        };

        items.push(HotlistItem {
            rank,
            title: title.to_string(),
            heat: heat.to_string(),
        });
    }

    if items.is_empty() {
        Err(PipeError::Protocol(
            "知乎热榜采集失败:浏览器脚本未返回有效热榜条目".to_string(),
        ))
    } else {
        Ok(items)
    }
}
fn extract_top_n(instruction: &str) -> usize {
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
re.captures(&instruction.to_ascii_lowercase())
.and_then(|capture| capture.get(1))
.and_then(|value| value.as_str().parse::<usize>().ok())
.filter(|value| *value > 0)
.unwrap_or(10)
}