wip: checkpoint 2026-03-29 runtime work
This commit is contained in:
346
src/compat/workflow_executor.rs
Normal file
346
src/compat/workflow_executor.rs
Normal file
@@ -0,0 +1,346 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::path::Path;
|
||||
|
||||
use regex::Regex;
|
||||
use serde_json::{json, Value};
|
||||
use zeroclaw::tools::Tool;
|
||||
|
||||
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
|
||||
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
|
||||
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
|
||||
|
||||
/// Host used for browser-pipe permission checks on Zhihu actions.
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
/// Landing page for the Zhihu hotlist.
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
/// Root selectors tried in order when scraping page text, falling back from
/// the most specific container to the whole document.
const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"];
|
||||
|
||||
/// High-level deterministic workflow detected from the user's instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
    /// Scrape the hotlist and export it as an .xlsx workbook.
    ZhihuHotlistExportXlsx,
    /// Scrape the hotlist and render it as a dashboard ("大屏") HTML page.
    ZhihuHotlistScreen,
}
|
||||
|
||||
/// One parsed hotlist entry.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HotlistItem {
    /// 1-based position on the hotlist.
    rank: u64,
    /// Question/topic title as scraped from the page.
    title: String,
    /// Heat value with spaces and the trailing "热度" marker removed.
    heat: String,
}
|
||||
|
||||
pub fn detect_route(
|
||||
instruction: &str,
|
||||
page_url: Option<&str>,
|
||||
page_title: Option<&str>,
|
||||
) -> Option<WorkflowRoute> {
|
||||
if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let normalized = instruction.to_ascii_lowercase();
|
||||
if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") {
|
||||
return Some(WorkflowRoute::ZhihuHotlistScreen);
|
||||
}
|
||||
if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") {
|
||||
return Some(WorkflowRoute::ZhihuHotlistExportXlsx);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide whether to run the deterministic fallback workflow after the model
/// produced `summary`.
///
/// Returns `false` as soon as the summary already names an artifact
/// (".xlsx"/".html"), i.e. the deliverable was evidently produced.
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
    let normalized = summary.to_ascii_lowercase();
    if normalized.contains(".xlsx") || normalized.contains(".html") {
        return false;
    }

    // Heuristic: summary reads like a refusal or failure (Chinese or English).
    let looks_like_denial = summary.contains("拒绝") ||
        normalized.contains("denied") ||
        normalized.contains("failed") ||
        summary.contains("失败") ||
        summary.contains("无法");

    // NOTE(review): the matches! below lists every current WorkflowRoute
    // variant, so it is always true and `looks_like_denial` never affects the
    // result — today this function returns true whenever no artifact path was
    // mentioned. Confirm whether that is intended before adding new routes.
    looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen)
}
|
||||
|
||||
pub fn execute_route<T: Transport + 'static>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
workspace_root: &Path,
|
||||
instruction: &str,
|
||||
route: WorkflowRoute,
|
||||
) -> Result<String, PipeError> {
|
||||
let top_n = extract_top_n(instruction);
|
||||
let items = collect_hotlist_items(transport, browser_tool, top_n)?;
|
||||
if items.is_empty() {
|
||||
return Err(PipeError::Protocol(
|
||||
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
match route {
|
||||
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
|
||||
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
|
||||
}
|
||||
}
|
||||
|
||||
/// Navigate to the hotlist and scrape items, trying progressively broader
/// root selectors until one yields parseable entries.
///
/// Returns `Ok` with an empty vec — not an error — when no selector produced
/// items; the caller converts that into a protocol error.
fn collect_hotlist_items<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    top_n: usize,
) -> Result<Vec<HotlistItem>, PipeError> {
    navigate_hotlist_with_retry(transport, browser_tool)?;

    for selector in HOTLIST_ROOT_SELECTORS {
        // Log before each extension call so the transcript shows what was tried.
        transport.send(&AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("getText {selector}"),
        })?;
        let response = browser_tool.invoke(
            Action::GetText,
            json!({ "selector": selector }),
            ZHIHU_DOMAIN,
        )?;
        if !response.success {
            // Selector absent on this page layout; fall through to the next one.
            continue;
        }
        let text = response.data["text"].as_str().unwrap_or_default();
        let items = parse_hotlist_items(text, top_n);
        if !items.is_empty() {
            return Ok(items);
        }
    }

    Ok(Vec::new())
}
|
||||
|
||||
/// Navigate to the hotlist URL, retrying once on failure.
///
/// A log entry is sent through the transport before each attempt; a transport
/// send failure aborts immediately via `?`. After two failed attempts the
/// last navigation error is returned.
fn navigate_hotlist_with_retry<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
) -> Result<(), PipeError> {
    let mut last_error = None;
    // Two attempts total: the initial load plus one retry.
    for _ in 0..2 {
        transport.send(&AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("navigate {ZHIHU_HOT_URL}"),
        })?;
        match browser_tool.invoke(
            Action::Navigate,
            json!({ "url": ZHIHU_HOT_URL }),
            ZHIHU_DOMAIN,
        ) {
            Ok(response) if response.success => return Ok(()),
            Ok(response) => {
                // Well-formed but unsuccessful response: keep its payload for
                // the eventual error message.
                last_error = Some(PipeError::Protocol(format!(
                    "navigate failed: {}",
                    response.data
                )));
            }
            Err(err) => last_error = Some(err),
        }
    }

    Err(last_error.unwrap_or_else(|| {
        PipeError::Protocol("navigate failed without detailed error".to_string())
    }))
}
|
||||
|
||||
/// Write the scraped items to an .xlsx workbook via the OpenXML office tool
/// and return a human-readable summary containing the output path.
fn export_xlsx<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call openxml_office".to_string(),
    })?;
    let tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf());
    // One JSON array per item, in column order [rank, title, heat].
    let rows = items
        .iter()
        .map(|item| json!([item.rank, item.title, item.heat]))
        .collect::<Vec<_>>();
    // NOTE(review): a fresh tokio runtime is built on every call, and
    // `block_on` will panic if this is ever invoked from inside an async
    // context — confirm all callers are synchronous.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let result = runtime
        .block_on(tool.execute(json!({
            "sheet_name": "知乎热榜",
            "columns": ["rank", "title", "heat"],
            "rows": rows,
        })))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        return Err(PipeError::Protocol(
            result.error.unwrap_or_else(|| "openxml_office failed".to_string()),
        ));
    }

    // The tool reports where it wrote the file as JSON: {"output_path": "..."}.
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?;
    let output_path = payload["output_path"]
        .as_str()
        .ok_or_else(|| PipeError::Protocol("openxml_office did not return output_path".to_string()))?;
    Ok(format!("已导出知乎热榜 Excel {output_path}"))
}
|
||||
|
||||
/// Render the scraped items as a dashboard HTML page via the screen-export
/// tool and return a human-readable summary containing the output path.
fn export_screen<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call screen_html_export".to_string(),
    })?;
    let tool = ScreenHtmlExportTool::new(workspace_root.to_path_buf());
    // One JSON array per item, in order [rank, title, heat].
    let rows = items
        .iter()
        .map(|item| json!([item.rank, item.title, item.heat]))
        .collect::<Vec<_>>();
    // NOTE(review): a fresh tokio runtime is built on every call, and
    // `block_on` will panic if this is ever invoked from inside an async
    // context — confirm all callers are synchronous.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let result = runtime
        .block_on(tool.execute(json!({ "rows": rows })))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        return Err(PipeError::Protocol(
            result.error.unwrap_or_else(|| "screen_html_export failed".to_string()),
        ));
    }

    // The tool reports where it wrote the file as JSON: {"output_path": "..."}.
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?;
    let output_path = payload["output_path"]
        .as_str()
        .ok_or_else(|| PipeError::Protocol("screen_html_export did not return output_path".to_string()))?;
    Ok(format!("已生成知乎热榜大屏 {output_path}"))
}
|
||||
|
||||
/// Parse hotlist entries from scraped page text.
///
/// Strategy: first try the single-line format ("3. 标题 1234万热度"); if that
/// yields nothing, fall back to the multi-line layout where rank, title, and
/// heat appear on separate lines. Results are sorted by rank and truncated to
/// `top_n`.
fn parse_hotlist_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
    let mut items = parse_single_line_items(text, top_n);
    if !items.is_empty() {
        return items;
    }

    let lines = normalize_lines(text);
    let mut seen_ranks = BTreeSet::new();
    let mut idx = 0usize;

    while idx < lines.len() && items.len() < top_n {
        // A line must look like a rank to start a new entry.
        let Some(rank) = parse_rank(&lines[idx]) else {
            idx += 1;
            continue;
        };
        // Duplicate ranks (e.g. repeated page chrome) are skipped.
        if !seen_ranks.insert(rank) {
            idx += 1;
            continue;
        }

        // Look ahead up to six lines for this rank's title and heat, stopping
        // early if the next rank line appears first.
        let mut title = None;
        let mut heat = None;
        for candidate in lines.iter().skip(idx + 1).take(6) {
            if parse_rank(candidate).is_some() {
                break;
            }
            if heat.is_none() && looks_like_heat(candidate) {
                heat = Some(normalize_heat(candidate));
                continue;
            }
            if title.is_none() && !is_noise_line(candidate) {
                title = Some(candidate.clone());
            }
        }

        // Only complete entries (both title and heat found) are kept.
        if let (Some(title), Some(heat)) = (title, heat) {
            items.push(HotlistItem { rank, title, heat });
        }
        idx += 1;
    }

    items.sort_by_key(|item| item.rank);
    items.truncate(top_n);
    items
}
|
||||
|
||||
/// Parse entries where rank, title, and heat share one line,
/// e.g. "3. 某个问题 1234万热度" or "3、标题 98k".
///
/// Stops after `top_n` entries; ranks of 0 and duplicate ranks are skipped.
fn parse_single_line_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
    // Capture groups: (1) 1-2 digit rank, (2) title, (3) numeric heat with an
    // optional unit; a trailing "热度" marker is matched but discarded.
    let re = Regex::new(
        r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$",
    )
    .expect("valid hotlist single-line regex");
    let mut items = Vec::new();
    let mut seen_ranks = BTreeSet::new();

    for capture in re.captures_iter(text) {
        // Unparsable ranks fall through to 0 and are rejected below.
        let rank = capture
            .get(1)
            .and_then(|value| value.as_str().parse::<u64>().ok())
            .unwrap_or_default();
        if rank == 0 || !seen_ranks.insert(rank) {
            continue;
        }
        let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or("");
        let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or("");
        if title.is_empty() || heat.is_empty() {
            continue;
        }
        items.push(HotlistItem {
            rank,
            title: title.to_string(),
            heat: normalize_heat(heat),
        });
        if items.len() >= top_n {
            break;
        }
    }

    items
}
|
||||
|
||||
/// Split raw page text into trimmed, non-empty lines, collapsing interior
/// runs of whitespace down to single spaces.
fn normalize_lines(text: &str) -> Vec<String> {
    let mut normalized = Vec::new();
    for raw in text.lines() {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            continue;
        }
        // Rebuild the line word-by-word so tabs/multiple spaces become one space.
        let mut collapsed = String::with_capacity(trimmed.len());
        for (index, word) in trimmed.split_whitespace().enumerate() {
            if index > 0 {
                collapsed.push(' ');
            }
            collapsed.push_str(word);
        }
        normalized.push(collapsed);
    }
    normalized
}
|
||||
|
||||
fn parse_rank(line: &str) -> Option<u64> {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if trimmed.chars().all(|ch| ch.is_ascii_digit()) {
|
||||
return trimmed.parse::<u64>().ok().filter(|value| *value > 0);
|
||||
}
|
||||
|
||||
let rank_re = Regex::new(r"^(\d{1,2})[\.、\s]").expect("valid rank regex");
|
||||
rank_re
|
||||
.captures(trimmed)
|
||||
.and_then(|capture| capture.get(1))
|
||||
.and_then(|value| value.as_str().parse::<u64>().ok())
|
||||
.filter(|value| *value > 0)
|
||||
}
|
||||
|
||||
fn looks_like_heat(line: &str) -> bool {
|
||||
let compact = line.replace(' ', "");
|
||||
let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex");
|
||||
heat_re.is_match(compact.as_str())
|
||||
}
|
||||
|
||||
/// Strip ASCII spaces and any trailing "热度" marker(s) from a heat string.
/// `trim_end_matches` removes the suffix repeatedly, matching prior behavior.
fn normalize_heat(line: &str) -> String {
    let compact: String = line.chars().filter(|&ch| ch != ' ').collect();
    compact.trim_end_matches("热度").to_string()
}
|
||||
|
||||
/// Navigation/page-chrome strings that must never be taken as an item title.
fn is_noise_line(line: &str) -> bool {
    const NOISE_LINES: [&str; 7] =
        ["知乎", "知乎热榜", "热榜", "首页", "发现", "等你来答", "更多内容"];
    NOISE_LINES.contains(&line)
}
|
||||
|
||||
fn extract_top_n(instruction: &str) -> usize {
|
||||
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
|
||||
re.captures(&instruction.to_ascii_lowercase())
|
||||
.and_then(|capture| capture.get(1))
|
||||
.and_then(|value| value.as_str().parse::<usize>().ok())
|
||||
.filter(|value| *value > 0)
|
||||
.unwrap_or(10)
|
||||
}
|
||||
Reference in New Issue
Block a user