wip: checkpoint 2026-03-29 runtime work
This commit is contained in:
@@ -1,29 +1,70 @@
|
||||
use async_trait::async_trait;
|
||||
use reqwest::Url;
|
||||
use serde_json::{json, Map, Value};
|
||||
use zeroclaw::tools::{Tool, ToolResult};
|
||||
|
||||
use crate::pipe::{Action, BrowserPipeTool, Transport};
|
||||
use crate::pipe::{Action, BrowserPipeTool, ExecutionSurfaceMetadata, Transport};
|
||||
|
||||
pub const BROWSER_ACTION_TOOL_NAME: &str = "browser_action";
|
||||
pub const SUPERRPA_BROWSER_TOOL_NAME: &str = "superrpa_browser";
|
||||
const BROWSER_ACTION_TOOL_DESCRIPTION: &str =
|
||||
"Execute browser actions in SuperRPA through the existing sgClaw pipe protocol.";
|
||||
const SUPERRPA_BROWSER_TOOL_DESCRIPTION: &str =
|
||||
"Use SuperRPA's dedicated privileged browser interface for page navigation, DOM reading, clicking, and typing inside the protected browser host.";
|
||||
const MAX_DATA_STRING_CHARS: usize = 2048;
|
||||
const MAX_AOM_STRING_CHARS: usize = 128;
|
||||
const MAX_DATA_ARRAY_ITEMS: usize = 12;
|
||||
const MAX_DATA_OBJECT_FIELDS: usize = 24;
|
||||
const MAX_DATA_RECURSION_DEPTH: usize = 4;
|
||||
|
||||
/// Adapter that exposes a pipe-protocol `BrowserPipeTool` as a ZeroClaw `Tool`.
///
/// The same underlying implementation can be registered under different
/// identities (see `new` vs `new_superrpa`), so the reported name and
/// description are stored per instance.
pub struct ZeroClawBrowserTool<T: Transport> {
    // Underlying pipe-backed browser tool that actually performs actions.
    browser_tool: BrowserPipeTool<T>,
    // Tool name reported to the runtime; fixed at construction.
    tool_name: &'static str,
    // Tool description reported to the runtime; fixed at construction.
    description: &'static str,
}
|
||||
|
||||
impl<T: Transport> ZeroClawBrowserTool<T> {
|
||||
pub fn new(browser_tool: BrowserPipeTool<T>) -> Self {
|
||||
Self { browser_tool }
|
||||
Self::named(
|
||||
browser_tool,
|
||||
BROWSER_ACTION_TOOL_NAME,
|
||||
BROWSER_ACTION_TOOL_DESCRIPTION,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn new_superrpa(browser_tool: BrowserPipeTool<T>) -> Self {
|
||||
Self::named(
|
||||
browser_tool,
|
||||
SUPERRPA_BROWSER_TOOL_NAME,
|
||||
SUPERRPA_BROWSER_TOOL_DESCRIPTION,
|
||||
)
|
||||
}
|
||||
|
||||
fn named(
|
||||
browser_tool: BrowserPipeTool<T>,
|
||||
tool_name: &'static str,
|
||||
description: &'static str,
|
||||
) -> Self {
|
||||
Self {
|
||||
browser_tool,
|
||||
tool_name,
|
||||
description,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
|
||||
self.browser_tool.surface_metadata()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
|
||||
fn name(&self) -> &str {
|
||||
BROWSER_ACTION_TOOL_NAME
|
||||
self.tool_name
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
"Execute browser actions in SuperRPA through the existing sgClaw pipe protocol."
|
||||
self.description
|
||||
}
|
||||
|
||||
fn parameters_schema(&self) -> Value {
|
||||
@@ -72,8 +113,9 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
|
||||
let output = serde_json::to_string(&json!({
|
||||
"seq": result.seq,
|
||||
"success": result.success,
|
||||
"data": result.data,
|
||||
"aom_snapshot": result.aom_snapshot,
|
||||
"data": compact_json_value(&result.data, 0),
|
||||
"aom_snapshot": compact_aom_snapshot(&result.aom_snapshot),
|
||||
"aom_snapshot_count": result.aom_snapshot.len(),
|
||||
"timing": result.timing
|
||||
}))?;
|
||||
|
||||
@@ -103,9 +145,10 @@ fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, Bro
|
||||
};
|
||||
|
||||
let action_name = take_required_string(&mut args, "action")?;
|
||||
let expected_domain = take_required_string(&mut args, "expected_domain")?;
|
||||
let raw_expected_domain = take_required_string(&mut args, "expected_domain")?;
|
||||
let action = parse_action(&action_name)?;
|
||||
validate_action_params(&action_name, &args)?;
|
||||
let expected_domain = normalize_expected_domain(&action, &raw_expected_domain, &args)?;
|
||||
|
||||
Ok(BrowserActionRequest {
|
||||
action,
|
||||
@@ -178,6 +221,59 @@ fn require_non_empty_string(
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_expected_domain(
|
||||
action: &Action,
|
||||
raw_expected_domain: &str,
|
||||
args: &Map<String, Value>,
|
||||
) -> Result<String, BrowserActionAdapterError> {
|
||||
if matches!(action, Action::Navigate) {
|
||||
if let Some(url) = args.get("url").and_then(Value::as_str) {
|
||||
if let Some(host) = host_from_url(url) {
|
||||
return Ok(host);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
normalize_domain_like(raw_expected_domain).ok_or_else(|| {
|
||||
BrowserActionAdapterError::InvalidArguments(format!(
|
||||
"expected_domain must resolve to a hostname, got {raw_expected_domain:?}"
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
fn host_from_url(raw: &str) -> Option<String> {
|
||||
Url::parse(raw)
|
||||
.ok()?
|
||||
.host_str()
|
||||
.map(|host| host.to_ascii_lowercase())
|
||||
}
|
||||
|
||||
fn normalize_domain_like(raw: &str) -> Option<String> {
|
||||
let trimmed = raw.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(host) = host_from_url(trimmed) {
|
||||
return Some(host);
|
||||
}
|
||||
|
||||
let without_scheme = trimmed
|
||||
.trim_start_matches("https://")
|
||||
.trim_start_matches("http://");
|
||||
let host = without_scheme
|
||||
.split(['/', '?', '#'])
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.split(':')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
(!host.is_empty()).then_some(host)
|
||||
}
|
||||
|
||||
fn format_browser_action_error(data: &Value) -> String {
|
||||
if let Some(error) = data.get("error") {
|
||||
if let Some(message) = error.get("message").and_then(Value::as_str) {
|
||||
@@ -193,6 +289,111 @@ fn format_browser_action_error(data: &Value) -> String {
|
||||
format!("browser action failed: {data}")
|
||||
}
|
||||
|
||||
/// Compact an arbitrary JSON payload for tool output, using the generous
/// string budget reserved for `data` fields (MAX_DATA_STRING_CHARS).
fn compact_json_value(value: &Value, depth: usize) -> Value {
    compact_json_value_with_string_limit(value, depth, MAX_DATA_STRING_CHARS)
}
|
||||
|
||||
fn compact_aom_snapshot(snapshot: &[Value]) -> Value {
|
||||
Value::Array(
|
||||
snapshot
|
||||
.iter()
|
||||
.take(MAX_DATA_ARRAY_ITEMS)
|
||||
.map(|item| compact_aom_value(item, 0))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn compact_aom_value(value: &Value, depth: usize) -> Value {
|
||||
if depth >= MAX_DATA_RECURSION_DEPTH {
|
||||
return Value::String("[truncated nested value]".to_string());
|
||||
}
|
||||
|
||||
match value {
|
||||
Value::Object(map) => {
|
||||
let mut compacted = Map::new();
|
||||
for (key, item) in map.iter().take(MAX_DATA_OBJECT_FIELDS) {
|
||||
if matches!(key.as_str(), "text" | "value" | "html") {
|
||||
let summary = item
|
||||
.as_str()
|
||||
.map(|text| format!("[{} chars omitted]", text.chars().count()))
|
||||
.unwrap_or_else(|| "[omitted]".to_string());
|
||||
compacted.insert(key.clone(), Value::String(summary));
|
||||
continue;
|
||||
}
|
||||
|
||||
compacted.insert(key.clone(), compact_aom_value(item, depth + 1));
|
||||
}
|
||||
Value::Object(compacted)
|
||||
}
|
||||
Value::Array(items) => Value::Array(
|
||||
items
|
||||
.iter()
|
||||
.take(MAX_DATA_ARRAY_ITEMS)
|
||||
.map(|item| compact_aom_value(item, depth + 1))
|
||||
.collect(),
|
||||
),
|
||||
_ => compact_json_value_with_string_limit(value, depth, MAX_AOM_STRING_CHARS),
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively compact a JSON value for inclusion in tool output.
///
/// Strings are truncated to `max_string_chars`; arrays keep at most
/// MAX_DATA_ARRAY_ITEMS items and objects at most MAX_DATA_OBJECT_FIELDS
/// fields, each with an explicit omission marker; recursion stops at
/// MAX_DATA_RECURSION_DEPTH with a placeholder string.
fn compact_json_value_with_string_limit(
    value: &Value,
    depth: usize,
    max_string_chars: usize,
) -> Value {
    if depth >= MAX_DATA_RECURSION_DEPTH {
        return Value::String("[truncated nested value]".to_string());
    }

    match value {
        // Scalars other than strings are cheap; pass them through.
        Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(),
        Value::String(text) => Value::String(truncate_string(text, max_string_chars)),
        Value::Array(items) => {
            let mut compacted: Vec<Value> = items
                .iter()
                .take(MAX_DATA_ARRAY_ITEMS)
                .map(|item| compact_json_value_with_string_limit(item, depth + 1, max_string_chars))
                .collect();
            // Record how many trailing items were dropped.
            if items.len() > MAX_DATA_ARRAY_ITEMS {
                compacted.push(Value::String(format!(
                    "[{} more items omitted]",
                    items.len() - MAX_DATA_ARRAY_ITEMS
                )));
            }
            Value::Array(compacted)
        }
        Value::Object(map) => {
            let mut compacted = Map::new();
            for (key, item) in map.iter().take(MAX_DATA_OBJECT_FIELDS) {
                compacted.insert(
                    key.clone(),
                    compact_json_value_with_string_limit(item, depth + 1, max_string_chars),
                );
            }
            // Record how many fields were dropped under a reserved key.
            if map.len() > MAX_DATA_OBJECT_FIELDS {
                compacted.insert(
                    "_truncated_fields".to_string(),
                    Value::String(format!(
                        "{} additional fields omitted",
                        map.len() - MAX_DATA_OBJECT_FIELDS
                    )),
                );
            }
            Value::Object(compacted)
        }
    }
}
|
||||
|
||||
/// Truncate `text` to at most `max_chars` characters, appending a marker
/// with the omitted character count when truncation occurs.
///
/// Counts characters (not bytes) so multi-byte text is never split.
fn truncate_string(text: &str, max_chars: usize) -> String {
    let total_chars = text.chars().count();
    if total_chars <= max_chars {
        return text.to_string();
    }

    let omitted = total_chars - max_chars;
    let mut out: String = text.chars().take(max_chars).collect();
    out.push_str(&format!("...[truncated {omitted} chars]"));
    out
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum BrowserActionAdapterError {
|
||||
#[error("unsupported action: {0}")]
|
||||
|
||||
@@ -18,7 +18,15 @@ pub fn log_entry_for_turn_event(event: &TurnEvent) -> Option<AgentMessage> {
|
||||
}
|
||||
|
||||
fn format_tool_call(name: &str, args: &Value) -> String {
|
||||
if name != "browser_action" {
|
||||
if name == "read_skill" {
|
||||
let skill_name = args
|
||||
.get("name")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("<missing-skill>");
|
||||
return format!("read_skill {skill_name}");
|
||||
}
|
||||
|
||||
if !is_browser_tool_call(name) {
|
||||
return format!("call {name}");
|
||||
}
|
||||
|
||||
@@ -54,10 +62,14 @@ fn format_tool_call(name: &str, args: &Value) -> String {
|
||||
.unwrap_or("<missing-selector>");
|
||||
format!("getText {selector}")
|
||||
}
|
||||
other => format!("browser_action {other}"),
|
||||
other => format!("{name} {other}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// True when `name` is one of the two browser-tool registrations that share
/// the browser-specific log formatting.
fn is_browser_tool_call(name: &str) -> bool {
    matches!(name, "browser_action" | "superrpa_browser")
}
|
||||
|
||||
/// True when a tool's string output denotes a failure ("Error:" prefix).
fn is_tool_error(output: &str) -> bool {
    output.strip_prefix("Error:").is_some()
}
|
||||
|
||||
@@ -3,4 +3,8 @@ pub mod config_adapter;
|
||||
pub mod cron_adapter;
|
||||
pub mod event_bridge;
|
||||
pub mod memory_adapter;
|
||||
pub mod openxml_office_tool;
|
||||
pub mod orchestration;
|
||||
pub mod runtime;
|
||||
pub mod screen_html_export_tool;
|
||||
pub mod workflow_executor;
|
||||
|
||||
392
src/compat/openxml_office_tool.rs
Normal file
392
src/compat/openxml_office_tool.rs
Normal file
@@ -0,0 +1,392 @@
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use zeroclaw::tools::{Tool, ToolResult};
|
||||
|
||||
const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office";
|
||||
const DEFAULT_SHEET_NAME: &str = "知乎热榜";
|
||||
const MAX_COLUMNS: [&str; 3] = ["rank", "title", "heat"];
|
||||
|
||||
/// Tool that renders Zhihu hotlist rows into a local .xlsx workbook by
/// building a template package and driving the external `openxml_cli`
/// renderer.
pub struct OpenXmlOfficeTool {
    // Root under which job scratch files and the default output live.
    workspace_root: PathBuf,
}

impl OpenXmlOfficeTool {
    /// Create a tool instance rooted at the given workspace directory.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }
}
|
||||
|
||||
/// Deserialized arguments for the `openxml_office` tool call.
#[derive(Debug, Deserialize)]
struct OpenXmlOfficeArgs {
    // Must equal DEFAULT_SHEET_NAME after trimming; others are rejected.
    sheet_name: String,
    // Must be exactly ["rank", "title", "heat"] (see execute's validation).
    columns: Vec<String>,
    // Each row must contain exactly 3 cells: rank, title, heat.
    rows: Vec<Vec<Value>>,
    // Optional explicit destination; defaults to a timestamped out/ path.
    #[serde(default)]
    output_path: Option<String>,
}
|
||||
|
||||
#[async_trait]
impl Tool for OpenXmlOfficeTool {
    fn name(&self) -> &str {
        OPENXML_OFFICE_TOOL_NAME
    }

    fn description(&self) -> &str {
        "Export structured Zhihu hotlist rows into a local .xlsx file through the OpenXML office pipeline."
    }

    /// JSON schema of the tool arguments (see `OpenXmlOfficeArgs`).
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "required": ["sheet_name", "columns", "rows"],
            "properties": {
                "sheet_name": { "type": "string" },
                "columns": {
                    "type": "array",
                    "items": { "type": "string" }
                },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {}
                    }
                },
                "output_path": { "type": "string" }
            }
        })
    }

    /// Validate the arguments, materialize a template/payload/request in a
    /// fresh job directory, run the external renderer, and report the
    /// artifact path. Argument-level failures come back as failed
    /// `ToolResult`s; I/O and renderer failures propagate as `Err`.
    async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
        let parsed = match serde_json::from_value::<OpenXmlOfficeArgs>(args) {
            Ok(value) => value,
            Err(err) => return Ok(failed_tool_result(format!("invalid tool arguments: {err}"))),
        };

        // This tool only supports the fixed hotlist sheet.
        if parsed.sheet_name.trim() != DEFAULT_SHEET_NAME {
            return Ok(failed_tool_result(format!(
                "unsupported sheet_name: expected {DEFAULT_SHEET_NAME}"
            )));
        }

        // NOTE(review): MAX_COLUMNS is really the expected column schema,
        // not a maximum — consider renaming the constant.
        let expected_columns = MAX_COLUMNS
            .iter()
            .map(|value| value.to_string())
            .collect::<Vec<_>>();
        if parsed.columns != expected_columns {
            return Ok(failed_tool_result(
                "unsupported columns: expected [rank, title, heat]".to_string(),
            ));
        }

        if parsed.rows.is_empty() {
            return Ok(failed_tool_result("rows must not be empty".to_string()));
        }

        // Row shape must match the 3-column schema exactly; the payload
        // writer indexes row[0..3] unconditionally.
        if parsed.rows.iter().any(|row| row.len() != 3) {
            return Ok(failed_tool_result(
                "each row must contain exactly 3 values".to_string(),
            ));
        }

        // Scratch files for this render job live in a unique directory.
        let job_root = create_job_root(&self.workspace_root)?;
        let template_path = job_root.join("zhihu_hotlist_template.xlsx");
        let payload_path = job_root.join("payload.json");
        let request_path = job_root.join("request.json");
        let output_path = parsed
            .output_path
            .as_deref()
            .map(PathBuf::from)
            .unwrap_or_else(|| default_output_path(&self.workspace_root));

        write_hotlist_template(&template_path, parsed.rows.len())?;
        write_payload_json(&payload_path, &parsed.rows)?;
        write_request_json(&request_path, &template_path, &payload_path, &output_path)?;

        // Prefer the path the renderer reports; fall back to the requested
        // output path if the response omits it.
        let rendered = run_openxml_cli(&request_path)?;
        let artifact_path = rendered["data"]["artifact"]["path"]
            .as_str()
            .map(str::to_string)
            .unwrap_or_else(|| output_path.to_string_lossy().to_string());

        Ok(ToolResult {
            success: true,
            output: json!({
                "sheet_name": DEFAULT_SHEET_NAME,
                "output_path": artifact_path,
                "row_count": parsed.rows.len(),
                "renderer": OPENXML_OFFICE_TOOL_NAME
            })
            .to_string(),
            error: None,
        })
    }
}
|
||||
|
||||
fn failed_tool_result(error: String) -> ToolResult {
|
||||
ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(error),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_job_root(workspace_root: &Path) -> anyhow::Result<PathBuf> {
|
||||
let nanos = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)?
|
||||
.as_nanos();
|
||||
let path = workspace_root.join(".sgclaw-openxml").join(format!("{nanos}"));
|
||||
fs::create_dir_all(&path)?;
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
/// Default export location: `<workspace>/out/zhihu-hotlist-<nanos>.xlsx`.
///
/// A pre-epoch clock degrades to a 0 timestamp instead of failing.
fn default_output_path(workspace_root: &Path) -> PathBuf {
    let stamp = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    workspace_root
        .join("out")
        .join(format!("zhihu-hotlist-{stamp}.xlsx"))
}
|
||||
|
||||
fn write_payload_json(path: &Path, rows: &[Vec<Value>]) -> anyhow::Result<()> {
|
||||
let mut variables = BTreeMap::new();
|
||||
for (idx, row) in rows.iter().enumerate() {
|
||||
let row_index = idx + 1;
|
||||
variables.insert(format!("RANK_{row_index}"), value_to_string(&row[0]));
|
||||
variables.insert(format!("TITLE_{row_index}"), value_to_string(&row[1]));
|
||||
variables.insert(format!("HEAT_{row_index}"), value_to_string(&row[2]));
|
||||
}
|
||||
|
||||
let payload = json!({
|
||||
"variables": variables,
|
||||
"tables": {},
|
||||
"images": {}
|
||||
});
|
||||
fs::write(path, serde_json::to_vec_pretty(&payload)?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write the render-request document consumed by `openxml_cli`, ensuring
/// the output file's parent directory exists first.
fn write_request_json(
    path: &Path,
    template_path: &Path,
    payload_path: &Path,
    output_path: &Path,
) -> anyhow::Result<()> {
    // The renderer writes the artifact itself; make sure its destination
    // directory already exists.
    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent)?;
    }

    let request = json!({
        "api_version": "2026-03-26",
        "job": "zhihu_hotlist_export",
        "template": {
            "kind": "xlsx",
            "path": template_path
        },
        "output": {
            "path": output_path
        },
        "data": {
            "json_path": payload_path
        },
        "options": {
            // Strict mode: unresolved placeholders fail the render.
            "strict": true,
            "allow_unresolved": false,
            "dry_run": false
        }
    });
    fs::write(path, serde_json::to_vec_pretty(&request)?)?;
    Ok(())
}
|
||||
|
||||
/// Invoke the sibling `openxml_cli` renderer on `request_path` and parse
/// its JSON stdout.
///
/// Prefers a previously built debug binary; otherwise falls back to
/// `cargo run` against the sibling crate's manifest.
///
/// NOTE(review): `env!("CARGO_MANIFEST_DIR")` resolves at compile time, so
/// this bakes the build machine's source layout into the binary — confirm
/// the tool is only ever run from a source checkout.
fn run_openxml_cli(request_path: &Path) -> anyhow::Result<Value> {
    let manifest_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .map(|path| path.join("openxml_cli").join("Cargo.toml"))
        .ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli manifest path"))?;
    let binary_path = manifest_path
        .parent()
        .map(|path| path.join("target").join("debug").join("openxml-cli"))
        .ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli binary path"))?;

    // Fast path: run the prebuilt debug binary directly.
    let output = if binary_path.exists() {
        Command::new(&binary_path)
            .args([
                "template",
                "render",
                "--request",
                request_path.to_string_lossy().as_ref(),
                "--json",
            ])
            .output()?
    } else {
        // Slow path: build-and-run via cargo against the sibling manifest.
        Command::new("cargo")
            .args([
                "run",
                "--quiet",
                "--manifest-path",
                manifest_path.to_string_lossy().as_ref(),
                "--",
                "template",
                "render",
                "--request",
                request_path.to_string_lossy().as_ref(),
                "--json",
            ])
            .output()?
    };

    // Surface the renderer's stderr as the error message when available.
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        return Err(anyhow::anyhow!(if stderr.is_empty() {
            "openxml_cli render failed".to_string()
        } else {
            stderr
        }));
    }

    let stdout = String::from_utf8(output.stdout)?;
    Ok(serde_json::from_str(&stdout)?)
}
|
||||
|
||||
fn value_to_string(value: &Value) -> String {
|
||||
match value {
|
||||
Value::String(text) => text.clone(),
|
||||
Value::Number(number) => number.to_string(),
|
||||
Value::Bool(flag) => flag.to_string(),
|
||||
Value::Null => String::new(),
|
||||
other => other.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Assemble a minimal .xlsx template at `path` with `row_count` placeholder
/// data rows, by writing the OPC parts into a scratch directory and zipping
/// them.
///
/// NOTE(review): this shells out to the external `zip` binary and will fail
/// on hosts without `zip` on PATH — confirm that is acceptable.
fn write_hotlist_template(path: &Path, row_count: usize) -> anyhow::Result<()> {
    // Build the package structure in a sibling scratch directory.
    let build_root = path
        .parent()
        .ok_or_else(|| anyhow::anyhow!("template path has no parent"))?
        .join("template-build");
    fs::create_dir_all(build_root.join("_rels"))?;
    fs::create_dir_all(build_root.join("docProps"))?;
    fs::create_dir_all(build_root.join("xl/_rels"))?;
    fs::create_dir_all(build_root.join("xl/worksheets"))?;

    // Fixed OPC parts plus the generated worksheet.
    fs::write(build_root.join("[Content_Types].xml"), content_types_xml())?;
    fs::write(build_root.join("_rels/.rels"), root_rels_xml())?;
    fs::write(build_root.join("docProps/app.xml"), app_xml())?;
    fs::write(build_root.join("docProps/core.xml"), core_xml())?;
    fs::write(build_root.join("xl/workbook.xml"), workbook_xml())?;
    fs::write(
        build_root.join("xl/_rels/workbook.xml.rels"),
        workbook_rels_xml(),
    )?;
    fs::write(
        build_root.join("xl/worksheets/sheet1.xml"),
        worksheet_xml(row_count),
    )?;

    // Remove any previous template so the archive is created fresh rather
    // than updated in place.
    if path.exists() {
        fs::remove_file(path)?;
    }

    let zip = Command::new("zip")
        .current_dir(&build_root)
        .args(["-q", "-r", path.to_string_lossy().as_ref(), "."])
        .output()?;
    if !zip.status.success() {
        let stderr = String::from_utf8_lossy(&zip.stderr);
        return Err(anyhow::anyhow!(format!(
            "failed to create xlsx template: {}",
            stderr.trim()
        )));
    }

    // Scratch cleanup is best-effort; a leftover directory is harmless.
    let _ = fs::remove_dir_all(&build_root);
    Ok(())
}
|
||||
|
||||
/// Generate `xl/worksheets/sheet1.xml` with a fixed header row followed by
/// `row_count` placeholder rows carrying `{{RANK_n}}` / `{{TITLE_n}}` /
/// `{{HEAT_n}}` template variables (Excel rows 2..=row_count+1).
fn worksheet_xml(row_count: usize) -> String {
    let header = "<row r=\"1\"><c r=\"A1\" t=\"inlineStr\"><is><t>rank</t></is></c><c r=\"B1\" t=\"inlineStr\"><is><t>title</t></is></c><c r=\"C1\" t=\"inlineStr\"><is><t>heat</t></is></c></row>";

    let body: String = (1..=row_count)
        .map(|idx| {
            let excel_row = idx + 1;
            format!(
                "<row r=\"{excel_row}\"><c r=\"A{excel_row}\" t=\"inlineStr\"><is><t>{{{{RANK_{idx}}}}}</t></is></c><c r=\"B{excel_row}\" t=\"inlineStr\"><is><t>{{{{TITLE_{idx}}}}}</t></is></c><c r=\"C{excel_row}\" t=\"inlineStr\"><is><t>{{{{HEAT_{idx}}}}}</t></is></c></row>"
            )
        })
        .collect();

    format!(
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\
         <worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\
         <sheetData>{header}{body}</sheetData>\
         </worksheet>"
    )
}
|
||||
|
||||
/// `[Content_Types].xml` part declaring the workbook, worksheet, and
/// doc-props content types of the generated package.
fn content_types_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
<Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
</Types>"#
}
|
||||
|
||||
/// Package-level `_rels/.rels` wiring the workbook and doc-props parts.
fn root_rels_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
</Relationships>"#
}
|
||||
|
||||
/// Minimal `docProps/app.xml` extended-properties part.
fn app_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<Application>sgClaw</Application>
</Properties>"#
}
|
||||
|
||||
/// Minimal `docProps/core.xml` core-properties part (title only).
fn core_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dc:title>Zhihu Hotlist Export</dc:title>
</cp:coreProperties>"#
}
|
||||
|
||||
/// `xl/workbook.xml` declaring the single hotlist sheet.
fn workbook_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheets>
<sheet name="知乎热榜" sheetId="1" r:id="rId1"/>
</sheets>
</workbook>"#
}
|
||||
|
||||
/// `xl/_rels/workbook.xml.rels` linking the workbook to its only worksheet.
fn workbook_rels_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
</Relationships>"#
}
|
||||
67
src/compat/orchestration.rs
Normal file
67
src/compat/orchestration.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use std::path::Path;
|
||||
|
||||
use crate::compat::runtime::CompatTaskContext;
|
||||
use crate::config::SgClawSettings;
|
||||
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
|
||||
|
||||
pub fn should_use_primary_orchestration(
|
||||
instruction: &str,
|
||||
page_url: Option<&str>,
|
||||
page_title: Option<&str>,
|
||||
) -> bool {
|
||||
let normalized = instruction.to_ascii_lowercase();
|
||||
let needs_export = normalized.contains("excel")
|
||||
|| normalized.contains("xlsx")
|
||||
|| instruction.contains("导出")
|
||||
|| instruction.contains("大屏")
|
||||
|| instruction.contains("新标签页")
|
||||
|| normalized.contains("dashboard");
|
||||
|
||||
crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) && needs_export
|
||||
}
|
||||
|
||||
/// Run the task via the primary compat runtime, with a workflow-route
/// fallback.
///
/// Fallback matrix:
/// - route detected + primary succeeded but its summary signals fallback
///   (`should_fallback_after_summary`): re-run via the detected route;
/// - primary succeeded otherwise: return its summary as-is;
/// - route detected + primary failed: run the route (error discarded);
/// - no route + primary failed: propagate the primary error.
pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
    transport: &T,
    browser_tool: BrowserPipeTool<T>,
    instruction: &str,
    task_context: &CompatTaskContext,
    workspace_root: &Path,
    settings: &SgClawSettings,
) -> Result<String, PipeError> {
    // Detect a scripted workflow route up front so it can serve as fallback.
    let route = crate::compat::workflow_executor::detect_route(
        instruction,
        task_context.page_url.as_deref(),
        task_context.page_title.as_deref(),
    );
    // Always attempt the primary runtime first.
    let primary_result = crate::compat::runtime::execute_task_with_sgclaw_settings(
        transport,
        browser_tool.clone(),
        instruction,
        task_context,
        workspace_root,
        settings,
    );

    match (route, primary_result) {
        (Some(route), Ok(summary))
            if crate::compat::workflow_executor::should_fallback_after_summary(&summary, &route) =>
        {
            crate::compat::workflow_executor::execute_route(
                transport,
                &browser_tool,
                workspace_root,
                instruction,
                route,
            )
        }
        (_, Ok(summary)) => Ok(summary),
        // NOTE(review): the primary error is silently discarded here in
        // favor of the route result — consider logging it.
        (Some(route), Err(_)) => crate::compat::workflow_executor::execute_route(
            transport,
            &browser_tool,
            workspace_root,
            instruction,
            route,
        ),
        (None, Err(err)) => Err(err),
    }
}
|
||||
382
src/compat/screen_html_export_tool.rs
Normal file
382
src/compat/screen_html_export_tool.rs
Normal file
@@ -0,0 +1,382 @@
|
||||
use async_trait::async_trait;
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use zeroclaw::tools::{Tool, ToolResult};
|
||||
|
||||
const SCREEN_HTML_EXPORT_TOOL_NAME: &str = "screen_html_export";
|
||||
const DEFAULT_SCREEN_TITLE: &str = "知乎热榜主题分类分析大屏";
|
||||
const TEMPLATE: &str = include_str!(concat!(
|
||||
env!("CARGO_MANIFEST_DIR"),
|
||||
"/../skill_lib/skills/zhihu-hotlist-screen/assets/zhihu-hotlist-echarts.html"
|
||||
));
|
||||
const PAYLOAD_START_MARKER: &str = " const defaultPayload = ";
|
||||
const PAYLOAD_END_MARKER: &str = "\n\n const themeMeta = {";
|
||||
|
||||
/// Tool that renders the bundled Zhihu-hotlist ECharts dashboard template
/// into a standalone local HTML file.
pub struct ScreenHtmlExportTool {
    // Root directory used to derive the default output location.
    workspace_root: PathBuf,
}

impl ScreenHtmlExportTool {
    /// Create a tool instance rooted at the given workspace directory.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }
}
|
||||
|
||||
/// Deserialized arguments for the `screen_html_export` tool call.
///
/// Either `table` (fully-specified rows) or `rows` (raw [rank, title, heat]
/// triples) must be supplied; everything else is optional.
#[derive(Debug, Deserialize)]
struct ScreenHtmlExportArgs {
    // Optional snapshot identifier; a default is generated when absent/blank.
    #[serde(default)]
    snapshot_id: Option<String>,
    // Optional capture timestamp in milliseconds; defaults to "now".
    #[serde(default)]
    generated_at_ms: Option<u64>,
    // Raw triples used to build the table when `table` is not provided.
    #[serde(default)]
    rows: Option<Vec<Vec<Value>>>,
    // Pre-classified table rows; takes precedence over `rows`.
    #[serde(default)]
    table: Option<Vec<ScreenTableRow>>,
    // Pre-aggregated category stats; derived from the table when absent.
    #[serde(default)]
    categories: Option<Vec<ScreenCategory>>,
    // Optional explicit output file; defaults to a workspace-derived path.
    #[serde(default)]
    output_path: Option<String>,
}
|
||||
|
||||
/// Aggregated statistics for one title category shown on the dashboard.
#[derive(Debug, Clone, Deserialize, Serialize)]
struct ScreenCategory {
    category_code: String,
    category_label: String,
    item_count: u64,
    total_heat: u64,
    avg_heat: u64,
}
|
||||
|
||||
/// One hotlist entry as displayed in the dashboard table.
#[derive(Debug, Clone, Deserialize, Serialize)]
struct ScreenTableRow {
    rank: u64,
    title: String,
    url: String,
    category_code: String,
    category_label: String,
    // Original heat text plus its parsed numeric value.
    heat_text: String,
    heat_value: u64,
    // Engagement counters; zeroed when rows are built from raw triples
    // (see build_table_from_rows).
    reply_count: u64,
    upvote_count: u64,
    favorite_count: u64,
    heart_count: u64,
}
|
||||
|
||||
/// Payload embedded into the dashboard HTML template at render time.
#[derive(Debug, Serialize)]
struct ScreenPayload {
    snapshot_id: String,
    generated_at_ms: u64,
    categories: Vec<ScreenCategory>,
    table: Vec<ScreenTableRow>,
}
|
||||
|
||||
#[async_trait]
impl Tool for ScreenHtmlExportTool {
    fn name(&self) -> &str {
        SCREEN_HTML_EXPORT_TOOL_NAME
    }

    fn description(&self) -> &str {
        "Render a local Zhihu hotlist ECharts dashboard HTML for leadership demos and new-tab presentation."
    }

    /// JSON schema of the tool arguments (see `ScreenHtmlExportArgs`).
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "snapshot_id": { "type": "string" },
                "generated_at_ms": { "type": "integer" },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {}
                    }
                },
                "table": {
                    "type": "array",
                    "items": { "type": "object" }
                },
                "categories": {
                    "type": "array",
                    "items": { "type": "object" }
                },
                "output_path": { "type": "string" }
            }
        })
    }

    /// Build the dashboard payload (from `table`, or from raw `rows`),
    /// render it into the bundled HTML template, write the file, and return
    /// new-tab presentation metadata. Argument-level failures come back as
    /// failed `ToolResult`s; render/write failures propagate as `Err`.
    async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
        let parsed = match serde_json::from_value::<ScreenHtmlExportArgs>(args) {
            Ok(value) => value,
            Err(err) => return Ok(failed_tool_result(format!("invalid tool arguments: {err}"))),
        };

        // Prefer an explicit table; otherwise derive one from raw rows.
        let table = match parsed.table {
            Some(table) if !table.is_empty() => table,
            Some(_) => return Ok(failed_tool_result("table must not be empty".to_string())),
            None => match parsed.rows {
                Some(rows) => build_table_from_rows(&rows)?,
                None => {
                    return Ok(failed_tool_result(
                        "rows or table is required for screen_html_export".to_string(),
                    ))
                }
            },
        };

        // Defensive re-check; covers a table built from rows as well.
        if table.is_empty() {
            return Ok(failed_tool_result("table must not be empty".to_string()));
        }

        let categories = parsed
            .categories
            .filter(|items| !items.is_empty())
            .unwrap_or_else(|| derive_categories(&table));
        let payload = ScreenPayload {
            // Blank/whitespace snapshot ids fall back to a generated one.
            snapshot_id: parsed
                .snapshot_id
                .map(|value| value.trim().to_string())
                .filter(|value| !value.is_empty())
                .unwrap_or_else(default_snapshot_id),
            generated_at_ms: parsed.generated_at_ms.unwrap_or_else(now_ms),
            categories,
            table,
        };

        let rendered = render_template(&payload)?;
        let output_path = parsed
            .output_path
            .as_deref()
            .map(PathBuf::from)
            .unwrap_or_else(|| default_output_path(&self.workspace_root));
        write_output_html(&output_path, &rendered)?;

        // file:// URL so the host can open the dashboard in a new tab.
        let presentation_url = file_url_for_path(&output_path);
        Ok(ToolResult {
            success: true,
            output: json!({
                "title": DEFAULT_SCREEN_TITLE,
                "output_path": output_path,
                "renderer": SCREEN_HTML_EXPORT_TOOL_NAME,
                "row_count": payload.table.len(),
                "snapshot_id": payload.snapshot_id,
                "presentation": {
                    "mode": "new_tab",
                    "title": DEFAULT_SCREEN_TITLE,
                    "url": presentation_url,
                    "open_in_new_tab": true
                }
            })
            .to_string(),
            error: None,
        })
    }
}
|
||||
|
||||
fn failed_tool_result(error: String) -> ToolResult {
|
||||
ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(error),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_table_from_rows(rows: &[Vec<Value>]) -> anyhow::Result<Vec<ScreenTableRow>> {
|
||||
if rows.is_empty() {
|
||||
return Err(anyhow::anyhow!("rows must not be empty"));
|
||||
}
|
||||
|
||||
rows.iter()
|
||||
.enumerate()
|
||||
.map(|(index, row)| {
|
||||
if row.len() != 3 {
|
||||
return Err(anyhow::anyhow!(
|
||||
"each row must contain exactly 3 values: rank, title, heat"
|
||||
));
|
||||
}
|
||||
|
||||
let rank = value_to_rank(&row[0]).unwrap_or((index + 1) as u64);
|
||||
let title = value_to_string(&row[1]);
|
||||
if title.trim().is_empty() {
|
||||
return Err(anyhow::anyhow!("title must not be empty"));
|
||||
}
|
||||
let heat_text = value_to_string(&row[2]);
|
||||
let heat_value = parse_heat_value(&heat_text);
|
||||
let (category_code, category_label) = classify_title(&title);
|
||||
|
||||
Ok(ScreenTableRow {
|
||||
rank,
|
||||
title,
|
||||
url: format!("https://www.zhihu.com/question/hotlist-{rank}"),
|
||||
category_code: category_code.to_string(),
|
||||
category_label: category_label.to_string(),
|
||||
heat_text,
|
||||
heat_value,
|
||||
reply_count: 0,
|
||||
upvote_count: 0,
|
||||
favorite_count: 0,
|
||||
heart_count: 0,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn derive_categories(table: &[ScreenTableRow]) -> Vec<ScreenCategory> {
|
||||
let mut grouped: BTreeMap<(String, String), (u64, u64)> = BTreeMap::new();
|
||||
for row in table {
|
||||
let key = (row.category_code.clone(), row.category_label.clone());
|
||||
let entry = grouped.entry(key).or_insert((0, 0));
|
||||
entry.0 += 1;
|
||||
entry.1 += row.heat_value;
|
||||
}
|
||||
|
||||
grouped
|
||||
.into_iter()
|
||||
.map(|((category_code, category_label), (item_count, total_heat))| ScreenCategory {
|
||||
category_code,
|
||||
category_label,
|
||||
item_count,
|
||||
total_heat,
|
||||
avg_heat: if item_count == 0 {
|
||||
0
|
||||
} else {
|
||||
total_heat / item_count
|
||||
},
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn classify_title(title: &str) -> (&'static str, &'static str) {
|
||||
let normalized = title.to_ascii_lowercase();
|
||||
if contains_any(&normalized, &["ai", "芯片", "科技", "算法", "机器人", "无人机"]) {
|
||||
return ("technology", "科技");
|
||||
}
|
||||
if contains_any(&normalized, &["电影", "综艺", "明星", "周杰伦", "短剧", "娱乐"]) {
|
||||
return ("entertainment", "娱乐");
|
||||
}
|
||||
if contains_any(&normalized, &["足球", "比赛", "联赛", "国足", "体育", "冠军"]) {
|
||||
return ("sports", "体育");
|
||||
}
|
||||
if contains_any(&normalized, &["航母", "作战", "军", "军事", "演训"]) {
|
||||
return ("military", "军事");
|
||||
}
|
||||
if contains_any(&normalized, &["出口", "经济", "市场", "财经", "消费", "股"]) {
|
||||
return ("finance", "财经");
|
||||
}
|
||||
("society", "社会")
|
||||
}
|
||||
|
||||
/// Returns true when any of `needles` occurs as a substring of `haystack`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(needle) {
            return true;
        }
    }
    false
}
|
||||
|
||||
/// Parse a human-readable heat string (e.g. "355万", "1.2亿", "12k") into an
/// absolute count.
///
/// Commas are stripped, the leading numeric part is parsed, and a recognized
/// unit suffix scales it: 亿 = 1e8, 万 = 1e4, m/M = 1e6, k/K = 1e3.
/// Unparseable input yields 0 rather than an error — heat is a best-effort
/// display metric.
fn parse_heat_value(heat_text: &str) -> u64 {
    let compact = heat_text.trim().replace(',', "");
    if compact.is_empty() {
        return 0;
    }

    // Keep only the numeric portion; everything else is treated as unit text.
    let number_part = compact
        .chars()
        .filter(|ch| ch.is_ascii_digit() || *ch == '.')
        .collect::<String>();
    let base = number_part.parse::<f64>().unwrap_or(0.0);

    // 亿/万 take precedence. k/K and m/M cover the Latin suffixes that the
    // hot-list scraper's regex admits ([万亿kKmM]) but were previously
    // ignored here, silently under-counting such entries by 1000x/1000000x.
    let multiplier = if compact.contains('亿') {
        100_000_000.0
    } else if compact.contains('万') {
        10_000.0
    } else if compact.contains('m') || compact.contains('M') {
        1_000_000.0
    } else if compact.contains('k') || compact.contains('K') {
        1_000.0
    } else {
        1.0
    };

    (base * multiplier).round() as u64
}
|
||||
|
||||
fn value_to_string(value: &Value) -> String {
|
||||
match value {
|
||||
Value::String(text) => text.clone(),
|
||||
Value::Number(number) => number.to_string(),
|
||||
Value::Bool(flag) => flag.to_string(),
|
||||
Value::Null => String::new(),
|
||||
other => other.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_rank(value: &Value) -> Option<u64> {
|
||||
match value {
|
||||
Value::Number(number) => number.as_u64(),
|
||||
Value::String(text) => text.trim().parse::<u64>().ok(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn render_template(payload: &ScreenPayload) -> anyhow::Result<String> {
|
||||
let payload_json = serde_json::to_string_pretty(payload)?;
|
||||
let payload_start = TEMPLATE
|
||||
.find(PAYLOAD_START_MARKER)
|
||||
.ok_or_else(|| anyhow::anyhow!("default payload start marker missing"))?;
|
||||
let payload_end = TEMPLATE
|
||||
.find(PAYLOAD_END_MARKER)
|
||||
.ok_or_else(|| anyhow::anyhow!("default payload end marker missing"))?;
|
||||
let replacement = format!(
|
||||
"{PAYLOAD_START_MARKER}{}\n",
|
||||
indent_block(&payload_json, " ")
|
||||
);
|
||||
|
||||
Ok(format!(
|
||||
"{}{}{}",
|
||||
&TEMPLATE[..payload_start],
|
||||
replacement,
|
||||
&TEMPLATE[payload_end..],
|
||||
))
|
||||
}
|
||||
|
||||
/// Prefix every line of `value` with `indent`, joining with '\n'.
/// A trailing newline on the input is not preserved (str::lines drops it).
fn indent_block(value: &str, indent: &str) -> String {
    let mut indented = Vec::new();
    for line in value.lines() {
        indented.push(format!("{indent}{line}"));
    }
    indented.join("\n")
}
|
||||
|
||||
fn write_output_html(path: &Path, rendered: &str) -> anyhow::Result<()> {
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
fs::write(path, rendered)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Build a collision-resistant default output path under `<root>/out`,
/// stamping the filename with the current Unix time in nanoseconds.
fn default_output_path(workspace_root: &Path) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        // Clock before the epoch: fall back to zero rather than panicking.
        Err(_) => 0,
    };
    let file_name = format!("zhihu-hotlist-screen-{nanos}.html");
    workspace_root.join("out").join(file_name)
}
|
||||
|
||||
fn default_snapshot_id() -> String {
|
||||
format!("zhihu-hotlist-screen-{}", now_ms())
|
||||
}
|
||||
|
||||
/// Current Unix time in milliseconds; 0 when the system clock reports a time
/// before the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
|
||||
|
||||
fn file_url_for_path(path: &Path) -> String {
|
||||
Url::from_file_path(path)
|
||||
.map(|url| url.to_string())
|
||||
.unwrap_or_else(|_| format!("file://{}", path.display()))
|
||||
}
|
||||
346
src/compat/workflow_executor.rs
Normal file
346
src/compat/workflow_executor.rs
Normal file
@@ -0,0 +1,346 @@
|
||||
use std::collections::BTreeSet;
use std::path::Path;
use std::sync::OnceLock;

use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;

use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
|
||||
|
||||
// Domain string passed as the third argument to BrowserPipeTool::invoke —
// presumably used for domain scoping/permission checks; confirm in pipe.rs.
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
// Landing page for the Zhihu hot list.
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
// Selectors tried in order when reading page text: start narrow ("main") and
// widen to "body"/"html" until hot-list entries can be parsed.
const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"];
|
||||
|
||||
/// Hard-wired workflow shortcuts recognized by `detect_route` and executed
/// by `execute_route`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
    /// Scrape the Zhihu hot list and export it as an .xlsx workbook.
    ZhihuHotlistExportXlsx,
    /// Scrape the Zhihu hot list and render it as a dashboard HTML page.
    ZhihuHotlistScreen,
}
|
||||
|
||||
/// One parsed hot-list entry.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HotlistItem {
    // 1-based position on the hot list.
    rank: u64,
    // Question/topic title text.
    title: String,
    // Heat figure kept as display text (e.g. "355万"); the "热度" suffix is
    // stripped by normalize_heat but the unit character is retained.
    heat: String,
}
|
||||
|
||||
pub fn detect_route(
|
||||
instruction: &str,
|
||||
page_url: Option<&str>,
|
||||
page_title: Option<&str>,
|
||||
) -> Option<WorkflowRoute> {
|
||||
if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let normalized = instruction.to_ascii_lowercase();
|
||||
if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") {
|
||||
return Some(WorkflowRoute::ZhihuHotlistScreen);
|
||||
}
|
||||
if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") {
|
||||
return Some(WorkflowRoute::ZhihuHotlistExportXlsx);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide whether the deterministic workflow should run after the model's own
/// attempt produced `summary`.
///
/// Returns `false` only when the summary already references a produced
/// artifact (".xlsx" or ".html"). Otherwise it returns `true`.
/// NOTE(review): the trailing `matches!` lists every current `WorkflowRoute`
/// variant, so the denial heuristic above it is effectively dead code and the
/// function always falls back when no artifact path is present. The heuristic
/// only becomes the deciding factor if a new variant is added — confirm this
/// is intended.
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
    let normalized = summary.to_ascii_lowercase();
    // An artifact extension in the summary means the task already succeeded.
    if normalized.contains(".xlsx") || normalized.contains(".html") {
        return false;
    }

    // Refusal/failure wording in either language: Chinese terms are checked
    // against the raw summary, English terms against the lowercased copy.
    let looks_like_denial = summary.contains("拒绝") ||
        normalized.contains("denied") ||
        normalized.contains("failed") ||
        summary.contains("失败") ||
        summary.contains("无法");

    looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen)
}
|
||||
|
||||
pub fn execute_route<T: Transport + 'static>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
workspace_root: &Path,
|
||||
instruction: &str,
|
||||
route: WorkflowRoute,
|
||||
) -> Result<String, PipeError> {
|
||||
let top_n = extract_top_n(instruction);
|
||||
let items = collect_hotlist_items(transport, browser_tool, top_n)?;
|
||||
if items.is_empty() {
|
||||
return Err(PipeError::Protocol(
|
||||
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
match route {
|
||||
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
|
||||
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_hotlist_items<T: Transport + 'static>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
top_n: usize,
|
||||
) -> Result<Vec<HotlistItem>, PipeError> {
|
||||
navigate_hotlist_with_retry(transport, browser_tool)?;
|
||||
|
||||
for selector in HOTLIST_ROOT_SELECTORS {
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: format!("getText {selector}"),
|
||||
})?;
|
||||
let response = browser_tool.invoke(
|
||||
Action::GetText,
|
||||
json!({ "selector": selector }),
|
||||
ZHIHU_DOMAIN,
|
||||
)?;
|
||||
if !response.success {
|
||||
continue;
|
||||
}
|
||||
let text = response.data["text"].as_str().unwrap_or_default();
|
||||
let items = parse_hotlist_items(text, top_n);
|
||||
if !items.is_empty() {
|
||||
return Ok(items);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Vec::new())
|
||||
}
|
||||
|
||||
fn navigate_hotlist_with_retry<T: Transport + 'static>(
|
||||
transport: &T,
|
||||
browser_tool: &BrowserPipeTool<T>,
|
||||
) -> Result<(), PipeError> {
|
||||
let mut last_error = None;
|
||||
for _ in 0..2 {
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: format!("navigate {ZHIHU_HOT_URL}"),
|
||||
})?;
|
||||
match browser_tool.invoke(
|
||||
Action::Navigate,
|
||||
json!({ "url": ZHIHU_HOT_URL }),
|
||||
ZHIHU_DOMAIN,
|
||||
) {
|
||||
Ok(response) if response.success => return Ok(()),
|
||||
Ok(response) => {
|
||||
last_error = Some(PipeError::Protocol(format!(
|
||||
"navigate failed: {}",
|
||||
response.data
|
||||
)));
|
||||
}
|
||||
Err(err) => last_error = Some(err),
|
||||
}
|
||||
}
|
||||
|
||||
Err(last_error.unwrap_or_else(|| {
|
||||
PipeError::Protocol("navigate failed without detailed error".to_string())
|
||||
}))
|
||||
}
|
||||
|
||||
/// Export the collected hot-list items to an .xlsx workbook via the
/// `openxml_office` tool and return a user-facing success message containing
/// the workbook path.
fn export_xlsx<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call openxml_office".to_string(),
    })?;
    let tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf());
    // Each item becomes a [rank, title, heat] JSON triple.
    let rows = items
        .iter()
        .map(|item| json!([item.rank, item.title, item.heat]))
        .collect::<Vec<_>>();
    // This function is synchronous but the tool is async; a throwaway Tokio
    // runtime bridges the gap. NOTE(review): building a full runtime per call
    // is heavyweight — consider a shared runtime if this path becomes hot.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let result = runtime
        .block_on(tool.execute(json!({
            "sheet_name": "知乎热榜",
            "columns": ["rank", "title", "heat"],
            "rows": rows,
        })))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        return Err(PipeError::Protocol(
            result.error.unwrap_or_else(|| "openxml_office failed".to_string()),
        ));
    }

    // The tool reports where it wrote the workbook in its JSON output.
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?;
    let output_path = payload["output_path"]
        .as_str()
        .ok_or_else(|| PipeError::Protocol("openxml_office did not return output_path".to_string()))?;
    Ok(format!("已导出知乎热榜 Excel {output_path}"))
}
|
||||
|
||||
fn export_screen<T: Transport>(
|
||||
transport: &T,
|
||||
workspace_root: &Path,
|
||||
items: &[HotlistItem],
|
||||
) -> Result<String, PipeError> {
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: "call screen_html_export".to_string(),
|
||||
})?;
|
||||
let tool = ScreenHtmlExportTool::new(workspace_root.to_path_buf());
|
||||
let rows = items
|
||||
.iter()
|
||||
.map(|item| json!([item.rank, item.title, item.heat]))
|
||||
.collect::<Vec<_>>();
|
||||
let runtime = tokio::runtime::Runtime::new()
|
||||
.map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
|
||||
let result = runtime
|
||||
.block_on(tool.execute(json!({ "rows": rows })))
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
if !result.success {
|
||||
return Err(PipeError::Protocol(
|
||||
result.error.unwrap_or_else(|| "screen_html_export failed".to_string()),
|
||||
));
|
||||
}
|
||||
|
||||
let payload: Value = serde_json::from_str(&result.output)
|
||||
.map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?;
|
||||
let output_path = payload["output_path"]
|
||||
.as_str()
|
||||
.ok_or_else(|| PipeError::Protocol("screen_html_export did not return output_path".to_string()))?;
|
||||
Ok(format!("已生成知乎热榜大屏 {output_path}"))
|
||||
}
|
||||
|
||||
/// Parse hot-list entries out of raw page text, returning at most `top_n`
/// items sorted by rank.
///
/// Strategy: first try the strict "rank title heat" single-line format; if
/// that yields nothing, fall back to a multi-line scan where a rank line is
/// followed (within the next 6 lines) by a title line and a heat line.
fn parse_hotlist_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
    // Fast path: well-formed single-line entries.
    let mut items = parse_single_line_items(text, top_n);
    if !items.is_empty() {
        return items;
    }

    let lines = normalize_lines(text);
    // Guards against counting the same rank twice when the page repeats it.
    let mut seen_ranks = BTreeSet::new();
    let mut idx = 0usize;

    while idx < lines.len() && items.len() < top_n {
        let Some(rank) = parse_rank(&lines[idx]) else {
            idx += 1;
            continue;
        };
        if !seen_ranks.insert(rank) {
            idx += 1;
            continue;
        }

        // Look ahead up to 6 lines for this entry's title and heat, stopping
        // early when another rank line begins the next entry. Heat is checked
        // first so a numeric heat line is never mistaken for a title.
        let mut title = None;
        let mut heat = None;
        for candidate in lines.iter().skip(idx + 1).take(6) {
            if parse_rank(candidate).is_some() {
                break;
            }
            if heat.is_none() && looks_like_heat(candidate) {
                heat = Some(normalize_heat(candidate));
                continue;
            }
            if title.is_none() && !is_noise_line(candidate) {
                title = Some(candidate.clone());
            }
        }

        // Keep the entry only when both pieces were found.
        if let (Some(title), Some(heat)) = (title, heat) {
            items.push(HotlistItem { rank, title, heat });
        }
        idx += 1;
    }

    items.sort_by_key(|item| item.rank);
    items.truncate(top_n);
    items
}
|
||||
|
||||
fn parse_single_line_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
|
||||
let re = Regex::new(
|
||||
r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$",
|
||||
)
|
||||
.expect("valid hotlist single-line regex");
|
||||
let mut items = Vec::new();
|
||||
let mut seen_ranks = BTreeSet::new();
|
||||
|
||||
for capture in re.captures_iter(text) {
|
||||
let rank = capture
|
||||
.get(1)
|
||||
.and_then(|value| value.as_str().parse::<u64>().ok())
|
||||
.unwrap_or_default();
|
||||
if rank == 0 || !seen_ranks.insert(rank) {
|
||||
continue;
|
||||
}
|
||||
let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or("");
|
||||
let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or("");
|
||||
if title.is_empty() || heat.is_empty() {
|
||||
continue;
|
||||
}
|
||||
items.push(HotlistItem {
|
||||
rank,
|
||||
title: title.to_string(),
|
||||
heat: normalize_heat(heat),
|
||||
});
|
||||
if items.len() >= top_n {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
items
|
||||
}
|
||||
|
||||
/// Split `text` into trimmed, non-empty lines with internal runs of
/// whitespace collapsed to single spaces.
fn normalize_lines(text: &str) -> Vec<String> {
    let mut normalized = Vec::new();
    for raw in text.lines() {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            continue;
        }
        let words: Vec<&str> = trimmed.split_whitespace().collect();
        normalized.push(words.join(" "));
    }
    normalized
}
|
||||
|
||||
/// Extract a positive rank from a line: either the whole line is digits, or
/// it starts with one or two digits followed by '.', '、', or whitespace
/// (e.g. "12. 标题").
///
/// Previously this compiled a fresh `Regex` on every call even though it runs
/// once per scraped page line; the prefix check is now done directly, which
/// is equivalent (the old pattern's Unicode digits could never survive the
/// subsequent `u64` parse anyway) and allocation-free.
fn parse_rank(line: &str) -> Option<u64> {
    let trimmed = line.trim();
    if trimmed.is_empty() {
        return None;
    }
    // A bare run of digits of any length is accepted as a rank.
    if trimmed.chars().all(|ch| ch.is_ascii_digit()) {
        return trimmed.parse::<u64>().ok().filter(|value| *value > 0);
    }

    // Otherwise require 1-2 leading ASCII digits plus a separator, matching
    // the old `^(\d{1,2})[\.、\s]` pattern (3+ leading digits never matched).
    let digit_count = trimmed
        .chars()
        .take_while(|ch| ch.is_ascii_digit())
        .count();
    if digit_count == 0 || digit_count > 2 {
        return None;
    }
    // ASCII digits are one byte each, so this slice is on a char boundary.
    let separator = trimmed[digit_count..].chars().next()?;
    if separator != '.' && separator != '、' && !separator.is_whitespace() {
        return None;
    }
    trimmed[..digit_count]
        .parse::<u64>()
        .ok()
        .filter(|value| *value > 0)
}
|
||||
|
||||
fn looks_like_heat(line: &str) -> bool {
|
||||
let compact = line.replace(' ', "");
|
||||
let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex");
|
||||
heat_re.is_match(compact.as_str())
|
||||
}
|
||||
|
||||
/// Canonicalize a heat string: drop ASCII spaces and strip any trailing
/// "热度" suffix (repeated occurrences included), keeping the unit character.
fn normalize_heat(line: &str) -> String {
    let compact: String = line.chars().filter(|ch| *ch != ' ').collect();
    compact.trim_end_matches("热度").to_string()
}
|
||||
|
||||
/// Returns true for boilerplate page-chrome strings that must never be taken
/// as an entry title.
fn is_noise_line(line: &str) -> bool {
    const NOISE: [&str; 7] = ["知乎", "知乎热榜", "热榜", "首页", "发现", "等你来答", "更多内容"];
    NOISE.contains(&line)
}
|
||||
|
||||
fn extract_top_n(instruction: &str) -> usize {
|
||||
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
|
||||
re.captures(&instruction.to_ascii_lowercase())
|
||||
.and_then(|capture| capture.get(1))
|
||||
.and_then(|value| value.as_str().parse::<usize>().ok())
|
||||
.filter(|value| *value > 0)
|
||||
.unwrap_or(10)
|
||||
}
|
||||
Reference in New Issue
Block a user