wip: checkpoint 2026-03-29 runtime work

This commit is contained in:
zyl
2026-03-29 22:44:30 +08:00
parent 7d9036b2d4
commit e294fbb9b1
30 changed files with 6759 additions and 161 deletions

View File

@@ -1,29 +1,70 @@
use async_trait::async_trait;
use reqwest::Url;
use serde_json::{json, Map, Value};
use zeroclaw::tools::{Tool, ToolResult};
use crate::pipe::{Action, BrowserPipeTool, Transport};
use crate::pipe::{Action, BrowserPipeTool, ExecutionSurfaceMetadata, Transport};
pub const BROWSER_ACTION_TOOL_NAME: &str = "browser_action";
pub const SUPERRPA_BROWSER_TOOL_NAME: &str = "superrpa_browser";
const BROWSER_ACTION_TOOL_DESCRIPTION: &str =
"Execute browser actions in SuperRPA through the existing sgClaw pipe protocol.";
const SUPERRPA_BROWSER_TOOL_DESCRIPTION: &str =
"Use SuperRPA's dedicated privileged browser interface for page navigation, DOM reading, clicking, and typing inside the protected browser host.";
const MAX_DATA_STRING_CHARS: usize = 2048;
const MAX_AOM_STRING_CHARS: usize = 128;
const MAX_DATA_ARRAY_ITEMS: usize = 12;
const MAX_DATA_OBJECT_FIELDS: usize = 24;
const MAX_DATA_RECURSION_DEPTH: usize = 4;
/// Adapter that exposes a pipe-backed browser tool through the zeroclaw
/// `Tool` trait under a configurable identity (legacy `browser_action` or
/// the privileged `superrpa_browser` surface).
pub struct ZeroClawBrowserTool<T: Transport> {
    browser_tool: BrowserPipeTool<T>,
    // Static identity strings selected by the constructor variant.
    tool_name: &'static str,
    description: &'static str,
}
impl<T: Transport> ZeroClawBrowserTool<T> {
    /// Default adapter registered under the legacy `browser_action` name.
    ///
    /// Fix: a stale `Self { browser_tool }` expression left over from the
    /// pre-rename version sat before the `Self::named(...)` call, leaving two
    /// trailing expressions in the body (a compile error); it is removed so
    /// every constructor routes through `named`.
    pub fn new(browser_tool: BrowserPipeTool<T>) -> Self {
        Self::named(
            browser_tool,
            BROWSER_ACTION_TOOL_NAME,
            BROWSER_ACTION_TOOL_DESCRIPTION,
        )
    }

    /// Adapter registered as the dedicated `superrpa_browser` surface.
    pub fn new_superrpa(browser_tool: BrowserPipeTool<T>) -> Self {
        Self::named(
            browser_tool,
            SUPERRPA_BROWSER_TOOL_NAME,
            SUPERRPA_BROWSER_TOOL_DESCRIPTION,
        )
    }

    /// Shared constructor binding a pipe tool to a name/description pair, so
    /// the two public surfaces cannot drift apart.
    fn named(
        browser_tool: BrowserPipeTool<T>,
        tool_name: &'static str,
        description: &'static str,
    ) -> Self {
        Self {
            browser_tool,
            tool_name,
            description,
        }
    }

    /// Expose the underlying pipe tool's execution-surface metadata.
    pub fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
        self.browser_tool.surface_metadata()
    }
}
#[async_trait]
impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
fn name(&self) -> &str {
BROWSER_ACTION_TOOL_NAME
self.tool_name
}
fn description(&self) -> &str {
"Execute browser actions in SuperRPA through the existing sgClaw pipe protocol."
self.description
}
fn parameters_schema(&self) -> Value {
@@ -72,8 +113,9 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
let output = serde_json::to_string(&json!({
"seq": result.seq,
"success": result.success,
"data": result.data,
"aom_snapshot": result.aom_snapshot,
"data": compact_json_value(&result.data, 0),
"aom_snapshot": compact_aom_snapshot(&result.aom_snapshot),
"aom_snapshot_count": result.aom_snapshot.len(),
"timing": result.timing
}))?;
@@ -103,9 +145,10 @@ fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, Bro
};
let action_name = take_required_string(&mut args, "action")?;
let expected_domain = take_required_string(&mut args, "expected_domain")?;
let raw_expected_domain = take_required_string(&mut args, "expected_domain")?;
let action = parse_action(&action_name)?;
validate_action_params(&action_name, &args)?;
let expected_domain = normalize_expected_domain(&action, &raw_expected_domain, &args)?;
Ok(BrowserActionRequest {
action,
@@ -178,6 +221,59 @@ fn require_non_empty_string(
}
}
fn normalize_expected_domain(
action: &Action,
raw_expected_domain: &str,
args: &Map<String, Value>,
) -> Result<String, BrowserActionAdapterError> {
if matches!(action, Action::Navigate) {
if let Some(url) = args.get("url").and_then(Value::as_str) {
if let Some(host) = host_from_url(url) {
return Ok(host);
}
}
}
normalize_domain_like(raw_expected_domain).ok_or_else(|| {
BrowserActionAdapterError::InvalidArguments(format!(
"expected_domain must resolve to a hostname, got {raw_expected_domain:?}"
))
})
}
/// Parse `raw` as an absolute URL and return its lowercased host, if any.
fn host_from_url(raw: &str) -> Option<String> {
    let parsed = Url::parse(raw).ok()?;
    let host = parsed.host_str()?;
    Some(host.to_ascii_lowercase())
}
/// Best-effort conversion of a user-supplied domain-ish string into a bare
/// lowercase hostname.
///
/// Tries full URL parsing first; otherwise strips the scheme prefix, then
/// chops path/query/fragment and any port by hand. Returns `None` when
/// nothing host-like remains.
fn normalize_domain_like(raw: &str) -> Option<String> {
    let candidate = raw.trim();
    if candidate.is_empty() {
        return None;
    }
    if let Some(host) = host_from_url(candidate) {
        return Some(host);
    }
    // Manual fallback for scheme-less or partially formed inputs.
    let scheme_stripped = candidate
        .trim_start_matches("https://")
        .trim_start_matches("http://");
    let before_path = scheme_stripped
        .split(['/', '?', '#'])
        .next()
        .unwrap_or_default();
    let before_port = before_path.split(':').next().unwrap_or_default();
    let host = before_port.trim().to_ascii_lowercase();
    if host.is_empty() {
        None
    } else {
        Some(host)
    }
}
fn format_browser_action_error(data: &Value) -> String {
if let Some(error) = data.get("error") {
if let Some(message) = error.get("message").and_then(Value::as_str) {
@@ -193,6 +289,111 @@ fn format_browser_action_error(data: &Value) -> String {
format!("browser action failed: {data}")
}
/// Compact arbitrary tool-result `data` for prompt inclusion using the
/// default per-string budget (`MAX_DATA_STRING_CHARS`).
fn compact_json_value(value: &Value, depth: usize) -> Value {
    compact_json_value_with_string_limit(value, depth, MAX_DATA_STRING_CHARS)
}
/// Compact an accessibility (AOM) snapshot: keep at most
/// `MAX_DATA_ARRAY_ITEMS` nodes and compact each node aggressively.
fn compact_aom_snapshot(snapshot: &[Value]) -> Value {
    let mut nodes = Vec::new();
    for item in snapshot.iter().take(MAX_DATA_ARRAY_ITEMS) {
        nodes.push(compact_aom_value(item, 0));
    }
    Value::Array(nodes)
}
/// Aggressively compact a single AOM node.
///
/// Text-bearing fields (`text`, `value`, `html`) are replaced with a char
/// count so snapshots never leak large page text; other values recurse with
/// the tighter `MAX_AOM_STRING_CHARS` budget.
///
/// Fix (consistency): the original silently dropped object fields past
/// `MAX_DATA_OBJECT_FIELDS` and array items past `MAX_DATA_ARRAY_ITEMS`,
/// while the sibling `compact_json_value_with_string_limit` leaves explicit
/// truncation markers. This version emits the same markers so the model can
/// tell data was elided rather than absent.
fn compact_aom_value(value: &Value, depth: usize) -> Value {
    if depth >= MAX_DATA_RECURSION_DEPTH {
        return Value::String("[truncated nested value]".to_string());
    }
    match value {
        Value::Object(map) => {
            let mut compacted = Map::new();
            for (key, item) in map.iter().take(MAX_DATA_OBJECT_FIELDS) {
                if matches!(key.as_str(), "text" | "value" | "html") {
                    // Summarize text payloads instead of recursing into them.
                    let summary = item
                        .as_str()
                        .map(|text| format!("[{} chars omitted]", text.chars().count()))
                        .unwrap_or_else(|| "[omitted]".to_string());
                    compacted.insert(key.clone(), Value::String(summary));
                    continue;
                }
                compacted.insert(key.clone(), compact_aom_value(item, depth + 1));
            }
            if map.len() > MAX_DATA_OBJECT_FIELDS {
                compacted.insert(
                    "_truncated_fields".to_string(),
                    Value::String(format!(
                        "{} additional fields omitted",
                        map.len() - MAX_DATA_OBJECT_FIELDS
                    )),
                );
            }
            Value::Object(compacted)
        }
        Value::Array(items) => {
            let mut compacted: Vec<Value> = items
                .iter()
                .take(MAX_DATA_ARRAY_ITEMS)
                .map(|item| compact_aom_value(item, depth + 1))
                .collect();
            if items.len() > MAX_DATA_ARRAY_ITEMS {
                compacted.push(Value::String(format!(
                    "[{} more items omitted]",
                    items.len() - MAX_DATA_ARRAY_ITEMS
                )));
            }
            Value::Array(compacted)
        }
        _ => compact_json_value_with_string_limit(value, depth, MAX_AOM_STRING_CHARS),
    }
}
/// Recursively shrink a JSON value for prompt inclusion.
///
/// Strings are clipped to `max_string_chars`; arrays/objects keep a bounded
/// number of entries and record how much was dropped; recursion stops at
/// `MAX_DATA_RECURSION_DEPTH` with an explicit placeholder. Scalars pass
/// through unchanged.
fn compact_json_value_with_string_limit(
    value: &Value,
    depth: usize,
    max_string_chars: usize,
) -> Value {
    if depth >= MAX_DATA_RECURSION_DEPTH {
        return Value::String("[truncated nested value]".to_string());
    }
    let recurse =
        |item: &Value| compact_json_value_with_string_limit(item, depth + 1, max_string_chars);
    match value {
        Value::String(text) => Value::String(truncate_string(text, max_string_chars)),
        Value::Array(items) => {
            let mut kept: Vec<Value> = items
                .iter()
                .take(MAX_DATA_ARRAY_ITEMS)
                .map(recurse)
                .collect();
            let dropped = items.len().saturating_sub(MAX_DATA_ARRAY_ITEMS);
            if dropped > 0 {
                kept.push(Value::String(format!("[{dropped} more items omitted]")));
            }
            Value::Array(kept)
        }
        Value::Object(map) => {
            let mut kept = Map::new();
            for (key, item) in map.iter().take(MAX_DATA_OBJECT_FIELDS) {
                kept.insert(key.clone(), recurse(item));
            }
            let dropped = map.len().saturating_sub(MAX_DATA_OBJECT_FIELDS);
            if dropped > 0 {
                kept.insert(
                    "_truncated_fields".to_string(),
                    Value::String(format!("{dropped} additional fields omitted")),
                );
            }
            Value::Object(kept)
        }
        // Null / Bool / Number are cheap and carried through as-is.
        scalar => scalar.clone(),
    }
}
/// Clip `text` to at most `max_chars` characters, appending a note with the
/// number of characters removed. Counts `char`s, not bytes, so multi-byte
/// text is never split mid-codepoint.
fn truncate_string(text: &str, max_chars: usize) -> String {
    let total_chars = text.chars().count();
    if total_chars <= max_chars {
        text.to_string()
    } else {
        let kept: String = text.chars().take(max_chars).collect();
        let dropped = total_chars - max_chars;
        format!("{kept}...[truncated {dropped} chars]")
    }
}
#[derive(Debug, thiserror::Error)]
enum BrowserActionAdapterError {
#[error("unsupported action: {0}")]

View File

@@ -18,7 +18,15 @@ pub fn log_entry_for_turn_event(event: &TurnEvent) -> Option<AgentMessage> {
}
fn format_tool_call(name: &str, args: &Value) -> String {
if name != "browser_action" {
if name == "read_skill" {
let skill_name = args
.get("name")
.and_then(Value::as_str)
.unwrap_or("<missing-skill>");
return format!("read_skill {skill_name}");
}
if !is_browser_tool_call(name) {
return format!("call {name}");
}
@@ -54,10 +62,14 @@ fn format_tool_call(name: &str, args: &Value) -> String {
.unwrap_or("<missing-selector>");
format!("getText {selector}")
}
other => format!("browser_action {other}"),
other => format!("{name} {other}"),
}
}
/// True for either browser tool surface: the legacy `browser_action` or the
/// privileged `superrpa_browser`.
fn is_browser_tool_call(name: &str) -> bool {
    matches!(name, "browser_action" | "superrpa_browser")
}
/// Tool outputs follow the convention of starting with `Error:` on failure.
fn is_tool_error(output: &str) -> bool {
    output.strip_prefix("Error:").is_some()
}

View File

@@ -3,4 +3,8 @@ pub mod config_adapter;
pub mod cron_adapter;
pub mod event_bridge;
pub mod memory_adapter;
pub mod openxml_office_tool;
pub mod orchestration;
pub mod runtime;
pub mod screen_html_export_tool;
pub mod workflow_executor;

View File

@@ -0,0 +1,392 @@
use async_trait::async_trait;
use serde::Deserialize;
use serde_json::{json, Value};
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
use zeroclaw::tools::{Tool, ToolResult};
const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office";
const DEFAULT_SHEET_NAME: &str = "知乎热榜";
const MAX_COLUMNS: [&str; 3] = ["rank", "title", "heat"];
/// Tool that renders Zhihu hotlist rows into a local .xlsx via the external
/// openxml CLI. All scratch files and default artifacts live under
/// `workspace_root`.
pub struct OpenXmlOfficeTool {
    workspace_root: PathBuf,
}
impl OpenXmlOfficeTool {
    /// `workspace_root` receives scratch job dirs (`.sgclaw-openxml/`) and
    /// default output artifacts (`out/`).
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }
}
/// Arguments accepted by the `openxml_office` tool.
#[derive(Debug, Deserialize)]
struct OpenXmlOfficeArgs {
    // Must match the pinned sheet name after trimming; anything else is rejected.
    sheet_name: String,
    // Must be exactly ["rank", "title", "heat"].
    columns: Vec<String>,
    // One [rank, title, heat] triple per hotlist entry.
    rows: Vec<Vec<Value>>,
    // Optional explicit artifact path; defaults to a timestamped file in out/.
    #[serde(default)]
    output_path: Option<String>,
}
#[async_trait]
impl Tool for OpenXmlOfficeTool {
    /// Stable tool name registered with the agent runtime.
    fn name(&self) -> &str {
        OPENXML_OFFICE_TOOL_NAME
    }
    fn description(&self) -> &str {
        "Export structured Zhihu hotlist rows into a local .xlsx file through the OpenXML office pipeline."
    }
    /// JSON schema for tool arguments; only `output_path` is optional.
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "required": ["sheet_name", "columns", "rows"],
            "properties": {
                "sheet_name": { "type": "string" },
                "columns": {
                    "type": "array",
                    "items": { "type": "string" }
                },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {}
                    }
                },
                "output_path": { "type": "string" }
            }
        })
    }
    /// Validate arguments, stage template/payload/request files in a scratch
    /// job directory, run the external openxml CLI, and report the artifact.
    ///
    /// Argument problems are reported as failed `ToolResult`s (soft errors)
    /// so the model can retry; only infrastructure failures return `Err`.
    async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
        let parsed = match serde_json::from_value::<OpenXmlOfficeArgs>(args) {
            Ok(value) => value,
            Err(err) => return Ok(failed_tool_result(format!("invalid tool arguments: {err}"))),
        };
        // The sheet name and column layout are pinned to the Zhihu hotlist job.
        if parsed.sheet_name.trim() != DEFAULT_SHEET_NAME {
            return Ok(failed_tool_result(format!(
                "unsupported sheet_name: expected {DEFAULT_SHEET_NAME}"
            )));
        }
        // NOTE(review): `MAX_COLUMNS` is the *expected column list*, not a
        // maximum — the constant name is misleading; consider renaming.
        let expected_columns = MAX_COLUMNS
            .iter()
            .map(|value| value.to_string())
            .collect::<Vec<_>>();
        if parsed.columns != expected_columns {
            return Ok(failed_tool_result(
                "unsupported columns: expected [rank, title, heat]".to_string(),
            ));
        }
        if parsed.rows.is_empty() {
            return Ok(failed_tool_result("rows must not be empty".to_string()));
        }
        // Downstream helpers index row[0..3]; enforce the shape up front.
        if parsed.rows.iter().any(|row| row.len() != 3) {
            return Ok(failed_tool_result(
                "each row must contain exactly 3 values".to_string(),
            ));
        }
        let job_root = create_job_root(&self.workspace_root)?;
        let template_path = job_root.join("zhihu_hotlist_template.xlsx");
        let payload_path = job_root.join("payload.json");
        let request_path = job_root.join("request.json");
        let output_path = parsed
            .output_path
            .as_deref()
            .map(PathBuf::from)
            .unwrap_or_else(|| default_output_path(&self.workspace_root));
        write_hotlist_template(&template_path, parsed.rows.len())?;
        write_payload_json(&payload_path, &parsed.rows)?;
        write_request_json(&request_path, &template_path, &payload_path, &output_path)?;
        let rendered = run_openxml_cli(&request_path)?;
        // Prefer the CLI-reported artifact path; fall back to what we asked for.
        let artifact_path = rendered["data"]["artifact"]["path"]
            .as_str()
            .map(str::to_string)
            .unwrap_or_else(|| output_path.to_string_lossy().to_string());
        Ok(ToolResult {
            success: true,
            output: json!({
                "sheet_name": DEFAULT_SHEET_NAME,
                "output_path": artifact_path,
                "row_count": parsed.rows.len(),
                "renderer": OPENXML_OFFICE_TOOL_NAME
            })
            .to_string(),
            error: None,
        })
    }
}
/// Build a failed `ToolResult` that carries only an error message.
fn failed_tool_result(error: String) -> ToolResult {
    ToolResult {
        error: Some(error),
        success: false,
        output: String::new(),
    }
}
/// Create a unique scratch directory for one export job under
/// `<workspace>/.sgclaw-openxml/<nanos>` and return its path.
fn create_job_root(workspace_root: &Path) -> anyhow::Result<PathBuf> {
    let stamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos();
    let job_root = workspace_root
        .join(".sgclaw-openxml")
        .join(stamp.to_string());
    fs::create_dir_all(&job_root)?;
    Ok(job_root)
}
/// Default artifact location: `<workspace>/out/zhihu-hotlist-<nanos>.xlsx`.
/// A pre-epoch clock degrades to a zero timestamp instead of failing.
fn default_output_path(workspace_root: &Path) -> PathBuf {
    let stamp = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    workspace_root
        .join("out")
        .join(format!("zhihu-hotlist-{stamp}.xlsx"))
}
/// Write the renderer payload: one `RANK_i`/`TITLE_i`/`HEAT_i` variable
/// triple per row (1-based), with empty `tables`/`images` sections.
///
/// Precondition: every row holds exactly 3 cells (validated in `execute`).
fn write_payload_json(path: &Path, rows: &[Vec<Value>]) -> anyhow::Result<()> {
    let mut variables = BTreeMap::new();
    for (idx, row) in rows.iter().enumerate() {
        let n = idx + 1;
        variables.insert(format!("RANK_{n}"), value_to_string(&row[0]));
        variables.insert(format!("TITLE_{n}"), value_to_string(&row[1]));
        variables.insert(format!("HEAT_{n}"), value_to_string(&row[2]));
    }
    let payload = json!({
        "variables": variables,
        "tables": {},
        "images": {}
    });
    let bytes = serde_json::to_vec_pretty(&payload)?;
    fs::write(path, bytes)?;
    Ok(())
}
/// Write the render-request document consumed by the openxml CLI.
///
/// Creates the artifact's parent directory eagerly so the CLI can write the
/// output file without further setup.
fn write_request_json(
    path: &Path,
    template_path: &Path,
    payload_path: &Path,
    output_path: &Path,
) -> anyhow::Result<()> {
    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent)?;
    }
    // Strict mode: unresolved template variables fail the render instead of
    // silently passing through.
    let request = json!({
        "api_version": "2026-03-26",
        "job": "zhihu_hotlist_export",
        "template": {
            "kind": "xlsx",
            "path": template_path
        },
        "output": {
            "path": output_path
        },
        "data": {
            "json_path": payload_path
        },
        "options": {
            "strict": true,
            "allow_unresolved": false,
            "dry_run": false
        }
    });
    fs::write(path, serde_json::to_vec_pretty(&request)?)?;
    Ok(())
}
/// Invoke the sibling `openxml_cli` crate to render the staged request.
///
/// Prefers an already-built binary at `target/debug/openxml-cli` next to the
/// sibling manifest; otherwise falls back to `cargo run` against that
/// manifest. Returns the CLI's JSON stdout on success, or the trimmed stderr
/// as the error.
/// NOTE(review): the binary path is hard-wired to the debug profile, so
/// release builds always take the slower `cargo run` path — confirm intended.
fn run_openxml_cli(request_path: &Path) -> anyhow::Result<Value> {
    // openxml_cli is assumed to be a sibling crate of this one.
    let manifest_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .map(|path| path.join("openxml_cli").join("Cargo.toml"))
        .ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli manifest path"))?;
    let binary_path = manifest_path
        .parent()
        .map(|path| path.join("target").join("debug").join("openxml-cli"))
        .ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli binary path"))?;
    let output = if binary_path.exists() {
        Command::new(&binary_path)
            .args([
                "template",
                "render",
                "--request",
                request_path.to_string_lossy().as_ref(),
                "--json",
            ])
            .output()?
    } else {
        Command::new("cargo")
            .args([
                "run",
                "--quiet",
                "--manifest-path",
                manifest_path.to_string_lossy().as_ref(),
                "--",
                "template",
                "render",
                "--request",
                request_path.to_string_lossy().as_ref(),
                "--json",
            ])
            .output()?
    };
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        return Err(anyhow::anyhow!(if stderr.is_empty() {
            "openxml_cli render failed".to_string()
        } else {
            stderr
        }));
    }
    let stdout = String::from_utf8(output.stdout)?;
    Ok(serde_json::from_str(&stdout)?)
}
/// Render a JSON scalar as the cell text the template expects; arrays and
/// objects fall back to their compact JSON encoding, null becomes "".
fn value_to_string(value: &Value) -> String {
    match value {
        Value::Null => String::new(),
        Value::Bool(flag) => flag.to_string(),
        Value::Number(number) => number.to_string(),
        Value::String(text) => text.clone(),
        composite => composite.to_string(),
    }
}
/// Assemble a minimal .xlsx template on disk whose single sheet holds
/// `{{RANK_i}}`/`{{TITLE_i}}`/`{{HEAT_i}}` placeholders for `row_count` rows.
///
/// The OpenXML parts are written as plain files into a scratch
/// `template-build/` directory and packaged with the system `zip` binary
/// (an .xlsx is just a zip container). The build directory is removed
/// best-effort afterwards.
/// NOTE(review): `zip` runs with cwd = the build dir, so a *relative* `path`
/// would land relative to the build dir rather than the workspace — confirm
/// callers always pass an absolute workspace root. Also assumes `zip` is on
/// PATH.
fn write_hotlist_template(path: &Path, row_count: usize) -> anyhow::Result<()> {
    let build_root = path
        .parent()
        .ok_or_else(|| anyhow::anyhow!("template path has no parent"))?
        .join("template-build");
    fs::create_dir_all(build_root.join("_rels"))?;
    fs::create_dir_all(build_root.join("docProps"))?;
    fs::create_dir_all(build_root.join("xl/_rels"))?;
    fs::create_dir_all(build_root.join("xl/worksheets"))?;
    fs::write(build_root.join("[Content_Types].xml"), content_types_xml())?;
    fs::write(build_root.join("_rels/.rels"), root_rels_xml())?;
    fs::write(build_root.join("docProps/app.xml"), app_xml())?;
    fs::write(build_root.join("docProps/core.xml"), core_xml())?;
    fs::write(build_root.join("xl/workbook.xml"), workbook_xml())?;
    fs::write(
        build_root.join("xl/_rels/workbook.xml.rels"),
        workbook_rels_xml(),
    )?;
    fs::write(
        build_root.join("xl/worksheets/sheet1.xml"),
        worksheet_xml(row_count),
    )?;
    // zip appends to an existing archive; remove any stale file first.
    if path.exists() {
        fs::remove_file(path)?;
    }
    let zip = Command::new("zip")
        .current_dir(&build_root)
        .args(["-q", "-r", path.to_string_lossy().as_ref(), "."])
        .output()?;
    if !zip.status.success() {
        let stderr = String::from_utf8_lossy(&zip.stderr);
        return Err(anyhow::anyhow!(format!(
            "failed to create xlsx template: {}",
            stderr.trim()
        )));
    }
    // Best-effort cleanup; a leftover build dir is harmless.
    let _ = fs::remove_dir_all(&build_root);
    Ok(())
}
/// Build the sheet XML: a literal header row (`rank`, `title`, `heat`)
/// followed by one inline-string row of `{{RANK_i}}`/`{{TITLE_i}}`/`{{HEAT_i}}`
/// placeholders per data row. Worksheet rows are 1-based, so data starts at
/// row 2.
fn worksheet_xml(row_count: usize) -> String {
    let header = "<row r=\"1\"><c r=\"A1\" t=\"inlineStr\"><is><t>rank</t></is></c><c r=\"B1\" t=\"inlineStr\"><is><t>title</t></is></c><c r=\"C1\" t=\"inlineStr\"><is><t>heat</t></is></c></row>";
    let data_rows: String = (1..=row_count)
        .map(|idx| {
            let excel_row = idx + 1;
            format!(
                "<row r=\"{excel_row}\"><c r=\"A{excel_row}\" t=\"inlineStr\"><is><t>{{{{RANK_{idx}}}}}</t></is></c><c r=\"B{excel_row}\" t=\"inlineStr\"><is><t>{{{{TITLE_{idx}}}}}</t></is></c><c r=\"C{excel_row}\" t=\"inlineStr\"><is><t>{{{{HEAT_{idx}}}}}</t></is></c></row>"
            )
        })
        .collect();
    format!(
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"><sheetData>{header}{data_rows}</sheetData></worksheet>"
    )
}
/// Static `[Content_Types].xml` part declaring the workbook/worksheet and
/// doc-props content types for the generated package.
fn content_types_xml() -> &'static str {
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
<Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
</Types>"#
}
/// Static package-level `_rels/.rels` part wiring the workbook and doc-props.
fn root_rels_xml() -> &'static str {
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
</Relationships>"#
}
/// Static extended-properties part (`docProps/app.xml`) naming the producer.
fn app_xml() -> &'static str {
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<Application>sgClaw</Application>
</Properties>"#
}
/// Static core-properties part (`docProps/core.xml`) with the document title.
fn core_xml() -> &'static str {
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dc:title>Zhihu Hotlist Export</dc:title>
</cp:coreProperties>"#
}
/// Static workbook part declaring the single hotlist sheet (rId1).
fn workbook_xml() -> &'static str {
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheets>
<sheet name="知乎热榜" sheetId="1" r:id="rId1"/>
</sheets>
</workbook>"#
}
/// Static workbook relationships part mapping rId1 to the sheet part.
fn workbook_rels_xml() -> &'static str {
r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
</Relationships>"#
}

View File

@@ -0,0 +1,67 @@
use std::path::Path;
use crate::compat::runtime::CompatTaskContext;
use crate::config::SgClawSettings;
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
/// Gate for routing a task through the primary orchestration path: the task
/// must look like a Zhihu-hotlist job *and* ask for some export artifact
/// (excel/xlsx/dashboard in any case, or 导出/大屏/新标签页 verbatim).
pub fn should_use_primary_orchestration(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> bool {
    let lowered = instruction.to_ascii_lowercase();
    // ASCII markers are matched case-insensitively, CJK markers verbatim.
    let wants_artifact = ["excel", "xlsx", "dashboard"]
        .iter()
        .any(|marker| lowered.contains(marker))
        || ["导出", "大屏", "新标签页"]
            .iter()
            .any(|marker| instruction.contains(marker));
    wants_artifact && crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title)
}
/// Run an instruction through the primary compat runtime, falling back to
/// the deterministic workflow executor when a route is detected and the
/// primary run either errored or returned a summary that looks unproductive.
pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
    transport: &T,
    browser_tool: BrowserPipeTool<T>,
    instruction: &str,
    task_context: &CompatTaskContext,
    workspace_root: &Path,
    settings: &SgClawSettings,
) -> Result<String, PipeError> {
    // Detect the route up front so fallback is possible even when the
    // primary run fails outright.
    let route = crate::compat::workflow_executor::detect_route(
        instruction,
        task_context.page_url.as_deref(),
        task_context.page_title.as_deref(),
    );
    let primary_result = crate::compat::runtime::execute_task_with_sgclaw_settings(
        transport,
        browser_tool.clone(),
        instruction,
        task_context,
        workspace_root,
        settings,
    );
    match (route, primary_result) {
        // Primary "succeeded" but its summary does not look like a produced
        // artifact: rerun deterministically.
        (Some(route), Ok(summary))
            if crate::compat::workflow_executor::should_fallback_after_summary(&summary, &route) =>
        {
            crate::compat::workflow_executor::execute_route(
                transport,
                &browser_tool,
                workspace_root,
                instruction,
                route,
            )
        }
        // Primary summary accepted as-is (also covers un-routed tasks).
        (_, Ok(summary)) => Ok(summary),
        // Primary failed but a deterministic route exists: recover with it.
        (Some(route), Err(_)) => crate::compat::workflow_executor::execute_route(
            transport,
            &browser_tool,
            workspace_root,
            instruction,
            route,
        ),
        // No route to fall back to: propagate the primary error.
        (None, Err(err)) => Err(err),
    }
}

View File

@@ -0,0 +1,382 @@
use async_trait::async_trait;
use reqwest::Url;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use zeroclaw::tools::{Tool, ToolResult};
const SCREEN_HTML_EXPORT_TOOL_NAME: &str = "screen_html_export";
const DEFAULT_SCREEN_TITLE: &str = "知乎热榜主题分类分析大屏";
const TEMPLATE: &str = include_str!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/../skill_lib/skills/zhihu-hotlist-screen/assets/zhihu-hotlist-echarts.html"
));
const PAYLOAD_START_MARKER: &str = " const defaultPayload = ";
const PAYLOAD_END_MARKER: &str = "\n\n const themeMeta = {";
/// Tool that renders the Zhihu-hotlist ECharts dashboard HTML; default
/// artifacts land under `workspace_root/out/`.
pub struct ScreenHtmlExportTool {
    workspace_root: PathBuf,
}
impl ScreenHtmlExportTool {
    /// `workspace_root` is the base directory for default output artifacts.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }
}
/// Arguments for `screen_html_export`. Either `table` (preferred) or raw
/// `rows` must be supplied; everything else has derived defaults.
#[derive(Debug, Deserialize)]
struct ScreenHtmlExportArgs {
    #[serde(default)]
    snapshot_id: Option<String>,
    #[serde(default)]
    generated_at_ms: Option<u64>,
    // Raw [rank, title, heat] triples; only used when `table` is absent.
    #[serde(default)]
    rows: Option<Vec<Vec<Value>>>,
    // Fully-specified table rows; takes precedence over `rows`.
    #[serde(default)]
    table: Option<Vec<ScreenTableRow>>,
    // Per-category aggregates; derived from the table when absent or empty.
    #[serde(default)]
    categories: Option<Vec<ScreenCategory>>,
    #[serde(default)]
    output_path: Option<String>,
}
/// Per-category aggregate surfaced in the dashboard payload.
#[derive(Debug, Clone, Deserialize, Serialize)]
struct ScreenCategory {
    category_code: String,
    category_label: String,
    item_count: u64,
    total_heat: u64,
    // Integer average: total_heat / item_count.
    avg_heat: u64,
}
/// One fully-resolved hotlist entry as displayed in the dashboard table.
#[derive(Debug, Clone, Deserialize, Serialize)]
struct ScreenTableRow {
    rank: u64,
    title: String,
    url: String,
    category_code: String,
    category_label: String,
    // Raw heat text as scraped (e.g. "1234 万热度").
    heat_text: String,
    // Numeric heat parsed from heat_text; 0 when unparseable.
    heat_value: u64,
    // Engagement counters; zeroed when rows come from raw triples.
    reply_count: u64,
    upvote_count: u64,
    favorite_count: u64,
    heart_count: u64,
}
/// The JSON payload spliced into the dashboard template as `defaultPayload`.
#[derive(Debug, Serialize)]
struct ScreenPayload {
    snapshot_id: String,
    generated_at_ms: u64,
    categories: Vec<ScreenCategory>,
    table: Vec<ScreenTableRow>,
}
#[async_trait]
impl Tool for ScreenHtmlExportTool {
    /// Stable tool name registered with the agent runtime.
    fn name(&self) -> &str {
        SCREEN_HTML_EXPORT_TOOL_NAME
    }
    fn description(&self) -> &str {
        "Render a local Zhihu hotlist ECharts dashboard HTML for leadership demos and new-tab presentation."
    }
    /// JSON schema for tool arguments; all fields optional at schema level,
    /// but `execute` requires `rows` or `table`.
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "snapshot_id": { "type": "string" },
                "generated_at_ms": { "type": "integer" },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {}
                    }
                },
                "table": {
                    "type": "array",
                    "items": { "type": "object" }
                },
                "categories": {
                    "type": "array",
                    "items": { "type": "object" }
                },
                "output_path": { "type": "string" }
            }
        })
    }
    /// Resolve a table (explicit `table` wins over raw `rows`), derive
    /// categories if absent, splice the payload into the bundled template,
    /// and write the dashboard HTML. Argument problems are soft failures.
    async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
        let parsed = match serde_json::from_value::<ScreenHtmlExportArgs>(args) {
            Ok(value) => value,
            Err(err) => return Ok(failed_tool_result(format!("invalid tool arguments: {err}"))),
        };
        let table = match parsed.table {
            Some(table) if !table.is_empty() => table,
            Some(_) => return Ok(failed_tool_result("table must not be empty".to_string())),
            None => match parsed.rows {
                Some(rows) => build_table_from_rows(&rows)?,
                None => {
                    return Ok(failed_tool_result(
                        "rows or table is required for screen_html_export".to_string(),
                    ))
                }
            },
        };
        if table.is_empty() {
            return Ok(failed_tool_result("table must not be empty".to_string()));
        }
        // Empty `categories` is treated the same as absent: derive from rows.
        let categories = parsed
            .categories
            .filter(|items| !items.is_empty())
            .unwrap_or_else(|| derive_categories(&table));
        let payload = ScreenPayload {
            // Blank/whitespace snapshot ids fall back to a generated one.
            snapshot_id: parsed
                .snapshot_id
                .map(|value| value.trim().to_string())
                .filter(|value| !value.is_empty())
                .unwrap_or_else(default_snapshot_id),
            generated_at_ms: parsed.generated_at_ms.unwrap_or_else(now_ms),
            categories,
            table,
        };
        let rendered = render_template(&payload)?;
        let output_path = parsed
            .output_path
            .as_deref()
            .map(PathBuf::from)
            .unwrap_or_else(|| default_output_path(&self.workspace_root));
        write_output_html(&output_path, &rendered)?;
        let presentation_url = file_url_for_path(&output_path);
        Ok(ToolResult {
            success: true,
            // The `presentation` block tells the host to open the artifact
            // in a new tab.
            output: json!({
                "title": DEFAULT_SCREEN_TITLE,
                "output_path": output_path,
                "renderer": SCREEN_HTML_EXPORT_TOOL_NAME,
                "row_count": payload.table.len(),
                "snapshot_id": payload.snapshot_id,
                "presentation": {
                    "mode": "new_tab",
                    "title": DEFAULT_SCREEN_TITLE,
                    "url": presentation_url,
                    "open_in_new_tab": true
                }
            })
            .to_string(),
            error: None,
        })
    }
}
/// Build a failed `ToolResult` carrying only an error message.
fn failed_tool_result(error: String) -> ToolResult {
    ToolResult {
        error: Some(error),
        output: String::new(),
        success: false,
    }
}
/// Expand raw `[rank, title, heat]` triples into full dashboard table rows.
///
/// Rank falls back to the 1-based row position when the cell is not numeric;
/// engagement counters are zeroed because raw rows do not carry them; the
/// item URL is synthesized from the rank. The first malformed row aborts the
/// whole conversion (Result-collect short-circuits).
fn build_table_from_rows(rows: &[Vec<Value>]) -> anyhow::Result<Vec<ScreenTableRow>> {
    if rows.is_empty() {
        return Err(anyhow::anyhow!("rows must not be empty"));
    }
    rows.iter()
        .enumerate()
        .map(|(index, row)| {
            if row.len() != 3 {
                return Err(anyhow::anyhow!(
                    "each row must contain exactly 3 values: rank, title, heat"
                ));
            }
            let rank = value_to_rank(&row[0]).unwrap_or((index + 1) as u64);
            let title = value_to_string(&row[1]);
            if title.trim().is_empty() {
                return Err(anyhow::anyhow!("title must not be empty"));
            }
            let heat_text = value_to_string(&row[2]);
            let heat_value = parse_heat_value(&heat_text);
            let (category_code, category_label) = classify_title(&title);
            Ok(ScreenTableRow {
                rank,
                title,
                // Synthetic link; raw rows do not include the real item URL.
                url: format!("https://www.zhihu.com/question/hotlist-{rank}"),
                category_code: category_code.to_string(),
                category_label: category_label.to_string(),
                heat_text,
                heat_value,
                reply_count: 0,
                upvote_count: 0,
                favorite_count: 0,
                heart_count: 0,
            })
        })
        .collect()
}
/// Aggregate table rows into per-category stats (item count, total heat,
/// integer-average heat), ordered deterministically by (code, label) via a
/// BTreeMap.
fn derive_categories(table: &[ScreenTableRow]) -> Vec<ScreenCategory> {
    let mut grouped: BTreeMap<(String, String), (u64, u64)> = BTreeMap::new();
    for row in table {
        let stats = grouped
            .entry((row.category_code.clone(), row.category_label.clone()))
            .or_insert((0, 0));
        stats.0 += 1;
        stats.1 += row.heat_value;
    }
    let mut categories = Vec::with_capacity(grouped.len());
    for ((category_code, category_label), (item_count, total_heat)) in grouped {
        let avg_heat = if item_count == 0 {
            0
        } else {
            total_heat / item_count
        };
        categories.push(ScreenCategory {
            category_code,
            category_label,
            item_count,
            total_heat,
            avg_heat,
        });
    }
    categories
}
/// Map a hotlist title to a `(category_code, category_label)` pair via
/// keyword matching; the first matching bucket wins, and anything unmatched
/// defaults to ("society", "社会"). ASCII keywords are matched
/// case-insensitively; CJK keywords verbatim.
///
/// Fix: the military and finance keyword lists contained an empty string,
/// and `str::contains("")` is always true — so every title reaching the
/// military check was classified 军事, leaving the finance and society
/// buckets unreachable. Empty needles are removed and defensively skipped.
fn classify_title(title: &str) -> (&'static str, &'static str) {
    let normalized = title.to_ascii_lowercase();
    let matches_any = |needles: &[&str]| {
        needles
            .iter()
            .filter(|needle| !needle.is_empty())
            .any(|needle| normalized.contains(needle))
    };
    if matches_any(&["ai", "芯片", "科技", "算法", "机器人", "无人机"]) {
        return ("technology", "科技");
    }
    if matches_any(&["电影", "综艺", "明星", "周杰伦", "短剧", "娱乐"]) {
        return ("entertainment", "娱乐");
    }
    if matches_any(&["足球", "比赛", "联赛", "国足", "体育", "冠军"]) {
        return ("sports", "体育");
    }
    if matches_any(&["航母", "作战", "军事", "演训"]) {
        return ("military", "军事");
    }
    if matches_any(&["出口", "经济", "市场", "财经", "消费"]) {
        return ("finance", "财经");
    }
    ("society", "社会")
}
/// True when any non-empty needle occurs in `haystack`.
///
/// Fix: empty needles are now skipped — `str::contains("")` is always true,
/// so a keyword list accidentally containing `""` previously matched every
/// input.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    needles
        .iter()
        .any(|needle| !needle.is_empty() && haystack.contains(needle))
}
/// Parse a Chinese heat string such as "1,234 万热度" into an absolute u64.
///
/// Commas are stripped, ASCII digits and dots are collected into a base
/// number, and a 万 (1e4) or 亿 (1e8) suffix scales it. Unparseable input
/// yields 0.
fn parse_heat_value(heat_text: &str) -> u64 {
    let compact = heat_text.trim().replace(',', "");
    if compact.is_empty() {
        return 0;
    }
    let mut digits = String::new();
    for ch in compact.chars() {
        if ch.is_ascii_digit() || ch == '.' {
            digits.push(ch);
        }
    }
    let base: f64 = digits.parse().unwrap_or(0.0);
    let multiplier = if compact.contains('亿') {
        100_000_000.0
    } else if compact.contains('万') {
        10_000.0
    } else {
        1.0
    };
    (base * multiplier).round() as u64
}
/// Render a JSON scalar as display text; null becomes "", arrays and objects
/// fall back to their compact JSON encoding via `Display`.
fn value_to_string(value: &Value) -> String {
    if let Value::String(text) = value {
        return text.clone();
    }
    match value {
        Value::Null => String::new(),
        Value::Bool(flag) => flag.to_string(),
        Value::Number(number) => number.to_string(),
        composite => composite.to_string(),
    }
}
fn value_to_rank(value: &Value) -> Option<u64> {
match value {
Value::Number(number) => number.as_u64(),
Value::String(text) => text.trim().parse::<u64>().ok(),
_ => None,
}
}
/// Splice the runtime payload into the bundled dashboard template.
///
/// Replaces the `defaultPayload` JS literal between the start/end markers
/// with pretty-printed, re-indented JSON. Errors when either marker is
/// missing, i.e. the template asset has drifted from this code.
fn render_template(payload: &ScreenPayload) -> anyhow::Result<String> {
    let payload_json = serde_json::to_string_pretty(payload)?;
    let payload_start = TEMPLATE
        .find(PAYLOAD_START_MARKER)
        .ok_or_else(|| anyhow::anyhow!("default payload start marker missing"))?;
    let payload_end = TEMPLATE
        .find(PAYLOAD_END_MARKER)
        .ok_or_else(|| anyhow::anyhow!("default payload end marker missing"))?;
    // Re-emit the start marker followed by the indented payload; the slice
    // from payload_end keeps the rest of the template (including the end
    // marker itself) intact.
    let replacement = format!(
        "{PAYLOAD_START_MARKER}{}\n",
        indent_block(&payload_json, " ")
    );
    Ok(format!(
        "{}{}{}",
        &TEMPLATE[..payload_start],
        replacement,
        &TEMPLATE[payload_end..],
    ))
}
/// Prefix every line of `value` with `indent`. Note that `str::lines` drops
/// a trailing newline, so the result never ends with one.
fn indent_block(value: &str, indent: &str) -> String {
    let mut indented = Vec::new();
    for line in value.lines() {
        indented.push(format!("{indent}{line}"));
    }
    indented.join("\n")
}
/// Persist the rendered HTML, creating parent directories as needed.
fn write_output_html(path: &Path, rendered: &str) -> anyhow::Result<()> {
    match path.parent() {
        Some(parent) => fs::create_dir_all(parent)?,
        None => {}
    }
    fs::write(path, rendered)?;
    Ok(())
}
/// Default artifact location:
/// `<workspace>/out/zhihu-hotlist-screen-<nanos>.html`. A pre-epoch clock
/// degrades to a zero timestamp instead of failing.
fn default_output_path(workspace_root: &Path) -> PathBuf {
    let stamp = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    workspace_root
        .join("out")
        .join(format!("zhihu-hotlist-screen-{stamp}.html"))
}
/// Time-derived snapshot id: `zhihu-hotlist-screen-<unix-ms>`.
fn default_snapshot_id() -> String {
    let stamp = now_ms();
    format!("zhihu-hotlist-screen-{stamp}")
}
/// Milliseconds since the Unix epoch; a pre-epoch clock degrades to 0.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
/// Convert a filesystem path to a `file://` URL. `Url::from_file_path` fails
/// for relative paths, in which case we fall back to naive concatenation.
fn file_url_for_path(path: &Path) -> String {
    match Url::from_file_path(path) {
        Ok(url) => url.to_string(),
        Err(()) => format!("file://{}", path.display()),
    }
}

View File

@@ -0,0 +1,346 @@
use std::collections::BTreeSet;
use std::path::Path;
use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"];
/// Deterministic workflows this executor can run end-to-end.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
    // Collect hotlist entries and export them as an .xlsx artifact.
    ZhihuHotlistExportXlsx,
    // Collect hotlist entries and render the ECharts dashboard HTML.
    ZhihuHotlistScreen,
}
/// One scraped hotlist entry: rank, title, and the raw heat text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HotlistItem {
    rank: u64,
    title: String,
    heat: String,
}
/// Decide which deterministic workflow (if any) applies to an instruction.
/// Screen/dashboard wording wins over a plain Excel export; tasks that do
/// not look like Zhihu-hotlist work never route.
pub fn detect_route(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> Option<WorkflowRoute> {
    if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
        return None;
    }
    let lowered = instruction.to_ascii_lowercase();
    let wants_screen = lowered.contains("dashboard")
        || instruction.contains("大屏")
        || instruction.contains("新标签页");
    if wants_screen {
        return Some(WorkflowRoute::ZhihuHotlistScreen);
    }
    let wants_xlsx =
        lowered.contains("excel") || lowered.contains("xlsx") || instruction.contains("导出");
    wants_xlsx.then_some(WorkflowRoute::ZhihuHotlistExportXlsx)
}
/// Decide whether the deterministic workflow should still run after the
/// primary runtime produced a summary.
///
/// A summary that already mentions a produced artifact (".xlsx"/".html")
/// suppresses fallback.
/// NOTE(review): the trailing `matches!` lists every `WorkflowRoute`
/// variant, so for any routed task without an artifact mention this returns
/// true unconditionally and the `looks_like_denial` heuristic is dead code.
/// Confirm whether always-fallback is intended or the `matches!` arm should
/// be narrowed.
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
    let normalized = summary.to_ascii_lowercase();
    if normalized.contains(".xlsx") || normalized.contains(".html") {
        return false;
    }
    // Denial/failure wording in either language.
    let looks_like_denial = summary.contains("拒绝") ||
        normalized.contains("denied") ||
        normalized.contains("failed") ||
        summary.contains("失败") ||
        summary.contains("无法");
    looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen)
}
/// Runs the selected hotlist workflow end to end: scrape the list, then hand
/// the rows to the exporter matching `route`. Returns the user-facing summary.
pub fn execute_route<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    workspace_root: &Path,
    instruction: &str,
    route: WorkflowRoute,
) -> Result<String, PipeError> {
    // The requested entry count is parsed from the instruction (defaults to 10).
    let requested = extract_top_n(instruction);
    let items = collect_hotlist_items(transport, browser_tool, requested)?;
    if items.is_empty() {
        return Err(PipeError::Protocol(
            "知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
        ));
    }
    // Dispatch to the exporter for the detected route.
    match route {
        WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
        WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
    }
}
/// Navigates to the hot list and extracts up to `top_n` entries, probing each
/// root selector in turn and stopping at the first one that yields parseable
/// items. An empty Vec means every selector came up empty.
fn collect_hotlist_items<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    top_n: usize,
) -> Result<Vec<HotlistItem>, PipeError> {
    navigate_hotlist_with_retry(transport, browser_tool)?;
    for selector in HOTLIST_ROOT_SELECTORS {
        transport.send(&AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("getText {selector}"),
        })?;
        let response =
            browser_tool.invoke(Action::GetText, json!({ "selector": selector }), ZHIHU_DOMAIN)?;
        if response.success {
            let page_text = response.data["text"].as_str().unwrap_or("");
            let parsed = parse_hotlist_items(page_text, top_n);
            if !parsed.is_empty() {
                return Ok(parsed);
            }
        }
        // Unsuccessful or unparseable: fall through to the next, broader selector.
    }
    Ok(Vec::new())
}
/// Navigates to the hot-list URL, retrying once; the last failure (protocol
/// error or unsuccessful response) is surfaced if both attempts fail.
fn navigate_hotlist_with_retry<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
) -> Result<(), PipeError> {
    let mut failure: Option<PipeError> = None;
    for _attempt in 0..2 {
        transport.send(&AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("navigate {ZHIHU_HOT_URL}"),
        })?;
        let outcome = browser_tool.invoke(
            Action::Navigate,
            json!({ "url": ZHIHU_HOT_URL }),
            ZHIHU_DOMAIN,
        );
        match outcome {
            Ok(response) if response.success => return Ok(()),
            Ok(response) => {
                // Keep the payload so the final error explains what the host said.
                failure = Some(PipeError::Protocol(format!(
                    "navigate failed: {}",
                    response.data
                )));
            }
            Err(err) => failure = Some(err),
        }
    }
    Err(failure.unwrap_or_else(|| {
        PipeError::Protocol("navigate failed without detailed error".to_string())
    }))
}
/// Writes the collected hotlist rows into an .xlsx workbook via the
/// openxml_office tool and returns a summary containing the output path.
fn export_xlsx<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call openxml_office".to_string(),
    })?;
    // Flatten each entry into a [rank, title, heat] spreadsheet row.
    let mut rows = Vec::with_capacity(items.len());
    for item in items {
        rows.push(json!([item.rank, item.title, item.heat]));
    }
    let workbook_tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf());
    let request = json!({
        "sheet_name": "知乎热榜",
        "columns": ["rank", "title", "heat"],
        "rows": rows,
    });
    // The tool is async while this workflow is synchronous, so a throwaway
    // runtime drives it to completion.
    // NOTE(review): `block_on` panics if this is ever called from inside an
    // async context — confirm all callers are synchronous.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let result = runtime
        .block_on(workbook_tool.execute(request))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        let reason = result
            .error
            .unwrap_or_else(|| "openxml_office failed".to_string());
        return Err(PipeError::Protocol(reason));
    }
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?;
    match payload["output_path"].as_str() {
        Some(output_path) => Ok(format!("已导出知乎热榜 Excel {output_path}")),
        None => Err(PipeError::Protocol(
            "openxml_office did not return output_path".to_string(),
        )),
    }
}
/// Renders the collected hotlist rows as an HTML "big screen" via the
/// screen_html_export tool and returns a summary containing the output path.
fn export_screen<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call screen_html_export".to_string(),
    })?;
    // Rows are [rank, title, heat] triples, mirroring the xlsx export layout.
    let mut rows = Vec::with_capacity(items.len());
    for item in items {
        rows.push(json!([item.rank, item.title, item.heat]));
    }
    let exporter = ScreenHtmlExportTool::new(workspace_root.to_path_buf());
    // Synchronous caller, async tool: drive it with a temporary runtime.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let result = runtime
        .block_on(exporter.execute(json!({ "rows": rows })))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        let reason = result
            .error
            .unwrap_or_else(|| "screen_html_export failed".to_string());
        return Err(PipeError::Protocol(reason));
    }
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?;
    match payload["output_path"].as_str() {
        Some(output_path) => Ok(format!("已生成知乎热榜大屏 {output_path}")),
        None => Err(PipeError::Protocol(
            "screen_html_export did not return output_path".to_string(),
        )),
    }
}
/// Parses raw page text into ranked hotlist items: tries the strict
/// one-entry-per-line pattern first, then falls back to scanning for rank
/// markers with title/heat spread over the following lines.
fn parse_hotlist_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
    // Fast path: "<rank> <title> <heat>" packed onto single lines.
    let mut items = parse_single_line_items(text, top_n);
    if !items.is_empty() {
        return items;
    }
    // Fallback: whitespace-normalized lines, one field per line.
    let lines = normalize_lines(text);
    let mut seen_ranks = BTreeSet::new();
    let mut idx = 0usize;
    while idx < lines.len() && items.len() < top_n {
        let Some(rank) = parse_rank(&lines[idx]) else {
            idx += 1;
            continue;
        };
        // Skip repeated rank markers (e.g. the list rendered twice on the page).
        if !seen_ranks.insert(rank) {
            idx += 1;
            continue;
        }
        let mut title = None;
        let mut heat = None;
        // Inspect at most 6 following lines, stopping at the next rank marker.
        for candidate in lines.iter().skip(idx + 1).take(6) {
            if parse_rank(candidate).is_some() {
                break;
            }
            // Heat is tested before title (with `continue`), so a heat-shaped
            // line can never be captured as the title.
            if heat.is_none() && looks_like_heat(candidate) {
                heat = Some(normalize_heat(candidate));
                continue;
            }
            // First non-noise line after the rank becomes the title.
            if title.is_none() && !is_noise_line(candidate) {
                title = Some(candidate.clone());
            }
        }
        // Only complete entries (both title and heat found) are kept.
        if let (Some(title), Some(heat)) = (title, heat) {
            items.push(HotlistItem { rank, title, heat });
        }
        idx += 1;
    }
    // Present in rank order, capped at the requested count.
    items.sort_by_key(|item| item.rank);
    items.truncate(top_n);
    items
}
fn parse_single_line_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
let re = Regex::new(
r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$",
)
.expect("valid hotlist single-line regex");
let mut items = Vec::new();
let mut seen_ranks = BTreeSet::new();
for capture in re.captures_iter(text) {
let rank = capture
.get(1)
.and_then(|value| value.as_str().parse::<u64>().ok())
.unwrap_or_default();
if rank == 0 || !seen_ranks.insert(rank) {
continue;
}
let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or("");
let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or("");
if title.is_empty() || heat.is_empty() {
continue;
}
items.push(HotlistItem {
rank,
title: title.to_string(),
heat: normalize_heat(heat),
});
if items.len() >= top_n {
break;
}
}
items
}
/// Trims every line, drops blanks, and collapses internal whitespace runs to a
/// single space, so downstream parsing sees one canonical token separator.
fn normalize_lines(text: &str) -> Vec<String> {
    let mut normalized = Vec::new();
    for raw_line in text.lines() {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }
        let mut collapsed = String::with_capacity(line.len());
        for (i, token) in line.split_whitespace().enumerate() {
            if i > 0 {
                collapsed.push(' ');
            }
            collapsed.push_str(token);
        }
        normalized.push(collapsed);
    }
    normalized
}
fn parse_rank(line: &str) -> Option<u64> {
let trimmed = line.trim();
if trimmed.is_empty() {
return None;
}
if trimmed.chars().all(|ch| ch.is_ascii_digit()) {
return trimmed.parse::<u64>().ok().filter(|value| *value > 0);
}
let rank_re = Regex::new(r"^(\d{1,2})[\.、\s]").expect("valid rank regex");
rank_re
.captures(trimmed)
.and_then(|capture| capture.get(1))
.and_then(|value| value.as_str().parse::<u64>().ok())
.filter(|value| *value > 0)
}
fn looks_like_heat(line: &str) -> bool {
let compact = line.replace(' ', "");
let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex");
heat_re.is_match(compact.as_str())
}
/// Canonicalizes a heat string: removes ASCII spaces and strips any trailing
/// "热度" suffix ("1.2 万 热度" -> "1.2万").
fn normalize_heat(line: &str) -> String {
    let compact: String = line.chars().filter(|ch| *ch != ' ').collect();
    compact.trim_end_matches("热度").to_string()
}
/// True for site-chrome lines that must never be mistaken for a hotlist title.
fn is_noise_line(line: &str) -> bool {
    const NOISE: [&str; 7] = ["知乎", "知乎热榜", "热榜", "首页", "发现", "等你来答", "更多内容"];
    NOISE.contains(&line)
}
fn extract_top_n(instruction: &str) -> usize {
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
re.captures(&instruction.to_ascii_lowercase())
.and_then(|capture| capture.get(1))
.and_then(|value| value.as_str().parse::<usize>().ok())
.filter(|value| *value > 0)
.unwrap_or(10)
}