wip: checkpoint 2026-03-29 runtime work

This commit is contained in:
zyl
2026-03-29 22:44:30 +08:00
parent 7d9036b2d4
commit e294fbb9b1
30 changed files with 6759 additions and 161 deletions

View File

@@ -4,10 +4,11 @@ pub mod runtime;
use std::ffi::OsString;
use std::path::PathBuf;
use crate::compat::config_adapter::resolve_skills_dir_from_sgclaw_settings;
use crate::compat::runtime::CompatTaskContext;
use crate::config::DeepSeekSettings;
use crate::config::SgClawSettings;
use crate::pipe::{
AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
AgentMessage, BrowserMessage, BrowserPipeTool, PipeError, Transport,
};
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -58,12 +59,12 @@ impl AgentRuntimeContext {
Ok(Self::new(config_path, workspace_root))
}
fn load_deepseek_settings(&self) -> Result<Option<DeepSeekSettings>, PipeError> {
DeepSeekSettings::load(self.config_path.as_deref())
fn load_sgclaw_settings(&self) -> Result<Option<SgClawSettings>, PipeError> {
SgClawSettings::load(self.config_path.as_deref())
.map_err(|err| PipeError::Protocol(err.to_string()))
}
fn deepseek_source_label(&self) -> String {
fn settings_source_label(&self) -> String {
match &self.config_path {
Some(path) if path.exists() => path.display().to_string(),
_ => "environment".to_string(),
@@ -88,39 +89,9 @@ fn send_mode_log<T: Transport>(transport: &T, mode: &str) -> Result<(), PipeErro
})
}
fn explicit_non_task_response(history: &[ConversationMessage], instruction: &str) -> Option<String> {
if !history.is_empty() {
return None;
}
let trimmed = instruction.trim();
if trimmed.is_empty() {
return Some("sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。".to_string());
}
const TASK_HINTS: &[&str] = &[
"打开", "搜索", "点击", "输入", "导航", "跳转", "访问", "提取", "获取", "网页", "页面",
"标签页", "百度", "知乎", "google", "open", "search", "click", "type", "navigate",
];
if TASK_HINTS.iter().any(|hint| trimmed.contains(hint)) {
return None;
}
const CHITCHAT_INPUTS: &[&str] = &[
"hi", "hello", "hey", "你好", "您好", "", "在吗", "你是谁", "介绍一下你自己",
];
if CHITCHAT_INPUTS
.iter()
.any(|candidate| trimmed.eq_ignore_ascii_case(candidate) || trimmed == *candidate)
{
return Some("sgClaw 现在是浏览器任务入口,不做通用闲聊。请直接说你想在网页上执行什么操作,例如“打开百度搜索天气”。".to_string());
}
if trimmed.chars().count() <= 8 {
return Some("sgClaw 现在只处理浏览器任务。请直接描述网页操作目标,例如“打开知乎搜索天气”或“提取当前页面标题”。".to_string());
}
None
fn missing_llm_configuration_summary() -> String {
"未配置大语言模型。请先在 sgclaw_config.json 或环境变量中配置 apiKey、baseUrl 与 model。"
.to_string()
}
fn execute_plan<T: Transport>(
@@ -187,10 +158,11 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
page_url,
page_title,
} => {
if let Some(summary) = explicit_non_task_response(&messages, &instruction) {
let instruction = instruction.trim().to_string();
if instruction.is_empty() {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary,
summary: "请输入任务内容。".to_string(),
});
}
@@ -210,19 +182,64 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
),
});
}
let completion = match context.load_deepseek_settings() {
let completion = match context.load_sgclaw_settings() {
Ok(Some(settings)) => {
let resolved_skills_dir =
resolve_skills_dir_from_sgclaw_settings(&context.workspace_root, &settings);
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"DeepSeek config loaded from {} model={} base_url={}",
context.deepseek_source_label(),
settings.model,
settings.base_url
context.settings_source_label(),
settings.provider_model,
settings.provider_base_url
),
});
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"skills dir resolved to {}",
resolved_skills_dir.display()
),
});
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"runtime profile={:?} skills_prompt_mode={:?}",
settings.runtime_profile,
settings.skills_prompt_mode
),
});
if crate::compat::orchestration::should_use_primary_orchestration(
&instruction,
task_context.page_url.as_deref(),
task_context.page_title.as_deref(),
) {
let _ = send_mode_log(transport, "zeroclaw_process_message_primary");
match crate::compat::orchestration::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => {
return transport.send(&AgentMessage::TaskComplete {
success: true,
summary,
})
}
Err(err) => {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
})
}
}
}
let _ = send_mode_log(transport, "compat_llm_primary");
match crate::compat::runtime::execute_task(
match crate::compat::runtime::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
&instruction,
@@ -240,24 +257,9 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
},
}
}
Ok(None) => match planner::plan_instruction(&instruction) {
Ok(plan) => {
let _ = send_mode_log(transport, "deterministic_planner");
match execute_plan(transport, browser_tool, &plan) {
Ok(summary) => AgentMessage::TaskComplete {
success: true,
summary,
},
Err(err) => AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
},
}
}
Err(err) => AgentMessage::TaskComplete {
success: false,
summary: PipeError::Protocol(err.to_string()).to_string(),
},
Ok(None) => AgentMessage::TaskComplete {
success: false,
summary: missing_llm_configuration_summary(),
},
Err(err) => {
let _ = transport.send(&AgentMessage::LogEntry {

View File

@@ -1,29 +1,70 @@
use async_trait::async_trait;
use reqwest::Url;
use serde_json::{json, Map, Value};
use zeroclaw::tools::{Tool, ToolResult};
use crate::pipe::{Action, BrowserPipeTool, Transport};
use crate::pipe::{Action, BrowserPipeTool, ExecutionSurfaceMetadata, Transport};
pub const BROWSER_ACTION_TOOL_NAME: &str = "browser_action";
pub const SUPERRPA_BROWSER_TOOL_NAME: &str = "superrpa_browser";
const BROWSER_ACTION_TOOL_DESCRIPTION: &str =
"Execute browser actions in SuperRPA through the existing sgClaw pipe protocol.";
const SUPERRPA_BROWSER_TOOL_DESCRIPTION: &str =
"Use SuperRPA's dedicated privileged browser interface for page navigation, DOM reading, clicking, and typing inside the protected browser host.";
const MAX_DATA_STRING_CHARS: usize = 2048;
const MAX_AOM_STRING_CHARS: usize = 128;
const MAX_DATA_ARRAY_ITEMS: usize = 12;
const MAX_DATA_OBJECT_FIELDS: usize = 24;
const MAX_DATA_RECURSION_DEPTH: usize = 4;
pub struct ZeroClawBrowserTool<T: Transport> {
browser_tool: BrowserPipeTool<T>,
tool_name: &'static str,
description: &'static str,
}
impl<T: Transport> ZeroClawBrowserTool<T> {
pub fn new(browser_tool: BrowserPipeTool<T>) -> Self {
Self { browser_tool }
Self::named(
browser_tool,
BROWSER_ACTION_TOOL_NAME,
BROWSER_ACTION_TOOL_DESCRIPTION,
)
}
pub fn new_superrpa(browser_tool: BrowserPipeTool<T>) -> Self {
Self::named(
browser_tool,
SUPERRPA_BROWSER_TOOL_NAME,
SUPERRPA_BROWSER_TOOL_DESCRIPTION,
)
}
fn named(
browser_tool: BrowserPipeTool<T>,
tool_name: &'static str,
description: &'static str,
) -> Self {
Self {
browser_tool,
tool_name,
description,
}
}
pub fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.browser_tool.surface_metadata()
}
}
#[async_trait]
impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
fn name(&self) -> &str {
BROWSER_ACTION_TOOL_NAME
self.tool_name
}
fn description(&self) -> &str {
"Execute browser actions in SuperRPA through the existing sgClaw pipe protocol."
self.description
}
fn parameters_schema(&self) -> Value {
@@ -72,8 +113,9 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
let output = serde_json::to_string(&json!({
"seq": result.seq,
"success": result.success,
"data": result.data,
"aom_snapshot": result.aom_snapshot,
"data": compact_json_value(&result.data, 0),
"aom_snapshot": compact_aom_snapshot(&result.aom_snapshot),
"aom_snapshot_count": result.aom_snapshot.len(),
"timing": result.timing
}))?;
@@ -103,9 +145,10 @@ fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, Bro
};
let action_name = take_required_string(&mut args, "action")?;
let expected_domain = take_required_string(&mut args, "expected_domain")?;
let raw_expected_domain = take_required_string(&mut args, "expected_domain")?;
let action = parse_action(&action_name)?;
validate_action_params(&action_name, &args)?;
let expected_domain = normalize_expected_domain(&action, &raw_expected_domain, &args)?;
Ok(BrowserActionRequest {
action,
@@ -178,6 +221,59 @@ fn require_non_empty_string(
}
}
fn normalize_expected_domain(
action: &Action,
raw_expected_domain: &str,
args: &Map<String, Value>,
) -> Result<String, BrowserActionAdapterError> {
if matches!(action, Action::Navigate) {
if let Some(url) = args.get("url").and_then(Value::as_str) {
if let Some(host) = host_from_url(url) {
return Ok(host);
}
}
}
normalize_domain_like(raw_expected_domain).ok_or_else(|| {
BrowserActionAdapterError::InvalidArguments(format!(
"expected_domain must resolve to a hostname, got {raw_expected_domain:?}"
))
})
}
/// Parse `raw` as an absolute URL and return its hostname, lowercased.
/// Yields `None` when the string is not a parseable URL or carries no host
/// component (e.g. `file:` or `data:` URLs).
fn host_from_url(raw: &str) -> Option<String> {
    let parsed = Url::parse(raw).ok()?;
    let host = parsed.host_str()?;
    Some(host.to_ascii_lowercase())
}
/// Best-effort conversion of a domain-ish string ("example.com",
/// "https://example.com/path", "host:8080/x") into a bare lowercase
/// hostname. Returns `None` for blank input or when nothing host-like
/// remains after stripping scheme, path, query, fragment and port.
fn normalize_domain_like(raw: &str) -> Option<String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return None;
    }
    // A fully parseable URL is the authoritative source for the host.
    if let Some(host) = host_from_url(trimmed) {
        return Some(host);
    }
    // Fallback surgery: drop scheme prefixes, then cut at the first
    // path/query/fragment delimiter and at any ":port" suffix.
    let mut rest = trimmed;
    rest = rest.trim_start_matches("https://");
    rest = rest.trim_start_matches("http://");
    let before_path = rest.split(['/', '?', '#']).next().unwrap_or_default();
    let before_port = before_path.split(':').next().unwrap_or_default();
    let host = before_port.trim().to_ascii_lowercase();
    if host.is_empty() {
        None
    } else {
        Some(host)
    }
}
fn format_browser_action_error(data: &Value) -> String {
if let Some(error) = data.get("error") {
if let Some(message) = error.get("message").and_then(Value::as_str) {
@@ -193,6 +289,111 @@ fn format_browser_action_error(data: &Value) -> String {
format!("browser action failed: {data}")
}
/// Compact an arbitrary JSON value for tool output using the default
/// per-string budget (`MAX_DATA_STRING_CHARS`); see
/// `compact_json_value_with_string_limit` for the actual truncation rules.
fn compact_json_value(value: &Value, depth: usize) -> Value {
    compact_json_value_with_string_limit(value, depth, MAX_DATA_STRING_CHARS)
}
/// Bound an AOM snapshot for tool output: keep at most
/// `MAX_DATA_ARRAY_ITEMS` nodes, each compacted via `compact_aom_value`.
fn compact_aom_snapshot(snapshot: &[Value]) -> Value {
    let mut compacted = Vec::with_capacity(snapshot.len().min(MAX_DATA_ARRAY_ITEMS));
    for item in snapshot.iter().take(MAX_DATA_ARRAY_ITEMS) {
        compacted.push(compact_aom_value(item, 0));
    }
    Value::Array(compacted)
}
/// Compact a single AOM node. Text-bearing fields ("text", "value", "html")
/// are replaced by a character-count placeholder so raw page text is never
/// forwarded; nesting depth, array length and object width are capped by
/// the module limits, and other scalars use the tighter AOM string budget.
fn compact_aom_value(value: &Value, depth: usize) -> Value {
    if depth >= MAX_DATA_RECURSION_DEPTH {
        return Value::String("[truncated nested value]".to_string());
    }
    match value {
        Value::Object(map) => {
            let mut out = Map::new();
            for (key, item) in map.iter().take(MAX_DATA_OBJECT_FIELDS) {
                let is_text_field = matches!(key.as_str(), "text" | "value" | "html");
                let entry = if is_text_field {
                    // Report the payload's size instead of its content.
                    let summary = match item.as_str() {
                        Some(text) => format!("[{} chars omitted]", text.chars().count()),
                        None => "[omitted]".to_string(),
                    };
                    Value::String(summary)
                } else {
                    compact_aom_value(item, depth + 1)
                };
                out.insert(key.clone(), entry);
            }
            Value::Object(out)
        }
        Value::Array(items) => {
            let mut out = Vec::new();
            for item in items.iter().take(MAX_DATA_ARRAY_ITEMS) {
                out.push(compact_aom_value(item, depth + 1));
            }
            Value::Array(out)
        }
        other => compact_json_value_with_string_limit(other, depth, MAX_AOM_STRING_CHARS),
    }
}
/// Shrink a JSON value so it fits transport/log budgets:
/// - recursion is cut at `MAX_DATA_RECURSION_DEPTH` with a marker string,
/// - strings are capped at `max_string_chars` characters,
/// - arrays keep the first `MAX_DATA_ARRAY_ITEMS` entries plus an
///   "N more items omitted" marker when clipped,
/// - objects keep the first `MAX_DATA_OBJECT_FIELDS` fields plus a
///   "_truncated_fields" marker entry when clipped.
fn compact_json_value_with_string_limit(
    value: &Value,
    depth: usize,
    max_string_chars: usize,
) -> Value {
    if depth >= MAX_DATA_RECURSION_DEPTH {
        return Value::String("[truncated nested value]".to_string());
    }
    match value {
        // Scalars pass through untouched.
        Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(),
        Value::String(text) => Value::String(truncate_string(text, max_string_chars)),
        Value::Array(items) => {
            let mut compacted: Vec<Value> = items
                .iter()
                .take(MAX_DATA_ARRAY_ITEMS)
                .map(|item| compact_json_value_with_string_limit(item, depth + 1, max_string_chars))
                .collect();
            // Make the clipping visible to the consumer.
            if items.len() > MAX_DATA_ARRAY_ITEMS {
                compacted.push(Value::String(format!(
                    "[{} more items omitted]",
                    items.len() - MAX_DATA_ARRAY_ITEMS
                )));
            }
            Value::Array(compacted)
        }
        Value::Object(map) => {
            let mut compacted = Map::new();
            for (key, item) in map.iter().take(MAX_DATA_OBJECT_FIELDS) {
                compacted.insert(
                    key.clone(),
                    compact_json_value_with_string_limit(item, depth + 1, max_string_chars),
                );
            }
            // Make the clipping visible via a synthetic field.
            if map.len() > MAX_DATA_OBJECT_FIELDS {
                compacted.insert(
                    "_truncated_fields".to_string(),
                    Value::String(format!(
                        "{} additional fields omitted",
                        map.len() - MAX_DATA_OBJECT_FIELDS
                    )),
                );
            }
            Value::Object(compacted)
        }
    }
}
/// Cap `text` at `max_chars` characters (not bytes). Inputs within the
/// budget are returned unchanged; longer inputs keep a `max_chars`-char
/// prefix and gain an explicit "...[truncated N chars]" suffix.
fn truncate_string(text: &str, max_chars: usize) -> String {
    let total_chars = text.chars().count();
    match total_chars.checked_sub(max_chars) {
        // Within budget (shorter or exactly equal): return as-is.
        None | Some(0) => text.to_string(),
        Some(omitted) => {
            let mut prefix = String::new();
            for ch in text.chars().take(max_chars) {
                prefix.push(ch);
            }
            format!("{prefix}...[truncated {omitted} chars]")
        }
    }
}
#[derive(Debug, thiserror::Error)]
enum BrowserActionAdapterError {
#[error("unsupported action: {0}")]

View File

@@ -18,7 +18,15 @@ pub fn log_entry_for_turn_event(event: &TurnEvent) -> Option<AgentMessage> {
}
fn format_tool_call(name: &str, args: &Value) -> String {
if name != "browser_action" {
if name == "read_skill" {
let skill_name = args
.get("name")
.and_then(Value::as_str)
.unwrap_or("<missing-skill>");
return format!("read_skill {skill_name}");
}
if !is_browser_tool_call(name) {
return format!("call {name}");
}
@@ -54,10 +62,14 @@ fn format_tool_call(name: &str, args: &Value) -> String {
.unwrap_or("<missing-selector>");
format!("getText {selector}")
}
other => format!("browser_action {other}"),
other => format!("{name} {other}"),
}
}
/// True for tool names that route through the browser pipe:
/// "browser_action" and its privileged "superrpa_browser" alias.
fn is_browser_tool_call(name: &str) -> bool {
    matches!(name, "browser_action" | "superrpa_browser")
}
/// A tool output string is treated as an error when it carries the
/// conventional "Error:" prefix (case-sensitive).
fn is_tool_error(output: &str) -> bool {
    output.strip_prefix("Error:").is_some()
}

View File

@@ -3,4 +3,8 @@ pub mod config_adapter;
pub mod cron_adapter;
pub mod event_bridge;
pub mod memory_adapter;
pub mod openxml_office_tool;
pub mod orchestration;
pub mod runtime;
pub mod screen_html_export_tool;
pub mod workflow_executor;

View File

@@ -0,0 +1,392 @@
use async_trait::async_trait;
use serde::Deserialize;
use serde_json::{json, Value};
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
use zeroclaw::tools::{Tool, ToolResult};
const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office";
const DEFAULT_SHEET_NAME: &str = "知乎热榜";
const MAX_COLUMNS: [&str; 3] = ["rank", "title", "heat"];
/// Tool that renders Zhihu hotlist rows into a local .xlsx workbook by
/// generating an OpenXML template and invoking the external `openxml_cli`
/// renderer.
pub struct OpenXmlOfficeTool {
    // Root used for job scratch directories and the default output location.
    workspace_root: PathBuf,
}

impl OpenXmlOfficeTool {
    /// Create a tool instance rooted at `workspace_root`.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }
}
/// Arguments accepted by the `openxml_office` tool call.
#[derive(Debug, Deserialize)]
struct OpenXmlOfficeArgs {
    // Must equal DEFAULT_SHEET_NAME; any other sheet name is rejected.
    sheet_name: String,
    // Must be exactly ["rank", "title", "heat"].
    columns: Vec<String>,
    // Each row must hold exactly 3 cells (rank, title, heat).
    rows: Vec<Vec<Value>>,
    // Optional explicit output file; defaults to <workspace>/out/….xlsx.
    #[serde(default)]
    output_path: Option<String>,
}
#[async_trait]
impl Tool for OpenXmlOfficeTool {
    fn name(&self) -> &str {
        OPENXML_OFFICE_TOOL_NAME
    }

    fn description(&self) -> &str {
        "Export structured Zhihu hotlist rows into a local .xlsx file through the OpenXML office pipeline."
    }

    /// JSON schema for the tool arguments; sheet_name/columns/rows required.
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "required": ["sheet_name", "columns", "rows"],
            "properties": {
                "sheet_name": { "type": "string" },
                "columns": {
                    "type": "array",
                    "items": { "type": "string" }
                },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {}
                    }
                },
                "output_path": { "type": "string" }
            }
        })
    }

    /// Validate the arguments, materialize a scratch template + payload +
    /// request under the workspace, run the external openxml_cli renderer,
    /// and report the produced artifact path.
    ///
    /// Validation problems come back as failed ToolResults (success=false)
    /// rather than Err, so the LLM can read and correct them; only
    /// filesystem/renderer failures propagate as Err.
    async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
        let parsed = match serde_json::from_value::<OpenXmlOfficeArgs>(args) {
            Ok(value) => value,
            Err(err) => return Ok(failed_tool_result(format!("invalid tool arguments: {err}"))),
        };
        // This tool is intentionally specialized to one sheet/column layout.
        if parsed.sheet_name.trim() != DEFAULT_SHEET_NAME {
            return Ok(failed_tool_result(format!(
                "unsupported sheet_name: expected {DEFAULT_SHEET_NAME}"
            )));
        }
        let expected_columns = MAX_COLUMNS
            .iter()
            .map(|value| value.to_string())
            .collect::<Vec<_>>();
        if parsed.columns != expected_columns {
            return Ok(failed_tool_result(
                "unsupported columns: expected [rank, title, heat]".to_string(),
            ));
        }
        if parsed.rows.is_empty() {
            return Ok(failed_tool_result("rows must not be empty".to_string()));
        }
        if parsed.rows.iter().any(|row| row.len() != 3) {
            return Ok(failed_tool_result(
                "each row must contain exactly 3 values".to_string(),
            ));
        }
        // Per-invocation scratch directory keyed by timestamp.
        let job_root = create_job_root(&self.workspace_root)?;
        let template_path = job_root.join("zhihu_hotlist_template.xlsx");
        let payload_path = job_root.join("payload.json");
        let request_path = job_root.join("request.json");
        let output_path = parsed
            .output_path
            .as_deref()
            .map(PathBuf::from)
            .unwrap_or_else(|| default_output_path(&self.workspace_root));
        write_hotlist_template(&template_path, parsed.rows.len())?;
        write_payload_json(&payload_path, &parsed.rows)?;
        write_request_json(&request_path, &template_path, &payload_path, &output_path)?;
        let rendered = run_openxml_cli(&request_path)?;
        // Prefer the artifact path the renderer reports; fall back to the
        // requested output path if the response omits it.
        let artifact_path = rendered["data"]["artifact"]["path"]
            .as_str()
            .map(str::to_string)
            .unwrap_or_else(|| output_path.to_string_lossy().to_string());
        Ok(ToolResult {
            success: true,
            output: json!({
                "sheet_name": DEFAULT_SHEET_NAME,
                "output_path": artifact_path,
                "row_count": parsed.rows.len(),
                "renderer": OPENXML_OFFICE_TOOL_NAME
            })
            .to_string(),
            error: None,
        })
    }
}
/// Build a failed `ToolResult` whose only content is the error message.
fn failed_tool_result(error: String) -> ToolResult {
    let output = String::new();
    ToolResult {
        success: false,
        output,
        error: Some(error),
    }
}
/// Create (and return) a unique scratch directory under
/// `<workspace>/.sgclaw-openxml/`, keyed by the current UNIX timestamp in
/// nanoseconds. Errors if the clock is before the epoch or mkdir fails.
fn create_job_root(workspace_root: &Path) -> anyhow::Result<PathBuf> {
    let stamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos();
    let job_root = workspace_root
        .join(".sgclaw-openxml")
        .join(stamp.to_string());
    fs::create_dir_all(&job_root)?;
    Ok(job_root)
}
/// Default export target: `<workspace>/out/zhihu-hotlist-<nanos>.xlsx`.
/// A pre-epoch clock degrades to a 0 timestamp instead of failing.
fn default_output_path(workspace_root: &Path) -> PathBuf {
    let stamp = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let file_name = format!("zhihu-hotlist-{stamp}.xlsx");
    workspace_root.join("out").join(file_name)
}
/// Write the renderer payload JSON: one RANK_i/TITLE_i/HEAT_i variable per
/// row (1-based index), plus empty `tables`/`images` sections. Assumes each
/// row holds at least 3 cells (validated by the caller).
fn write_payload_json(path: &Path, rows: &[Vec<Value>]) -> anyhow::Result<()> {
    let mut variables = BTreeMap::new();
    for (idx, row) in rows.iter().enumerate() {
        let n = idx + 1;
        variables.insert(format!("RANK_{n}"), value_to_string(&row[0]));
        variables.insert(format!("TITLE_{n}"), value_to_string(&row[1]));
        variables.insert(format!("HEAT_{n}"), value_to_string(&row[2]));
    }
    let payload = json!({
        "variables": variables,
        "tables": {},
        "images": {}
    });
    fs::write(path, serde_json::to_vec_pretty(&payload)?)?;
    Ok(())
}
/// Write the openxml_cli render request JSON, creating the output file's
/// parent directory first. The request pins strict mode, forbids unresolved
/// template variables, and references the template/payload files by path.
fn write_request_json(
    path: &Path,
    template_path: &Path,
    payload_path: &Path,
    output_path: &Path,
) -> anyhow::Result<()> {
    // Ensure the renderer can actually create the output file.
    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent)?;
    }
    let request = json!({
        "api_version": "2026-03-26",
        "job": "zhihu_hotlist_export",
        "template": {
            "kind": "xlsx",
            "path": template_path
        },
        "output": {
            "path": output_path
        },
        "data": {
            "json_path": payload_path
        },
        "options": {
            // Fail fast on any template/payload mismatch.
            "strict": true,
            "allow_unresolved": false,
            "dry_run": false
        }
    });
    fs::write(path, serde_json::to_vec_pretty(&request)?)?;
    Ok(())
}
/// Invoke the openxml_cli renderer for `request_path` and parse its JSON
/// stdout. Prefers a prebuilt debug binary; falls back to `cargo run`
/// against the sibling openxml_cli crate's manifest.
///
/// NOTE(review): the manifest path is derived from this crate's
/// CARGO_MANIFEST_DIR at compile time and the binary path assumes a
/// `target/debug` layout — confirm these assumptions hold for release /
/// deployed builds, where neither may exist.
fn run_openxml_cli(request_path: &Path) -> anyhow::Result<Value> {
    let manifest_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .map(|path| path.join("openxml_cli").join("Cargo.toml"))
        .ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli manifest path"))?;
    let binary_path = manifest_path
        .parent()
        .map(|path| path.join("target").join("debug").join("openxml-cli"))
        .ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli binary path"))?;
    // Fast path: run the already-built binary; slow path: cargo run.
    let output = if binary_path.exists() {
        Command::new(&binary_path)
            .args([
                "template",
                "render",
                "--request",
                request_path.to_string_lossy().as_ref(),
                "--json",
            ])
            .output()?
    } else {
        Command::new("cargo")
            .args([
                "run",
                "--quiet",
                "--manifest-path",
                manifest_path.to_string_lossy().as_ref(),
                "--",
                "template",
                "render",
                "--request",
                request_path.to_string_lossy().as_ref(),
                "--json",
            ])
            .output()?
    };
    if !output.status.success() {
        // Surface the renderer's stderr when available.
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        return Err(anyhow::anyhow!(if stderr.is_empty() {
            "openxml_cli render failed".to_string()
        } else {
            stderr
        }));
    }
    let stdout = String::from_utf8(output.stdout)?;
    Ok(serde_json::from_str(&stdout)?)
}
/// Render a JSON scalar as plain text for template variables. `Null`
/// becomes the empty string; arrays/objects fall back to their JSON
/// encoding via `Display`.
fn value_to_string(value: &Value) -> String {
    match value {
        Value::Null => String::new(),
        Value::String(text) => text.clone(),
        Value::Bool(flag) => flag.to_string(),
        Value::Number(number) => number.to_string(),
        other => other.to_string(),
    }
}
/// Materialize a minimal xlsx template on disk: lay out the OpenXML part
/// tree under a scratch "template-build" directory next to `path`, zip it
/// into `path`, then remove the scratch tree (best-effort).
///
/// NOTE(review): packaging shells out to a system `zip` binary, so this
/// fails on hosts without `zip` installed — consider a pure-Rust zip
/// writer to remove the external dependency.
fn write_hotlist_template(path: &Path, row_count: usize) -> anyhow::Result<()> {
    let build_root = path
        .parent()
        .ok_or_else(|| anyhow::anyhow!("template path has no parent"))?
        .join("template-build");
    // OpenXML package skeleton: relationships, doc properties, workbook.
    fs::create_dir_all(build_root.join("_rels"))?;
    fs::create_dir_all(build_root.join("docProps"))?;
    fs::create_dir_all(build_root.join("xl/_rels"))?;
    fs::create_dir_all(build_root.join("xl/worksheets"))?;
    fs::write(build_root.join("[Content_Types].xml"), content_types_xml())?;
    fs::write(build_root.join("_rels/.rels"), root_rels_xml())?;
    fs::write(build_root.join("docProps/app.xml"), app_xml())?;
    fs::write(build_root.join("docProps/core.xml"), core_xml())?;
    fs::write(build_root.join("xl/workbook.xml"), workbook_xml())?;
    fs::write(
        build_root.join("xl/_rels/workbook.xml.rels"),
        workbook_rels_xml(),
    )?;
    fs::write(
        build_root.join("xl/worksheets/sheet1.xml"),
        worksheet_xml(row_count),
    )?;
    // zip appends to an existing archive; remove any stale file first.
    if path.exists() {
        fs::remove_file(path)?;
    }
    let zip = Command::new("zip")
        .current_dir(&build_root)
        .args(["-q", "-r", path.to_string_lossy().as_ref(), "."])
        .output()?;
    if !zip.status.success() {
        let stderr = String::from_utf8_lossy(&zip.stderr);
        return Err(anyhow::anyhow!(format!(
            "failed to create xlsx template: {}",
            stderr.trim()
        )));
    }
    // Best-effort cleanup; a leftover build dir is harmless.
    let _ = fs::remove_dir_all(&build_root);
    Ok(())
}
/// Build sheet1.xml for the template workbook: a fixed header row plus one
/// placeholder row ({{RANK_i}}/{{TITLE_i}}/{{HEAT_i}}) per data row, all
/// using inline strings.
fn worksheet_xml(row_count: usize) -> String {
    let mut body = String::from(
        "<row r=\"1\"><c r=\"A1\" t=\"inlineStr\"><is><t>rank</t></is></c><c r=\"B1\" t=\"inlineStr\"><is><t>title</t></is></c><c r=\"C1\" t=\"inlineStr\"><is><t>heat</t></is></c></row>",
    );
    for idx in 1..=row_count {
        // Data row i lands on spreadsheet row i + 1 (row 1 is the header).
        let excel_row = idx + 1;
        body.push_str(&format!(
            "<row r=\"{excel_row}\"><c r=\"A{excel_row}\" t=\"inlineStr\"><is><t>{{{{RANK_{idx}}}}}</t></is></c><c r=\"B{excel_row}\" t=\"inlineStr\"><is><t>{{{{TITLE_{idx}}}}}</t></is></c><c r=\"C{excel_row}\" t=\"inlineStr\"><is><t>{{{{HEAT_{idx}}}}}</t></is></c></row>"
        ));
    }
    format!(
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\
<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\
<sheetData>{body}</sheetData>\
</worksheet>"
    )
}
/// Static [Content_Types].xml part declaring the workbook, worksheet and
/// doc-property content types for the generated package.
fn content_types_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
<Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>
<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>
</Types>"#
}
/// Static package-level _rels/.rels wiring the workbook and doc properties.
fn root_rels_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>
<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>
</Relationships>"#
}
/// Static docProps/app.xml identifying sgClaw as the producing application.
fn app_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"
xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">
<Application>sgClaw</Application>
</Properties>"#
}
/// Static docProps/core.xml carrying the document title metadata.
fn core_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dc:title>Zhihu Hotlist Export</dc:title>
</cp:coreProperties>"#
}
/// Static xl/workbook.xml declaring the single 知乎热榜 sheet (sheet1, rId1).
fn workbook_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheets>
<sheet name="知乎热榜" sheetId="1" r:id="rId1"/>
</sheets>
</workbook>"#
}
/// Static xl/_rels/workbook.xml.rels mapping rId1 to worksheets/sheet1.xml.
fn workbook_rels_xml() -> &'static str {
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
</Relationships>"#
}

View File

@@ -0,0 +1,67 @@
use std::path::Path;
use crate::compat::runtime::CompatTaskContext;
use crate::config::SgClawSettings;
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
/// Gate for routing straight to primary orchestration: the task must look
/// like a Zhihu-hotlist job AND ask for an export/presentation artifact
/// (Excel/xlsx/dashboard in any case, or 导出/大屏/新标签页).
pub fn should_use_primary_orchestration(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> bool {
    if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
        return false;
    }
    // ASCII hints are matched case-insensitively; CJK hints verbatim.
    let lowered = instruction.to_ascii_lowercase();
    let ascii_hints = ["excel", "xlsx", "dashboard"];
    let cjk_hints = ["导出", "大屏", "新标签页"];
    ascii_hints.iter().any(|hint| lowered.contains(hint))
        || cjk_hints.iter().any(|hint| instruction.contains(hint))
}
/// Run a task through the primary compat runtime, falling back to a
/// deterministic workflow route when one was detected and the primary pass
/// either fails outright or produces a summary the workflow layer judges
/// insufficient. When no route exists, the primary outcome (success or
/// error) is returned unchanged.
pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
    transport: &T,
    browser_tool: BrowserPipeTool<T>,
    instruction: &str,
    task_context: &CompatTaskContext,
    workspace_root: &Path,
    settings: &SgClawSettings,
) -> Result<String, PipeError> {
    // Route detection is independent of the primary run and may be None.
    let route = crate::compat::workflow_executor::detect_route(
        instruction,
        task_context.page_url.as_deref(),
        task_context.page_title.as_deref(),
    );
    // Always attempt the primary (LLM-driven) execution first.
    let primary_result = crate::compat::runtime::execute_task_with_sgclaw_settings(
        transport,
        browser_tool.clone(),
        instruction,
        task_context,
        workspace_root,
        settings,
    );
    match (route, primary_result) {
        // Primary "succeeded" but its summary signals an incomplete result:
        // re-run deterministically along the detected route.
        (Some(route), Ok(summary))
            if crate::compat::workflow_executor::should_fallback_after_summary(&summary, &route) =>
        {
            crate::compat::workflow_executor::execute_route(
                transport,
                &browser_tool,
                workspace_root,
                instruction,
                route,
            )
        }
        // Acceptable primary summary (or nothing to fall back to): keep it.
        (_, Ok(summary)) => Ok(summary),
        // Primary failed but a deterministic route exists: run the route.
        (Some(route), Err(_)) => crate::compat::workflow_executor::execute_route(
            transport,
            &browser_tool,
            workspace_root,
            instruction,
            route,
        ),
        // Primary failed and no route: surface the original error.
        (None, Err(err)) => Err(err),
    }
}

View File

@@ -0,0 +1,382 @@
use async_trait::async_trait;
use reqwest::Url;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use zeroclaw::tools::{Tool, ToolResult};
const SCREEN_HTML_EXPORT_TOOL_NAME: &str = "screen_html_export";
const DEFAULT_SCREEN_TITLE: &str = "知乎热榜主题分类分析大屏";
const TEMPLATE: &str = include_str!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/../skill_lib/skills/zhihu-hotlist-screen/assets/zhihu-hotlist-echarts.html"
));
const PAYLOAD_START_MARKER: &str = " const defaultPayload = ";
const PAYLOAD_END_MARKER: &str = "\n\n const themeMeta = {";
/// Tool that renders a self-contained ECharts dashboard HTML file from
/// Zhihu hotlist data, intended to be opened in a new browser tab.
pub struct ScreenHtmlExportTool {
    // Base directory used for the default output location.
    workspace_root: PathBuf,
}

impl ScreenHtmlExportTool {
    /// Create a tool instance rooted at `workspace_root`.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }
}
/// Arguments for `screen_html_export`. Either `table` (full rows) or `rows`
/// (minimal rank/title/heat triples) must be provided; everything else is
/// optional and defaulted at execute time.
#[derive(Debug, Deserialize)]
struct ScreenHtmlExportArgs {
    // Trimmed; empty values fall back to default_snapshot_id().
    #[serde(default)]
    snapshot_id: Option<String>,
    // Defaults to the current time (now_ms) when absent.
    #[serde(default)]
    generated_at_ms: Option<u64>,
    // Minimal input: [rank, title, heat] triples, used when `table` absent.
    #[serde(default)]
    rows: Option<Vec<Vec<Value>>>,
    // Full, pre-classified table rows; takes precedence over `rows`.
    #[serde(default)]
    table: Option<Vec<ScreenTableRow>>,
    // Pre-aggregated per-category stats; derived from the table when
    // absent or empty.
    #[serde(default)]
    categories: Option<Vec<ScreenCategory>>,
    // Optional explicit output file; defaults to the workspace default path.
    #[serde(default)]
    output_path: Option<String>,
}
/// Aggregated stats for one topic category shown on the dashboard.
#[derive(Debug, Clone, Deserialize, Serialize)]
struct ScreenCategory {
    // Stable machine code, e.g. "technology".
    category_code: String,
    // Human-readable label, e.g. "科技".
    category_label: String,
    item_count: u64,
    total_heat: u64,
    // Integer average (total_heat / item_count).
    avg_heat: u64,
}
/// One hotlist entry as displayed in the dashboard table.
#[derive(Debug, Clone, Deserialize, Serialize)]
struct ScreenTableRow {
    rank: u64,
    title: String,
    url: String,
    category_code: String,
    category_label: String,
    // Original heat string (e.g. "1234 万热度") and its parsed numeric value.
    heat_text: String,
    heat_value: u64,
    // Engagement counters; zeroed when built from minimal `rows` input.
    reply_count: u64,
    upvote_count: u64,
    favorite_count: u64,
    heart_count: u64,
}
/// Data payload injected into the dashboard HTML template.
#[derive(Debug, Serialize)]
struct ScreenPayload {
    snapshot_id: String,
    generated_at_ms: u64,
    categories: Vec<ScreenCategory>,
    table: Vec<ScreenTableRow>,
}
#[async_trait]
impl Tool for ScreenHtmlExportTool {
    fn name(&self) -> &str {
        SCREEN_HTML_EXPORT_TOOL_NAME
    }

    fn description(&self) -> &str {
        "Render a local Zhihu hotlist ECharts dashboard HTML for leadership demos and new-tab presentation."
    }

    /// All fields optional at the schema level; execute() enforces that at
    /// least one of rows/table is present and non-empty.
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "snapshot_id": { "type": "string" },
                "generated_at_ms": { "type": "integer" },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {}
                    }
                },
                "table": {
                    "type": "array",
                    "items": { "type": "object" }
                },
                "categories": {
                    "type": "array",
                    "items": { "type": "object" }
                },
                "output_path": { "type": "string" }
            }
        })
    }

    /// Assemble the dashboard payload (preferring a full `table` over the
    /// minimal `rows` form), render the bundled HTML template, write it to
    /// disk, and return the artifact path plus new-tab presentation hints.
    /// Argument problems come back as failed ToolResults, not Err.
    async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
        let parsed = match serde_json::from_value::<ScreenHtmlExportArgs>(args) {
            Ok(value) => value,
            Err(err) => return Ok(failed_tool_result(format!("invalid tool arguments: {err}"))),
        };
        // `table` takes precedence; fall back to building one from `rows`.
        let table = match parsed.table {
            Some(table) if !table.is_empty() => table,
            Some(_) => return Ok(failed_tool_result("table must not be empty".to_string())),
            None => match parsed.rows {
                Some(rows) => build_table_from_rows(&rows)?,
                None => {
                    return Ok(failed_tool_result(
                        "rows or table is required for screen_html_export".to_string(),
                    ))
                }
            },
        };
        if table.is_empty() {
            return Ok(failed_tool_result("table must not be empty".to_string()));
        }
        // Use caller-supplied category stats only when non-empty.
        let categories = parsed
            .categories
            .filter(|items| !items.is_empty())
            .unwrap_or_else(|| derive_categories(&table));
        let payload = ScreenPayload {
            snapshot_id: parsed
                .snapshot_id
                .map(|value| value.trim().to_string())
                .filter(|value| !value.is_empty())
                .unwrap_or_else(default_snapshot_id),
            generated_at_ms: parsed.generated_at_ms.unwrap_or_else(now_ms),
            categories,
            table,
        };
        let rendered = render_template(&payload)?;
        let output_path = parsed
            .output_path
            .as_deref()
            .map(PathBuf::from)
            .unwrap_or_else(|| default_output_path(&self.workspace_root));
        write_output_html(&output_path, &rendered)?;
        let presentation_url = file_url_for_path(&output_path);
        Ok(ToolResult {
            success: true,
            output: json!({
                "title": DEFAULT_SCREEN_TITLE,
                "output_path": output_path,
                "renderer": SCREEN_HTML_EXPORT_TOOL_NAME,
                "row_count": payload.table.len(),
                "snapshot_id": payload.snapshot_id,
                // Hint block telling the host to open the file in a new tab.
                "presentation": {
                    "mode": "new_tab",
                    "title": DEFAULT_SCREEN_TITLE,
                    "url": presentation_url,
                    "open_in_new_tab": true
                }
            })
            .to_string(),
            error: None,
        })
    }
}
/// Wrap an error message in a failed `ToolResult` with empty output.
fn failed_tool_result(error: String) -> ToolResult {
    ToolResult {
        success: false,
        error: Some(error),
        output: String::new(),
    }
}
/// Expand minimal [rank, title, heat] triples into full table rows.
///
/// Rank falls back to the 1-based position when the cell is not parseable;
/// the URL is synthesized from the rank; categories come from
/// `classify_title`; engagement counters are zeroed since the minimal form
/// carries no such data. Errors on empty input, wrong-width rows, or an
/// empty title.
fn build_table_from_rows(rows: &[Vec<Value>]) -> anyhow::Result<Vec<ScreenTableRow>> {
    if rows.is_empty() {
        return Err(anyhow::anyhow!("rows must not be empty"));
    }
    rows.iter()
        .enumerate()
        .map(|(index, row)| {
            if row.len() != 3 {
                return Err(anyhow::anyhow!(
                    "each row must contain exactly 3 values: rank, title, heat"
                ));
            }
            // Positional fallback keeps ranks contiguous when unparseable.
            let rank = value_to_rank(&row[0]).unwrap_or((index + 1) as u64);
            let title = value_to_string(&row[1]);
            if title.trim().is_empty() {
                return Err(anyhow::anyhow!("title must not be empty"));
            }
            let heat_text = value_to_string(&row[2]);
            let heat_value = parse_heat_value(&heat_text);
            let (category_code, category_label) = classify_title(&title);
            Ok(ScreenTableRow {
                rank,
                title,
                // Synthetic placeholder URL; real entry URLs are unknown here.
                url: format!("https://www.zhihu.com/question/hotlist-{rank}"),
                category_code: category_code.to_string(),
                category_label: category_label.to_string(),
                heat_text,
                heat_value,
                reply_count: 0,
                upvote_count: 0,
                favorite_count: 0,
                heart_count: 0,
            })
        })
        .collect()
}
/// Aggregates table rows into per-category item counts and heat totals.
///
/// Uses a `BTreeMap` so the resulting category order is deterministic.
fn derive_categories(table: &[ScreenTableRow]) -> Vec<ScreenCategory> {
    let mut grouped: BTreeMap<(String, String), (u64, u64)> = BTreeMap::new();
    for row in table {
        let entry = grouped
            .entry((row.category_code.clone(), row.category_label.clone()))
            .or_insert((0, 0));
        entry.0 += 1;
        entry.1 += row.heat_value;
    }
    let mut categories = Vec::with_capacity(grouped.len());
    for ((category_code, category_label), (item_count, total_heat)) in grouped {
        // item_count is always >= 1 here, but guard the division anyway.
        let avg_heat = if item_count == 0 {
            0
        } else {
            total_heat / item_count
        };
        categories.push(ScreenCategory {
            category_code,
            category_label,
            item_count,
            total_heat,
            avg_heat,
        });
    }
    categories
}
/// Maps a hotlist title to a `(category_code, category_label)` pair by
/// keyword matching; falls back to ("society", "社会").
///
/// ASCII keywords are matched case-insensitively (Chinese keywords are
/// unaffected by `to_ascii_lowercase`).
fn classify_title(title: &str) -> (&'static str, &'static str) {
    let normalized = title.to_ascii_lowercase();
    // BUG FIX: the military and finance keyword lists previously contained an
    // empty string; `str::contains("")` is vacuously true, so every title that
    // missed the earlier categories was classified as military.
    let matches_any =
        |needles: &[&str]| needles.iter().any(|needle| normalized.contains(needle));
    if matches_any(&["ai", "芯片", "科技", "算法", "机器人", "无人机"]) {
        return ("technology", "科技");
    }
    if matches_any(&["电影", "综艺", "明星", "周杰伦", "短剧", "娱乐"]) {
        return ("entertainment", "娱乐");
    }
    if matches_any(&["足球", "比赛", "联赛", "国足", "体育", "冠军"]) {
        return ("sports", "体育");
    }
    if matches_any(&["航母", "作战", "军事", "演训"]) {
        return ("military", "军事");
    }
    if matches_any(&["出口", "经济", "市场", "财经", "消费"]) {
        return ("finance", "财经");
    }
    ("society", "社会")
}
/// Returns true when any *non-empty* needle occurs in `haystack`.
///
/// Empty needles are ignored: `str::contains("")` is vacuously true and would
/// otherwise make any keyword list containing "" match every input.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    needles
        .iter()
        .filter(|needle| !needle.is_empty())
        .any(|needle| haystack.contains(needle))
}
/// Parses a human-readable heat string (e.g. "1,234", "3.5万", "2亿", "12k")
/// into an absolute `u64`.
///
/// Returns 0 for empty or unparseable input. Recognized multiplier suffixes:
/// 亿 (1e8), 万 (1e4), m/M (1e6), k/K (1e3) — the k/m forms are accepted by
/// the hotlist regex upstream, so they are honored here too; anything else is
/// taken literally.
fn parse_heat_value(heat_text: &str) -> u64 {
    let compact = heat_text.trim().replace(',', "");
    if compact.is_empty() {
        return 0;
    }
    // Keep only the numeric portion; a parse failure yields a base of 0.
    let number_part = compact
        .chars()
        .filter(|ch| ch.is_ascii_digit() || *ch == '.')
        .collect::<String>();
    let base = number_part.parse::<f64>().unwrap_or(0.0);
    let multiplier = if compact.contains('亿') {
        100_000_000.0
    } else if compact.contains('万') {
        10_000.0
    } else if compact.contains('m') || compact.contains('M') {
        1_000_000.0
    } else if compact.contains('k') || compact.contains('K') {
        1_000.0
    } else {
        1.0
    };
    (base * multiplier).round() as u64
}
/// Renders a JSON value as plain text: strings verbatim (unquoted), null as
/// the empty string, and everything else via its JSON serialization (which
/// matches `Display` for numbers and booleans).
fn value_to_string(value: &Value) -> String {
    if let Value::String(text) = value {
        return text.clone();
    }
    if value.is_null() {
        return String::new();
    }
    value.to_string()
}
fn value_to_rank(value: &Value) -> Option<u64> {
match value {
Value::Number(number) => number.as_u64(),
Value::String(text) => text.trim().parse::<u64>().ok(),
_ => None,
}
}
/// Splices the pretty-printed payload between the template's payload markers.
///
/// The end marker is now searched for *after* the start marker, so a stray
/// earlier occurrence of the end marker can no longer produce a garbled
/// document (previously both markers were located from the start of TEMPLATE).
///
/// # Errors
/// Fails when serialization fails or either marker is missing.
fn render_template(payload: &ScreenPayload) -> anyhow::Result<String> {
    let payload_json = serde_json::to_string_pretty(payload)?;
    let payload_start = TEMPLATE
        .find(PAYLOAD_START_MARKER)
        .ok_or_else(|| anyhow::anyhow!("default payload start marker missing"))?;
    let payload_end = TEMPLATE[payload_start..]
        .find(PAYLOAD_END_MARKER)
        .map(|offset| payload_start + offset)
        .ok_or_else(|| anyhow::anyhow!("default payload end marker missing"))?;
    let replacement = format!(
        "{PAYLOAD_START_MARKER}{}\n",
        indent_block(&payload_json, " ")
    );
    Ok(format!(
        "{}{}{}",
        &TEMPLATE[..payload_start],
        replacement,
        &TEMPLATE[payload_end..],
    ))
}
/// Prefixes every line of `value` with `indent`, preserving line order and
/// joining with "\n" (no trailing newline).
fn indent_block(value: &str, indent: &str) -> String {
    let mut indented = Vec::new();
    for line in value.lines() {
        indented.push(format!("{indent}{line}"));
    }
    indented.join("\n")
}
/// Writes the rendered HTML to `path`, creating parent directories as needed.
fn write_output_html(path: &Path, rendered: &str) -> anyhow::Result<()> {
    // Ensure the parent directory exists before writing (no-op for bare names).
    path.parent().map(fs::create_dir_all).transpose()?;
    fs::write(path, rendered).map_err(Into::into)
}
/// Returns a (practically) unique default output location under
/// `<workspace_root>/out`, keyed by nanoseconds since the Unix epoch
/// (0 when the clock reports a pre-epoch time).
fn default_output_path(workspace_root: &Path) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let file_name = format!("zhihu-hotlist-screen-{nanos}.html");
    workspace_root.join("out").join(file_name)
}
/// Builds a unique snapshot identifier from the current epoch milliseconds
/// (0 when the clock reports a pre-epoch time).
fn default_snapshot_id() -> String {
    let millis = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|value| value.as_millis() as u64)
        .unwrap_or_default();
    format!("zhihu-hotlist-screen-{millis}")
}
/// Current wall-clock time as milliseconds since the Unix epoch, or 0 when
/// the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
/// Converts a filesystem path to a `file://` URL, falling back to naive
/// string concatenation when the path cannot be represented as a URL
/// (e.g. relative paths).
fn file_url_for_path(path: &Path) -> String {
    match Url::from_file_path(path) {
        Ok(url) => url.to_string(),
        Err(_) => format!("file://{}", path.display()),
    }
}

View File

@@ -0,0 +1,346 @@
use std::collections::BTreeSet;
use std::path::Path;
use std::sync::LazyLock;

use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;

use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
// Canonical Zhihu hotlist location: bare hostname for domain checks plus the
// full hot-list URL used for navigation.
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
// Selectors tried in order when scraping page text; broadest fallback last.
const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"];
/// Fixed workflow shortcuts that bypass the general agent loop for
/// well-known Zhihu hotlist tasks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
    // Collect the hotlist and export it as an .xlsx workbook.
    ZhihuHotlistExportXlsx,
    // Collect the hotlist and render it as a dashboard HTML page.
    ZhihuHotlistScreen,
}
/// One parsed hotlist entry: 1-based rank, display title, and the raw heat
/// text as scraped from the page (e.g. "123万").
#[derive(Debug, Clone, PartialEq, Eq)]
struct HotlistItem {
    rank: u64,
    title: String,
    heat: String,
}
/// Decides whether the instruction should be handled by a fixed workflow.
///
/// Returns `None` unless the task is recognized as a Zhihu hotlist task;
/// the dashboard route takes precedence over the Excel route when both
/// keyword sets could match.
pub fn detect_route(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> Option<WorkflowRoute> {
    let is_hotlist_task =
        crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title);
    if !is_hotlist_task {
        return None;
    }
    let normalized = instruction.to_ascii_lowercase();
    let wants_screen = normalized.contains("dashboard")
        || instruction.contains("大屏")
        || instruction.contains("新标签页");
    if wants_screen {
        return Some(WorkflowRoute::ZhihuHotlistScreen);
    }
    let wants_xlsx = normalized.contains("excel")
        || normalized.contains("xlsx")
        || instruction.contains("导出");
    wants_xlsx.then_some(WorkflowRoute::ZhihuHotlistExportXlsx)
}
/// Decides whether the deterministic workflow fallback should run after the
/// model produced `summary` without a usable artifact path.
///
/// NOTE(review): the `matches!` below lists *every* `WorkflowRoute` variant,
/// so when no artifact path is present the function always returns true and
/// `looks_like_denial` is effectively dead code. Confirm whether that is
/// intentional (e.g. "always fall back for artifact routes") before adding a
/// route that should not auto-fall back.
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
    let normalized = summary.to_ascii_lowercase();
    // A concrete artifact path in the summary means the workflow already ran.
    if normalized.contains(".xlsx") || normalized.contains(".html") {
        return false;
    }
    let looks_like_denial = summary.contains("拒绝") ||
        normalized.contains("denied") ||
        normalized.contains("failed") ||
        summary.contains("失败") ||
        summary.contains("无法");
    looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen)
}
/// Runs the selected fixed workflow end to end and returns the final
/// user-facing summary.
///
/// # Errors
/// Fails when hotlist collection yields no items or the export tool reports
/// an error.
pub fn execute_route<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    workspace_root: &Path,
    instruction: &str,
    route: WorkflowRoute,
) -> Result<String, PipeError> {
    let top_n = extract_top_n(instruction);
    let items = collect_hotlist_items(transport, browser_tool, top_n)?;
    if items.is_empty() {
        return Err(PipeError::Protocol(
            "知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
        ));
    }
    match route {
        WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
        WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
    }
}
/// Navigates to the hotlist page and tries each root selector in turn until
/// one yields parseable entries. Returns an empty Vec when nothing parses.
fn collect_hotlist_items<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    top_n: usize,
) -> Result<Vec<HotlistItem>, PipeError> {
    navigate_hotlist_with_retry(transport, browser_tool)?;
    for selector in HOTLIST_ROOT_SELECTORS {
        transport.send(&AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("getText {selector}"),
        })?;
        let response = browser_tool.invoke(
            Action::GetText,
            json!({ "selector": selector }),
            ZHIHU_DOMAIN,
        )?;
        if !response.success {
            continue;
        }
        // A selector only counts as successful when entries actually parse
        // out of its text.
        let page_text = response.data["text"].as_str().unwrap_or_default();
        let parsed = parse_hotlist_items(page_text, top_n);
        if !parsed.is_empty() {
            return Ok(parsed);
        }
    }
    Ok(Vec::new())
}
/// Navigates to the hotlist URL, retrying once; surfaces the last failure
/// when both attempts fail.
fn navigate_hotlist_with_retry<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
) -> Result<(), PipeError> {
    let mut last_error: Option<PipeError> = None;
    for _attempt in 0..2 {
        transport.send(&AgentMessage::LogEntry {
            level: "info".to_string(),
            message: format!("navigate {ZHIHU_HOT_URL}"),
        })?;
        let outcome = browser_tool.invoke(
            Action::Navigate,
            json!({ "url": ZHIHU_HOT_URL }),
            ZHIHU_DOMAIN,
        );
        match outcome {
            Ok(response) if response.success => return Ok(()),
            Ok(response) => {
                last_error = Some(PipeError::Protocol(format!(
                    "navigate failed: {}",
                    response.data
                )));
            }
            Err(err) => last_error = Some(err),
        }
    }
    Err(last_error.unwrap_or_else(|| {
        PipeError::Protocol("navigate failed without detailed error".to_string())
    }))
}
/// Exports hotlist items to an .xlsx workbook via the OpenXML office tool and
/// returns a user-facing summary containing the output path.
fn export_xlsx<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call openxml_office".to_string(),
    })?;
    let rows: Vec<Value> = items
        .iter()
        .map(|item| json!([item.rank, item.title, item.heat]))
        .collect();
    let tool = OpenXmlOfficeTool::new(workspace_root.to_path_buf());
    // The tool is async; drive it on a throwaway runtime since this code
    // path is synchronous.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let args = json!({
        "sheet_name": "知乎热榜",
        "columns": ["rank", "title", "heat"],
        "rows": rows,
    });
    let result = runtime
        .block_on(tool.execute(args))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        let message = result
            .error
            .unwrap_or_else(|| "openxml_office failed".to_string());
        return Err(PipeError::Protocol(message));
    }
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?;
    let output_path = payload["output_path"]
        .as_str()
        .ok_or_else(|| PipeError::Protocol("openxml_office did not return output_path".to_string()))?;
    Ok(format!("已导出知乎热榜 Excel {output_path}"))
}
/// Renders hotlist items into the dashboard HTML via the screen export tool
/// and returns a user-facing summary containing the output path.
fn export_screen<T: Transport>(
    transport: &T,
    workspace_root: &Path,
    items: &[HotlistItem],
) -> Result<String, PipeError> {
    transport.send(&AgentMessage::LogEntry {
        level: "info".to_string(),
        message: "call screen_html_export".to_string(),
    })?;
    let rows: Vec<Value> = items
        .iter()
        .map(|item| json!([item.rank, item.title, item.heat]))
        .collect();
    let tool = ScreenHtmlExportTool::new(workspace_root.to_path_buf());
    // The tool is async; drive it on a throwaway runtime since this code
    // path is synchronous.
    let runtime = tokio::runtime::Runtime::new()
        .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
    let result = runtime
        .block_on(tool.execute(json!({ "rows": rows })))
        .map_err(|err| PipeError::Protocol(err.to_string()))?;
    if !result.success {
        let message = result
            .error
            .unwrap_or_else(|| "screen_html_export failed".to_string());
        return Err(PipeError::Protocol(message));
    }
    let payload: Value = serde_json::from_str(&result.output)
        .map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?;
    let output_path = payload["output_path"]
        .as_str()
        .ok_or_else(|| PipeError::Protocol("screen_html_export did not return output_path".to_string()))?;
    Ok(format!("已生成知乎热榜大屏 {output_path}"))
}
/// Parses hotlist entries out of raw page text, returning at most `top_n`
/// items sorted by rank.
///
/// Strategy: first try the compact single-line format ("1 标题 123万"); if
/// that yields nothing, fall back to a multi-line scan where a rank line is
/// followed within a small window by a title line and a heat line.
fn parse_hotlist_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
    let mut items = parse_single_line_items(text, top_n);
    if !items.is_empty() {
        return items;
    }
    let lines = normalize_lines(text);
    let mut seen_ranks = BTreeSet::new();
    let mut idx = 0usize;
    while idx < lines.len() && items.len() < top_n {
        // Only lines that look like a rank start a candidate entry.
        let Some(rank) = parse_rank(&lines[idx]) else {
            idx += 1;
            continue;
        };
        // Skip ranks already collected (the page may repeat entries).
        if !seen_ranks.insert(rank) {
            idx += 1;
            continue;
        }
        let mut title = None;
        let mut heat = None;
        // Look ahead a bounded window of 6 lines; stop at the next rank line.
        for candidate in lines.iter().skip(idx + 1).take(6) {
            if parse_rank(candidate).is_some() {
                break;
            }
            // Heat is checked before title so a numeric heat line is never
            // mistaken for a title.
            if heat.is_none() && looks_like_heat(candidate) {
                heat = Some(normalize_heat(candidate));
                continue;
            }
            if title.is_none() && !is_noise_line(candidate) {
                title = Some(candidate.clone());
            }
        }
        // Keep the entry only when both a title and a heat were found.
        if let (Some(title), Some(heat)) = (title, heat) {
            items.push(HotlistItem { rank, title, heat });
        }
        idx += 1;
    }
    items.sort_by_key(|item| item.rank);
    items.truncate(top_n);
    items
}
fn parse_single_line_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
let re = Regex::new(
r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$",
)
.expect("valid hotlist single-line regex");
let mut items = Vec::new();
let mut seen_ranks = BTreeSet::new();
for capture in re.captures_iter(text) {
let rank = capture
.get(1)
.and_then(|value| value.as_str().parse::<u64>().ok())
.unwrap_or_default();
if rank == 0 || !seen_ranks.insert(rank) {
continue;
}
let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or("");
let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or("");
if title.is_empty() || heat.is_empty() {
continue;
}
items.push(HotlistItem {
rank,
title: title.to_string(),
heat: normalize_heat(heat),
});
if items.len() >= top_n {
break;
}
}
items
}
/// Splits text into trimmed, non-empty lines with internal whitespace runs
/// collapsed to single spaces.
fn normalize_lines(text: &str) -> Vec<String> {
    let mut lines = Vec::new();
    for raw in text.lines() {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            continue;
        }
        let collapsed = trimmed.split_whitespace().collect::<Vec<_>>().join(" ");
        lines.push(collapsed);
    }
    lines
}
fn parse_rank(line: &str) -> Option<u64> {
let trimmed = line.trim();
if trimmed.is_empty() {
return None;
}
if trimmed.chars().all(|ch| ch.is_ascii_digit()) {
return trimmed.parse::<u64>().ok().filter(|value| *value > 0);
}
let rank_re = Regex::new(r"^(\d{1,2})[\.、\s]").expect("valid rank regex");
rank_re
.captures(trimmed)
.and_then(|capture| capture.get(1))
.and_then(|value| value.as_str().parse::<u64>().ok())
.filter(|value| *value > 0)
}
fn looks_like_heat(line: &str) -> bool {
let compact = line.replace(' ', "");
let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex");
heat_re.is_match(compact.as_str())
}
/// Canonicalizes a heat string by dropping spaces and a trailing "热度" label.
fn normalize_heat(line: &str) -> String {
    let compact = line.replace(' ', "");
    compact.trim_end_matches("热度").to_string()
}
/// Returns true for lines that are site chrome rather than hotlist content.
fn is_noise_line(line: &str) -> bool {
    const NOISE: [&str; 7] = [
        "知乎", "知乎热榜", "热榜", "首页", "发现", "等你来答", "更多内容",
    ];
    NOISE.contains(&line)
}
fn extract_top_n(instruction: &str) -> usize {
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
re.captures(&instruction.to_ascii_lowercase())
.and_then(|capture| capture.get(1))
.and_then(|value| value.as_str().parse::<usize>().ok())
.filter(|value| *value > 0)
.unwrap_or(10)
}

316
src/runtime/engine.rs Normal file
View File

@@ -0,0 +1,316 @@
use std::path::Path;
use std::sync::Arc;
use zeroclaw::agent::dispatcher::NativeToolDispatcher;
use zeroclaw::agent::Agent;
use zeroclaw::config::{Config as ZeroClawConfig, SkillsPromptInjectionMode};
use zeroclaw::memory::Memory;
use zeroclaw::observability::{NoopObserver, Observer};
use zeroclaw::providers::Provider;
use zeroclaw::runtime::NativeRuntime;
use zeroclaw::tools::{self, ReadSkillTool};
use zeroclaw::SecurityPolicy;
use crate::compat::memory_adapter::build_memory;
use crate::pipe::PipeError;
use crate::runtime::{RuntimeProfile, ToolPolicy};
// Canonical tool identifiers referenced by the engine's allow-list logic.
const BROWSER_ACTION_TOOL_NAME: &str = "browser_action";
const SUPERRPA_BROWSER_TOOL_NAME: &str = "superrpa_browser";
const READ_SKILL_TOOL_NAME: &str = "read_skill";
const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office";
const SCREEN_HTML_EXPORT_TOOL_NAME: &str = "screen_html_export";
// System-prompt sections injected ahead of the user task when a browser
// surface is attached; each states a contract the model must follow.
const BROWSER_TOOL_CONTRACT_PROMPT: &str = "SuperRPA browser interface contract:\n- Use superrpa_browser as the preferred dedicated SuperRPA interface inside this browser host.\n- browser_action is a legacy alias with the same contract; prefer superrpa_browser when choosing between them.\n- Browser actions allowed by policy are already approved by the user inside this BrowserAttached host.\n- Do not claim a browser action was denied, blocked, or rejected unless an actual tool call returns an error.\n- expected_domain must be the bare hostname only, for example www.zhihu.com.\n- Never include scheme, path, query, fragment, or port in expected_domain.\n- selector values are executed with document.querySelector(...), so they must be valid CSS selectors only.\n- Never use XPath selectors or jQuery-style :contains().\n- Prefer direct navigation to canonical URLs when they are known, instead of clicking text links to reach common pages.\n- If you need broad page content, use getText with a valid CSS selector such as body or a stable container.\n- If a task matches an installed skill, load that skill first and then execute it through the SuperRPA interface.";
// Appended when the task is recognized as a Zhihu hotlist workflow.
const ZHIHU_HOTLIST_EXECUTION_PROMPT: &str = "Zhihu hotlist execution contract:\n- Treat Zhihu hotlist export/presentation requests as a real browser workflow, not as a text-only summarization task.\n- You must attempt the browser workflow before concluding failure; a prose-only answer is invalid for this workflow.\n- If the current page is not already `https://www.zhihu.com/hot`, navigate there first.\n- Collect the live list with superrpa_browser using `getText` on `main` first; only fall back to `body` or `html` if `main` is unavailable.\n- Extract ordered rows containing `rank`, `title`, and `heat` from the live page text.\n- Do not use shell, web_fetch, web_search_tool, or fabricated sample data for this workflow.\n- Do not repeat the same sentence or section in your final answer.";
// Appended when the task requires a real Excel artifact.
const OFFICE_EXPORT_COMPLETION_PROMPT: &str = "Export completion contract:\n- This task requires a real Excel export.\n- After the Zhihu rows are available, you must call openxml_office before finishing.\n- Never fabricate, simulate, or invent substitute hotlist data when a live collection/export task fails.\n- If live collection fails, report the failure concisely instead of producing fake rows.\n- Do not stop after describing how you will parse or export the data.\n- Do not repeat the same sentence or section in your final answer.\n- Your final answer must include the generated local .xlsx path.";
// Appended when the task requires a dashboard/presentation artifact.
const SCREEN_EXPORT_COMPLETION_PROMPT: &str = "Presentation completion contract:\n- This task requires a real dashboard artifact.\n- After the Zhihu rows are available, you must call screen_html_export before finishing.\n- Do not stop after describing how you will render or present the data.\n- Do not repeat the same sentence or section in your final answer.\n- Your final answer must include the local .html path and the presentation object.";
/// Profile-driven engine that assembles the agent, its tool allow-list, and
/// the composed instruction for a single task run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuntimeEngine {
    // The runtime profile this engine was created for.
    profile: RuntimeProfile,
    // Tool policy derived from `profile` at construction time.
    tool_policy: ToolPolicy,
}
impl RuntimeEngine {
    /// Creates an engine for `profile`, deriving its tool policy eagerly.
    pub fn new(profile: RuntimeProfile) -> Self {
        Self {
            profile,
            tool_policy: ToolPolicy::for_profile(profile),
        }
    }
    /// The profile this engine was created for.
    pub fn profile(&self) -> RuntimeProfile {
        self.profile
    }
    /// The tool policy derived from the profile.
    pub fn tool_policy(&self) -> &ToolPolicy {
        &self.tool_policy
    }
    /// True when the policy's allow-list contains either browser tool alias.
    pub fn browser_surface_enabled(&self) -> bool {
        self.tool_policy
            .allowed_tools
            .iter()
            .any(|tool| {
                tool == BROWSER_ACTION_TOOL_NAME || tool == SUPERRPA_BROWSER_TOOL_NAME
            })
    }
    /// Assembles a fully configured `Agent`: memory, security policy, runtime
    /// tools merged with the caller-supplied `tools`, skills, and a tool
    /// allow-list derived from this engine's policy plus the instruction.
    ///
    /// # Errors
    /// Returns `PipeError::Protocol` when memory construction or the agent
    /// builder fails.
    pub fn build_agent(
        &self,
        provider: Box<dyn Provider>,
        config: &ZeroClawConfig,
        skills_dir: &Path,
        mut tools: Vec<Box<dyn zeroclaw::tools::Tool>>,
        browser_surface_present: bool,
        instruction: &str,
    ) -> Result<Agent, PipeError> {
        let memory: Arc<dyn Memory> =
            Arc::from(build_memory(config).map_err(map_anyhow_to_pipe_error)?);
        let security = Arc::new(SecurityPolicy::from_config(
            &config.autonomy,
            &config.workspace_dir,
        ));
        let observer: Arc<dyn Observer> = Arc::new(NoopObserver);
        let skills = load_runtime_skills(config, skills_dir);
        let (mut runtime_tools, _, _, _, _, _) = tools::all_tools_with_runtime(
            Arc::new(config.clone()),
            &security,
            Arc::new(NativeRuntime::new()),
            memory.clone(),
            None,
            None,
            &config.browser,
            &config.http_request,
            &config.web_fetch,
            &config.workspace_dir,
            &config.agents,
            config.api_key.as_deref(),
            config,
            None,
        );
        runtime_tools.append(&mut tools);
        // With compact skill prompts and a non-default skills dir, swap the
        // stock read_skill tool for one aware of the override directory.
        if matches!(
            config.skills.prompt_injection_mode,
            SkillsPromptInjectionMode::Compact
        ) && skills_dir != config.workspace_dir.join("skills")
        {
            runtime_tools.retain(|tool| tool.name() != READ_SKILL_TOOL_NAME);
            runtime_tools.push(Box::new(ReadSkillTool::with_runtime_skills_dir(
                config.workspace_dir.clone(),
                Some(skills_dir.to_path_buf()),
                config.skills.allow_scripts,
                config.skills.open_skills_enabled,
                config.skills.open_skills_dir.clone(),
            )));
        }
        Agent::builder()
            .provider(provider)
            .tools(runtime_tools)
            .memory(memory)
            .observer(observer)
            .tool_dispatcher(Box::new(NativeToolDispatcher))
            .config(config.agent.clone())
            .model_name(
                config
                    .default_model
                    .clone()
                    .unwrap_or_else(|| "deepseek-chat".to_string()),
            )
            .temperature(config.default_temperature)
            .workspace_dir(config.workspace_dir.clone())
            .skills(skills)
            .skills_prompt_mode(config.skills.prompt_injection_mode)
            .allowed_tools(self.allowed_tools_for_config(
                config,
                browser_surface_present,
                instruction,
            ))
            .build()
            .map_err(map_anyhow_to_pipe_error)
    }
    /// Composes the final instruction sent to the model: when a browser
    /// surface is active, contract prompt sections (and page context) are
    /// prepended to the trimmed user task; otherwise the trimmed task is
    /// returned unchanged.
    pub fn build_instruction(
        &self,
        instruction: &str,
        page_url: Option<&str>,
        page_title: Option<&str>,
        browser_surface_present: bool,
    ) -> String {
        let trimmed_instruction = instruction.trim();
        if !browser_surface_present || !self.browser_surface_enabled() {
            return trimmed_instruction.to_string();
        }
        let mut sections = vec![BROWSER_TOOL_CONTRACT_PROMPT.to_string()];
        if is_zhihu_hotlist_task(trimmed_instruction, page_url, page_title) {
            sections.push(ZHIHU_HOTLIST_EXECUTION_PROMPT.to_string());
        }
        if task_needs_office_export(trimmed_instruction) {
            sections.push(OFFICE_EXPORT_COMPLETION_PROMPT.to_string());
        }
        if task_needs_screen_export(trimmed_instruction) {
            sections.push(SCREEN_EXPORT_COMPLETION_PROMPT.to_string());
        }
        if let Some(page_context) = build_page_context_message(page_url, page_title) {
            sections.push(page_context);
        }
        sections.push(format!("User task: {trimmed_instruction}"));
        sections.join("\n\n")
    }
    /// Returns the sorted, deduplicated names of the skills that would be
    /// loaded for the given configuration and skills directory.
    pub fn loaded_skill_names(
        &self,
        config: &ZeroClawConfig,
        skills_dir: &Path,
    ) -> Vec<String> {
        let mut names = load_runtime_skills(config, skills_dir)
            .into_iter()
            .map(|skill| skill.name)
            .collect::<Vec<_>>();
        names.sort();
        names.dedup();
        names
    }
    /// True when the instruction calls for an Excel export tool attachment.
    pub fn should_attach_openxml_office_tool(&self, instruction: &str) -> bool {
        task_needs_office_export(instruction)
    }
    /// True when the instruction calls for the dashboard export tool.
    pub fn should_attach_screen_html_export_tool(&self, instruction: &str) -> bool {
        task_needs_screen_export(instruction)
    }
    /// Builds the final tool allow-list, or `None` (no restriction) for a
    /// general-assistant profile allowed to use non-browser tools.
    fn allowed_tools_for_config(
        &self,
        config: &ZeroClawConfig,
        browser_surface_present: bool,
        instruction: &str,
    ) -> Option<Vec<String>> {
        let mut allowed_tools = self.tool_policy.allowed_tools.clone();
        // No browser surface: strip both browser tool aliases.
        if !browser_surface_present {
            allowed_tools.retain(|tool| {
                tool != BROWSER_ACTION_TOOL_NAME && tool != SUPERRPA_BROWSER_TOOL_NAME
            });
        }
        if matches!(
            config.skills.prompt_injection_mode,
            SkillsPromptInjectionMode::Compact
        ) {
            allowed_tools.push(READ_SKILL_TOOL_NAME.to_string());
        }
        if task_needs_office_export(instruction) {
            allowed_tools.push(OPENXML_OFFICE_TOOL_NAME.to_string());
        }
        if task_needs_screen_export(instruction) {
            allowed_tools.push(SCREEN_HTML_EXPORT_TOOL_NAME.to_string());
        }
        if task_needs_local_file_read(instruction) {
            allowed_tools.push("file_read".to_string());
        }
        // NOTE: Vec::dedup only removes *adjacent* duplicates; safe here only
        // because each candidate tool name is pushed at most once.
        allowed_tools.dedup();
        if matches!(self.profile, RuntimeProfile::GeneralAssistant) &&
            self.tool_policy.may_use_non_browser_tools
        {
            None
        } else {
            Some(allowed_tools)
        }
    }
}
/// Heuristic: the instruction references a local filesystem path, so the
/// `file_read` tool should be allowed.
fn task_needs_local_file_read(instruction: &str) -> bool {
    let trimmed = instruction.trim();
    ["/home/", "./", "../"]
        .iter()
        .any(|marker| trimmed.contains(marker))
}
/// Returns true when the instruction or current page clearly refers to the
/// Zhihu hotlist: both a "Zhihu" signal and a "hotlist" signal must be
/// present across the instruction, page URL, and page title.
pub fn is_zhihu_hotlist_task(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> bool {
    let lower_instruction = instruction.to_ascii_lowercase();
    let raw_title = page_title.unwrap_or_default();
    let lower_url = page_url.unwrap_or_default().to_ascii_lowercase();
    let lower_title = raw_title.to_ascii_lowercase();
    let zhihu_signal = lower_instruction.contains("zhihu")
        || instruction.contains("知乎")
        || lower_url.contains("zhihu.com")
        || lower_title.contains("zhihu")
        || raw_title.contains("知乎");
    let hotlist_signal = lower_instruction.contains("hotlist")
        || instruction.contains("热榜")
        || lower_url.contains("/hot")
        || lower_title.contains("hotlist")
        || raw_title.contains("热榜");
    zhihu_signal && hotlist_signal
}
/// Heuristic: the task asks for an Excel/xlsx export. ASCII keywords are
/// matched case-insensitively.
fn task_needs_office_export(instruction: &str) -> bool {
    let normalized = instruction.to_ascii_lowercase();
    ["excel", ".xlsx", "导出", "xlsx"]
        .iter()
        .any(|keyword| normalized.contains(keyword))
}
/// Heuristic: the task asks for a dashboard / presentation artifact. ASCII
/// keywords are matched case-insensitively.
fn task_needs_screen_export(instruction: &str) -> bool {
    let normalized = instruction.to_ascii_lowercase();
    ["大屏", "看板", "dashboard", "screen", "echarts", "演示", "汇报"]
        .iter()
        .any(|keyword| normalized.contains(keyword))
}
/// Loads skills for the run. When a non-default skills directory is supplied,
/// workspace skills that live under the default `<workspace>/skills`
/// directory are replaced by those found in `skills_dir`.
fn load_runtime_skills(config: &ZeroClawConfig, skills_dir: &Path) -> Vec<zeroclaw::skills::Skill> {
    let default_skills_dir = config.workspace_dir.join("skills");
    let mut skills = zeroclaw::skills::load_skills_with_config(&config.workspace_dir, config);
    if skills_dir == default_skills_dir {
        return skills;
    }
    // Drop skills sourced from the default directory (keep those without a
    // recorded location), then merge in the override directory's skills.
    skills.retain(|skill| match skill.location.as_ref() {
        Some(location) => !location.starts_with(&default_skills_dir),
        None => true,
    });
    skills.extend(zeroclaw::skills::load_skills_from_directory(
        skills_dir,
        config.skills.allow_scripts,
    ));
    skills
}
fn build_page_context_message(page_url: Option<&str>, page_title: Option<&str>) -> Option<String> {
let mut parts = Vec::new();
if let Some(page_url) = page_url.map(str::trim).filter(|value| !value.is_empty()) {
parts.push(format!("Current page URL: {page_url}"));
}
if let Some(page_title) = page_title.map(str::trim).filter(|value| !value.is_empty()) {
parts.push(format!("Current page title: {page_title}"));
}
if parts.is_empty() {
return None;
}
Some(format!(
"Current browser context:\n{}",
parts.join("\n")
))
}
/// Converts an `anyhow::Error` into the pipe protocol error variant, keeping
/// only the rendered message (the error chain's source info is flattened).
fn map_anyhow_to_pipe_error(err: anyhow::Error) -> PipeError {
    PipeError::Protocol(err.to_string())
}

7
src/runtime/mod.rs Normal file
View File

@@ -0,0 +1,7 @@
// Runtime module wiring: engine construction, runtime profiles, and the
// per-profile tool policies.
mod engine;
mod profile;
mod tool_policy;
// Re-exports forming this module's public surface.
pub use engine::{is_zhihu_hotlist_task, RuntimeEngine};
pub use profile::RuntimeProfile;
pub use tool_policy::ToolPolicy;

View File

@@ -0,0 +1,36 @@
use crate::runtime::RuntimeProfile;
/// Per-profile tool permissions applied when building the agent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolPolicy {
    // When true, the run requires a browser surface to be attached.
    pub requires_browser_surface: bool,
    // When true, non-browser tools may be exposed to the model.
    pub may_use_non_browser_tools: bool,
    // Baseline allow-list of tool names for this profile (empty for the
    // general-assistant profile).
    pub allowed_tools: Vec<String>,
}
impl ToolPolicy {
    /// Derives the tool policy for a runtime profile.
    ///
    /// The two browser-centric profiles share the same browser tool
    /// allow-list; the general assistant profile starts with an empty
    /// baseline.
    pub fn for_profile(profile: RuntimeProfile) -> Self {
        let browser_tools = || {
            vec![
                "superrpa_browser".to_string(),
                "browser_action".to_string(),
            ]
        };
        match profile {
            RuntimeProfile::BrowserAttached => Self {
                requires_browser_surface: false,
                may_use_non_browser_tools: true,
                allowed_tools: browser_tools(),
            },
            RuntimeProfile::BrowserHeavy => Self {
                requires_browser_surface: true,
                may_use_non_browser_tools: true,
                allowed_tools: browser_tools(),
            },
            RuntimeProfile::GeneralAssistant => Self {
                requires_browser_surface: false,
                may_use_non_browser_tools: true,
                allowed_tools: Vec::new(),
            },
        }
    }
}