sgclaw: snapshot today's runtime and skill updates

This commit is contained in:
zyl
2026-03-30 15:05:39 +08:00
parent c793bfc6a1
commit f51d6b7659
50 changed files with 3473 additions and 621 deletions

View File

@@ -26,10 +26,15 @@ impl<T: Transport> BrowserScriptSkillTool<T> {
browser_tool: BrowserPipeTool<T>,
) -> anyhow::Result<Self> {
let script_path = skill_root.join(&tool.command);
let canonical_skill_root = skill_root.canonicalize().unwrap_or_else(|_| skill_root.to_path_buf());
let canonical_script_path = script_path
let canonical_skill_root = skill_root
.canonicalize()
.map_err(|err| anyhow::anyhow!("failed to resolve browser script {}: {err}", script_path.display()))?;
.unwrap_or_else(|_| skill_root.to_path_buf());
let canonical_script_path = script_path.canonicalize().map_err(|err| {
anyhow::anyhow!(
"failed to resolve browser script {}: {err}",
script_path.display()
)
})?;
if !canonical_script_path.starts_with(&canonical_skill_root) {
anyhow::bail!(
"browser script path escapes skill root: {}",
@@ -108,7 +113,11 @@ impl<T: Transport + 'static> Tool for BrowserScriptSkillTool<T> {
"expected_domain must be a non-empty string, got {other}"
)))
}
None => return Ok(failed_tool_result("missing required field expected_domain".to_string())),
None => {
return Ok(failed_tool_result(
"missing required field expected_domain".to_string(),
))
}
};
let expected_domain = match normalize_domain_like(&raw_expected_domain) {
Some(value) => value,
@@ -148,7 +157,9 @@ impl<T: Transport + 'static> Tool for BrowserScriptSkillTool<T> {
};
if !result.success {
return Ok(failed_tool_result(format_browser_script_error(&result.data)));
return Ok(failed_tool_result(format_browser_script_error(
&result.data,
)));
}
let payload = result

View File

@@ -101,14 +101,14 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
Err(err) => return Ok(failed_tool_result(err.to_string())),
};
let result = match self.browser_tool.invoke(
request.action,
request.params,
&request.expected_domain,
) {
Ok(result) => result,
Err(err) => return Ok(failed_tool_result(err.to_string())),
};
let result =
match self
.browser_tool
.invoke(request.action, request.params, &request.expected_domain)
{
Ok(result) => result,
Err(err) => return Ok(failed_tool_result(err.to_string())),
};
let output = serde_json::to_string(&json!({
"seq": result.seq,
@@ -122,8 +122,7 @@ impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
Ok(ToolResult {
success: result.success,
output,
error: (!result.success)
.then(|| format_browser_action_error(&result.data)),
error: (!result.success).then(|| format_browser_action_error(&result.data)),
})
}
}
@@ -134,7 +133,9 @@ struct BrowserActionRequest {
params: Value,
}
fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, BrowserActionAdapterError> {
fn parse_browser_action_request(
args: Value,
) -> Result<BrowserActionRequest, BrowserActionAdapterError> {
let mut args = match args {
Value::Object(args) => args,
other => {

View File

@@ -2,8 +2,8 @@ use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use zeroclaw::Config as ZeroClawConfig;
use zeroclaw::config::schema::ModelProviderConfig;
use zeroclaw::Config as ZeroClawConfig;
use crate::compat::cron_adapter::configure_embedded_cron;
use crate::compat::memory_adapter::configure_embedded_memory;
@@ -13,7 +13,9 @@ use crate::runtime::RuntimeProfile;
const SGCLAW_ZEROCLAW_WORKSPACE_DIR: &str = ".sgclaw-zeroclaw-workspace";
const SKILLS_DIR_NAME: &str = "skills";
pub fn build_zeroclaw_config(workspace_root: &Path) -> Result<ZeroClawConfig, crate::config::ConfigError> {
pub fn build_zeroclaw_config(
workspace_root: &Path,
) -> Result<ZeroClawConfig, crate::config::ConfigError> {
let settings = SgClawSettings::from_env()?;
Ok(build_zeroclaw_config_from_sgclaw_settings(
workspace_root,

View File

@@ -65,7 +65,10 @@ where
for job in jobs {
if !matches!(job.job_type, JobType::Agent) {
anyhow::bail!("unsupported cron job type in sgclaw compat: {:?}", job.job_type);
anyhow::bail!(
"unsupported cron job type in sgclaw compat: {:?}",
job.job_type
);
}
let started_at = Utc::now();

View File

@@ -14,19 +14,17 @@ pub fn log_entry_for_turn_event(
level: "info".to_string(),
message: format_tool_call(name, args, skill_versions),
}),
TurnEvent::ToolResult { output, .. } if is_tool_error(output) => Some(AgentMessage::LogEntry {
level: "error".to_string(),
message: output.trim_start_matches("Error: ").to_string(),
}),
TurnEvent::ToolResult { output, .. } if is_tool_error(output) => {
Some(AgentMessage::LogEntry {
level: "error".to_string(),
message: output.trim_start_matches("Error: ").to_string(),
})
}
_ => None,
}
}
fn format_tool_call(
name: &str,
args: &Value,
skill_versions: &HashMap<String, String>,
) -> String {
fn format_tool_call(name: &str, args: &Value, skill_versions: &HashMap<String, String>) -> String {
if name == "read_skill" {
let skill_name = args
.get("name")
@@ -49,7 +47,10 @@ fn format_tool_call(
match action {
"navigate" => {
let url = args.get("url").and_then(Value::as_str).unwrap_or("<missing-url>");
let url = args
.get("url")
.and_then(Value::as_str)
.unwrap_or("<missing-url>");
format!("navigate {url}")
}
"type" => {

View File

@@ -1,8 +1,8 @@
use async_trait::async_trait;
use serde::Deserialize;
use serde_json::{json, Value};
use std::collections::BTreeSet;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
@@ -93,7 +93,11 @@ impl Tool for OpenXmlOfficeTool {
return Ok(failed_tool_result("rows must not be empty".to_string()));
}
if parsed.rows.iter().any(|row| row.len() != parsed.columns.len()) {
if parsed
.rows
.iter()
.any(|row| row.len() != parsed.columns.len())
{
return Ok(failed_tool_result(
"each row must match the declared columns length".to_string(),
));
@@ -153,10 +157,10 @@ fn failed_tool_result(error: String) -> ToolResult {
}
fn create_job_root(workspace_root: &Path) -> anyhow::Result<PathBuf> {
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)?
.as_nanos();
let path = workspace_root.join(".sgclaw-openxml").join(format!("{nanos}"));
let nanos = SystemTime::now().duration_since(UNIX_EPOCH)?.as_nanos();
let path = workspace_root
.join(".sgclaw-openxml")
.join(format!("{nanos}"));
fs::create_dir_all(&path)?;
Ok(path)
}
@@ -188,10 +192,7 @@ fn resolve_column_order(
.iter()
.map(|value| value.to_string())
.collect::<BTreeSet<_>>();
let expected_set = expected_columns
.iter()
.cloned()
.collect::<BTreeSet<_>>();
let expected_set = expected_columns.iter().cloned().collect::<BTreeSet<_>>();
if provided_set != expected_set {
return None;

View File

@@ -9,6 +9,12 @@ pub fn should_use_primary_orchestration(
page_url: Option<&str>,
page_title: Option<&str>,
) -> bool {
if crate::compat::workflow_executor::detect_route(instruction, page_url, page_title)
.is_some_and(|route| crate::compat::workflow_executor::prefers_direct_execution(&route))
{
return true;
}
let normalized = instruction.to_ascii_lowercase();
let needs_export = normalized.contains("excel")
|| normalized.contains("xlsx")
@@ -33,6 +39,18 @@ pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
task_context.page_url.as_deref(),
task_context.page_title.as_deref(),
);
if let Some(route) = route.clone() {
if crate::compat::workflow_executor::prefers_direct_execution(&route) {
return crate::compat::workflow_executor::execute_route(
transport,
&browser_tool,
workspace_root,
instruction,
task_context,
route,
);
}
}
let primary_result = crate::compat::runtime::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
@@ -44,13 +62,16 @@ pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
match (route, primary_result) {
(Some(route), Ok(summary))
if crate::compat::workflow_executor::should_fallback_after_summary(&summary, &route) =>
if crate::compat::workflow_executor::should_fallback_after_summary(
&summary, &route,
) =>
{
crate::compat::workflow_executor::execute_route(
transport,
&browser_tool,
workspace_root,
instruction,
task_context,
route,
)
}
@@ -60,6 +81,7 @@ pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
&browser_tool,
workspace_root,
instruction,
task_context,
route,
),
(None, Err(err)) => Err(err),

View File

@@ -5,23 +5,18 @@ use async_trait::async_trait;
use futures_util::{stream, StreamExt};
use zeroclaw::agent::TurnEvent;
use zeroclaw::config::Config as ZeroClawConfig;
use zeroclaw::providers::{
self, ChatMessage, ChatRequest, ChatResponse, Provider,
};
use zeroclaw::providers::traits::{
ProviderCapabilities, StreamEvent, StreamOptions, StreamResult,
};
use zeroclaw::providers::traits::{ProviderCapabilities, StreamEvent, StreamOptions, StreamResult};
use zeroclaw::providers::{self, ChatMessage, ChatRequest, ChatResponse, Provider};
use crate::compat::browser_script_skill_tool::build_browser_script_skill_tools;
use crate::compat::browser_tool_adapter::ZeroClawBrowserTool;
use crate::compat::config_adapter::{
build_zeroclaw_config_from_sgclaw_settings,
resolve_skills_dir_from_sgclaw_settings,
build_zeroclaw_config_from_sgclaw_settings, resolve_skills_dir_from_sgclaw_settings,
};
use crate::compat::event_bridge::log_entry_for_turn_event;
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::config::{DeepSeekSettings, OfficeBackend, SgClawSettings};
use crate::compat::event_bridge::log_entry_for_turn_event;
use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport};
use crate::runtime::RuntimeEngine;
@@ -136,13 +131,17 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
.map_err(map_anyhow_to_pipe_error)?,
);
}
if matches!(settings.office_backend, OfficeBackend::OpenXml) &&
engine.should_attach_openxml_office_tool(instruction)
if matches!(settings.office_backend, OfficeBackend::OpenXml)
&& engine.should_attach_openxml_office_tool(instruction)
{
tools.push(Box::new(OpenXmlOfficeTool::new(config.workspace_dir.clone())));
tools.push(Box::new(OpenXmlOfficeTool::new(
config.workspace_dir.clone(),
)));
}
if engine.should_attach_screen_html_export_tool(instruction) {
tools.push(Box::new(ScreenHtmlExportTool::new(config.workspace_dir.clone())));
tools.push(Box::new(ScreenHtmlExportTool::new(
config.workspace_dir.clone(),
)));
}
let mut agent = engine.build_agent(
provider,
@@ -190,10 +189,7 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
fn build_provider(config: &ZeroClawConfig) -> Result<Box<dyn Provider>, PipeError> {
let provider_name = config.default_provider.as_deref().unwrap_or("deepseek");
let model_name = config
.default_model
.as_deref()
.unwrap_or("deepseek-chat");
let model_name = config.default_model.as_deref().unwrap_or("deepseek-chat");
let runtime_options = providers::provider_runtime_options_from_config(config);
let resolved_provider_name = if provider_name == "deepseek" {
config
@@ -258,7 +254,9 @@ impl Provider for NonStreamingProvider {
model: &str,
temperature: f64,
) -> anyhow::Result<String> {
self.inner.chat_with_history(messages, model, temperature).await
self.inner
.chat_with_history(messages, model, temperature)
.await
}
async fn chat(

View File

@@ -238,29 +238,40 @@ fn derive_categories(table: &[ScreenTableRow]) -> Vec<ScreenCategory> {
grouped
.into_iter()
.map(|((category_code, category_label), (item_count, total_heat))| ScreenCategory {
category_code,
category_label,
item_count,
total_heat,
avg_heat: if item_count == 0 {
0
} else {
total_heat / item_count
.map(
|((category_code, category_label), (item_count, total_heat))| ScreenCategory {
category_code,
category_label,
item_count,
total_heat,
avg_heat: if item_count == 0 {
0
} else {
total_heat / item_count
},
},
})
)
.collect()
}
fn classify_title(title: &str) -> (&'static str, &'static str) {
let normalized = title.to_ascii_lowercase();
if contains_any(&normalized, &["ai", "芯片", "科技", "算法", "机器人", "无人机"]) {
if contains_any(
&normalized,
&["ai", "芯片", "科技", "算法", "机器人", "无人机"],
) {
return ("technology", "科技");
}
if contains_any(&normalized, &["电影", "综艺", "明星", "周杰伦", "短剧", "娱乐"]) {
if contains_any(
&normalized,
&["电影", "综艺", "明星", "周杰伦", "短剧", "娱乐"],
) {
return ("entertainment", "娱乐");
}
if contains_any(&normalized, &["足球", "比赛", "联赛", "国足", "体育", "冠军"]) {
if contains_any(
&normalized,
&["足球", "比赛", "联赛", "国足", "体育", "冠军"],
) {
return ("sports", "体育");
}
if contains_any(&normalized, &["航母", "作战", "", "军事", "演训"]) {

View File

@@ -1,20 +1,17 @@
use std::fs;
use std::path::Path;
use std::thread;
use std::time::Duration;
use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;
use crate::compat::runtime::CompatTaskContext;
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::runtime::CompatTaskContext;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::pipe::{
Action,
AgentMessage,
BrowserPipeTool,
ConversationMessage,
PipeError,
Transport,
Action, AgentMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
};
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
@@ -22,6 +19,10 @@ const ZHIHU_EDITOR_DOMAIN: &str = "zhuanlan.zhihu.com";
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
const ZHIHU_CREATOR_URL: &str = "https://www.zhihu.com/creator";
const ZHIHU_EDITOR_URL: &str = "https://zhuanlan.zhihu.com/write";
const HOTLIST_READY_POLL_ATTEMPTS: usize = 10;
const HOTLIST_READY_POLL_INTERVAL: Duration = Duration::from_millis(500);
const HOTLIST_TEXT_READY_PATTERN: &str =
r"(?:^|\n)\s*1(?:[.、]|\s)+.+\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)(?:热度)?";
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
ZhihuHotlistExportXlsx,
@@ -51,10 +52,16 @@ pub fn detect_route(
) -> Option<WorkflowRoute> {
if crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
let normalized = instruction.to_ascii_lowercase();
if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") {
if normalized.contains("dashboard")
|| instruction.contains("大屏")
|| instruction.contains("新标签页")
{
return Some(WorkflowRoute::ZhihuHotlistScreen);
}
if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") {
if normalized.contains("excel")
|| normalized.contains("xlsx")
|| instruction.contains("导出")
{
return Some(WorkflowRoute::ZhihuHotlistExportXlsx);
}
}
@@ -73,9 +80,11 @@ pub fn detect_route(
pub fn prefers_direct_execution(route: &WorkflowRoute) -> bool {
matches!(
route,
WorkflowRoute::ZhihuArticleEntry |
WorkflowRoute::ZhihuArticleDraft |
WorkflowRoute::ZhihuArticlePublish
WorkflowRoute::ZhihuHotlistExportXlsx
| WorkflowRoute::ZhihuHotlistScreen
| WorkflowRoute::ZhihuArticleEntry
| WorkflowRoute::ZhihuArticleDraft
| WorkflowRoute::ZhihuArticlePublish
)
}
@@ -85,22 +94,23 @@ pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bo
return false;
}
let looks_like_denial = summary.contains("拒绝") ||
normalized.contains("denied") ||
normalized.contains("failed") ||
normalized.contains("protocol error") ||
normalized.contains("maximum tool iterations") ||
summary.contains("失败") ||
summary.contains("无法");
let looks_like_denial = summary.contains("拒绝")
|| normalized.contains("denied")
|| normalized.contains("failed")
|| normalized.contains("protocol error")
|| normalized.contains("maximum tool iterations")
|| summary.contains("失败")
|| summary.contains("无法");
looks_like_denial || matches!(
route,
WorkflowRoute::ZhihuHotlistExportXlsx |
WorkflowRoute::ZhihuHotlistScreen |
WorkflowRoute::ZhihuArticleEntry |
WorkflowRoute::ZhihuArticleDraft |
WorkflowRoute::ZhihuArticlePublish
)
looks_like_denial
|| matches!(
route,
WorkflowRoute::ZhihuHotlistExportXlsx
| WorkflowRoute::ZhihuHotlistScreen
| WorkflowRoute::ZhihuArticleEntry
| WorkflowRoute::ZhihuArticleDraft
| WorkflowRoute::ZhihuArticlePublish
)
}
pub fn execute_route<T: Transport + 'static>(
@@ -114,15 +124,19 @@ pub fn execute_route<T: Transport + 'static>(
match route {
WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen => {
let top_n = extract_top_n(instruction);
let items = collect_hotlist_items(transport, browser_tool, top_n)?;
let items = collect_hotlist_items(transport, browser_tool, top_n, task_context)?;
if items.is_empty() {
return Err(PipeError::Protocol(
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
));
}
match route {
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
WorkflowRoute::ZhihuHotlistExportXlsx => {
export_xlsx(transport, workspace_root, &items)
}
WorkflowRoute::ZhihuHotlistScreen => {
export_screen(transport, workspace_root, &items)
}
_ => unreachable!("handled by outer match"),
}
}
@@ -142,8 +156,9 @@ fn collect_hotlist_items<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
top_n: usize,
task_context: &CompatTaskContext,
) -> Result<Vec<HotlistItem>, PipeError> {
navigate_hotlist_with_retry(transport, browser_tool)?;
ensure_hotlist_page_ready(transport, browser_tool, task_context)?;
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-hotlist.extract_hotlist".to_string(),
@@ -168,35 +183,87 @@ fn collect_hotlist_items<T: Transport + 'static>(
parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data))
}
fn navigate_hotlist_with_retry<T: Transport + 'static>(
fn ensure_hotlist_page_ready<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
task_context: &CompatTaskContext,
) -> Result<(), PipeError> {
let starts_on_hotlist = task_context
.page_url
.as_deref()
.is_some_and(|url| url.starts_with(ZHIHU_HOT_URL))
|| task_context
.page_title
.as_deref()
.is_some_and(|title| title.contains("热榜"));
if starts_on_hotlist && poll_for_hotlist_readiness(browser_tool)? {
return Ok(());
}
let mut last_error = None;
for attempt in 0..2 {
navigate_hotlist_page(transport, browser_tool)?;
if poll_for_hotlist_readiness(browser_tool)? {
return Ok(());
}
last_error = Some(PipeError::Protocol(format!(
"知乎热榜页面已打开但在短轮询窗口内仍未出现可读热榜内容attempt={}",
attempt + 1
)));
}
Err(last_error.unwrap_or_else(|| PipeError::Protocol("知乎热榜页面未就绪".to_string())))
}
fn navigate_hotlist_page<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
) -> Result<(), PipeError> {
let mut last_error = None;
for _ in 0..2 {
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!("navigate {ZHIHU_HOT_URL}"),
})?;
match browser_tool.invoke(
Action::Navigate,
json!({ "url": ZHIHU_HOT_URL }),
ZHIHU_DOMAIN,
) {
Ok(response) if response.success => return Ok(()),
Ok(response) => {
last_error = Some(PipeError::Protocol(format!(
"navigate failed: {}",
response.data
)));
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!("navigate {ZHIHU_HOT_URL}"),
})?;
let response = browser_tool.invoke(
Action::Navigate,
json!({ "url": ZHIHU_HOT_URL }),
ZHIHU_DOMAIN,
)?;
if response.success {
Ok(())
} else {
Err(PipeError::Protocol(format!(
"navigate failed: {}",
response.data
)))
}
}
fn poll_for_hotlist_readiness<T: Transport + 'static>(
browser_tool: &BrowserPipeTool<T>,
) -> Result<bool, PipeError> {
let ready_pattern =
Regex::new(HOTLIST_TEXT_READY_PATTERN).expect("hotlist readiness regex must compile");
for attempt in 0..HOTLIST_READY_POLL_ATTEMPTS {
let response =
browser_tool.invoke(Action::GetText, json!({ "selector": "body" }), ZHIHU_DOMAIN)?;
if response.success {
let payload = response.data.get("text").unwrap_or(&response.data);
if hotlist_text_looks_ready(payload, &ready_pattern) {
return Ok(true);
}
Err(err) => last_error = Some(err),
}
if attempt + 1 < HOTLIST_READY_POLL_ATTEMPTS {
thread::sleep(HOTLIST_READY_POLL_INTERVAL);
}
}
Ok(false)
}
Err(last_error.unwrap_or_else(|| {
PipeError::Protocol("navigate failed without detailed error".to_string())
}))
// Decides whether the captured page text looks like a fully loaded Zhihu
// hotlist. Two conditions must both hold: the text mentions "热榜" (the
// hotlist banner) and the readiness regex finds at least one ranked entry
// with a heat figure. A non-string payload is never considered ready.
fn hotlist_text_looks_ready(payload: &Value, ready_pattern: &Regex) -> bool {
    match payload.as_str() {
        Some(text) => text.contains("热榜") && ready_pattern.is_match(text),
        None => false,
    }
}
fn export_xlsx<T: Transport>(
@@ -224,15 +291,17 @@ fn export_xlsx<T: Transport>(
.map_err(|err| PipeError::Protocol(err.to_string()))?;
if !result.success {
return Err(PipeError::Protocol(
result.error.unwrap_or_else(|| "openxml_office failed".to_string()),
result
.error
.unwrap_or_else(|| "openxml_office failed".to_string()),
));
}
let payload: Value = serde_json::from_str(&result.output)
.map_err(|err| PipeError::Protocol(format!("invalid openxml_office output: {err}")))?;
let output_path = payload["output_path"]
.as_str()
.ok_or_else(|| PipeError::Protocol("openxml_office did not return output_path".to_string()))?;
let output_path = payload["output_path"].as_str().ok_or_else(|| {
PipeError::Protocol("openxml_office did not return output_path".to_string())
})?;
Ok(format!("已导出知乎热榜 Excel {output_path}"))
}
@@ -257,15 +326,17 @@ fn export_screen<T: Transport>(
.map_err(|err| PipeError::Protocol(err.to_string()))?;
if !result.success {
return Err(PipeError::Protocol(
result.error.unwrap_or_else(|| "screen_html_export failed".to_string()),
result
.error
.unwrap_or_else(|| "screen_html_export failed".to_string()),
));
}
let payload: Value = serde_json::from_str(&result.output)
.map_err(|err| PipeError::Protocol(format!("invalid screen_html_export output: {err}")))?;
let output_path = payload["output_path"]
.as_str()
.ok_or_else(|| PipeError::Protocol("screen_html_export did not return output_path".to_string()))?;
let output_path = payload["output_path"].as_str().ok_or_else(|| {
PipeError::Protocol("screen_html_export did not return output_path".to_string())
})?;
Ok(format!("已生成知乎热榜大屏 {output_path}"))
}
@@ -300,7 +371,9 @@ fn execute_zhihu_article_route<T: Transport + 'static>(
ZHIHU_DOMAIN,
)?;
if is_login_required_payload(&creator_state) {
return Ok(build_login_block_message(payload_current_url(&creator_state)));
return Ok(build_login_block_message(payload_current_url(
&creator_state,
)));
}
if payload_status(&creator_state) == Some("creator_home") {
return Ok(build_creator_entry_missing_message(payload_current_url(
@@ -321,10 +394,14 @@ fn execute_zhihu_article_route<T: Transport + 'static>(
ZHIHU_EDITOR_DOMAIN,
)?;
if is_login_required_payload(&editor_state) {
return Ok(build_login_block_message(payload_current_url(&editor_state)));
return Ok(build_login_block_message(payload_current_url(
&editor_state,
)));
}
if payload_status(&editor_state) != Some("editor_ready") {
return Ok(build_editor_unavailable_message(payload_current_url(&editor_state)));
return Ok(build_editor_unavailable_message(payload_current_url(
&editor_state,
)));
}
transport.send(&AgentMessage::LogEntry {
@@ -347,7 +424,10 @@ fn execute_zhihu_article_route<T: Transport + 'static>(
}
match payload_status(&fill_result) {
Some("draft_ready") => Ok(format!("已进入知乎文章编辑器并写入草稿《{}", article.title)),
Some("draft_ready") => Ok(format!(
"已进入知乎文章编辑器并写入草稿《{}",
article.title
)),
Some("publish_clicked") | Some("publish_submitted") => {
Ok(format!("已提交知乎文章发布流程《{}", article.title))
}
@@ -380,7 +460,9 @@ fn execute_zhihu_article_entry_route<T: Transport + 'static>(
ZHIHU_DOMAIN,
)?;
if is_login_required_payload(&creator_state) {
return Ok(build_login_block_message(payload_current_url(&creator_state)));
return Ok(build_login_block_message(payload_current_url(
&creator_state,
)));
}
if payload_status(&creator_state) == Some("creator_home") {
return Ok(build_creator_entry_missing_message(payload_current_url(
@@ -401,13 +483,17 @@ fn execute_zhihu_article_entry_route<T: Transport + 'static>(
ZHIHU_EDITOR_DOMAIN,
)?;
if is_login_required_payload(&editor_state) {
return Ok(build_login_block_message(payload_current_url(&editor_state)));
return Ok(build_login_block_message(payload_current_url(
&editor_state,
)));
}
if payload_status(&editor_state) == Some("editor_ready") {
return Ok("已进入知乎文章编辑器。".to_string());
}
Ok(build_editor_unavailable_message(payload_current_url(&editor_state)))
Ok(build_editor_unavailable_message(payload_current_url(
&editor_state,
)))
}
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
@@ -443,7 +529,11 @@ fn parse_hotlist_items_payload(payload: &Value) -> Result<Vec<HotlistItem>, Pipe
let rank = cells[0]
.as_u64()
.or_else(|| cells[0].as_str().and_then(|value| value.parse::<u64>().ok()))
.or_else(|| {
cells[0]
.as_str()
.and_then(|value| value.parse::<u64>().ok())
})
.unwrap_or((items.len() + 1) as u64);
let title = cells[1].as_str().unwrap_or_default().trim().to_string();
let heat = cells[2].as_str().unwrap_or_default().trim().to_string();
@@ -483,7 +573,10 @@ fn navigate_zhihu_page<T: Transport + 'static>(
if response.success {
Ok(())
} else {
Err(PipeError::Protocol(format!("navigate failed: {}", response.data)))
Err(PipeError::Protocol(format!(
"navigate failed: {}",
response.data
)))
}
}
@@ -507,7 +600,9 @@ fn execute_browser_skill_script<T: Transport + 'static>(
)));
}
Ok(normalize_payload(response.data.get("text").unwrap_or(&response.data)))
Ok(normalize_payload(
response.data.get("text").unwrap_or(&response.data),
))
}
fn navigate_to_editor_after_creator_entry<T: Transport + 'static>(
@@ -542,6 +637,239 @@ fn navigate_to_editor_after_creator_entry<T: Transport + 'static>(
Ok(())
}
// Unit tests for the hotlist collection workflow. They drive
// `collect_hotlist_items` end-to-end against a mock transport that replays a
// scripted queue of browser responses, then assert on the exact sequence of
// pipe actions (Navigate / GetText / Eval) that was emitted.
#[cfg(test)]
mod tests {
use super::*;
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use crate::pipe::{BrowserMessage, Timing};
use crate::security::MacPolicy;
// Test double for `Transport`: records every outgoing `AgentMessage` and
// answers each `recv_timeout` with the next pre-canned `BrowserMessage`.
struct MockWorkflowTransport {
// Messages sent by the code under test, in order.
sent: Mutex<Vec<AgentMessage>>,
// FIFO of scripted responses; popping past the end yields `PipeError::Timeout`.
responses: Mutex<VecDeque<BrowserMessage>>,
}
impl MockWorkflowTransport {
// Builds a mock primed with the given response script.
fn new(responses: Vec<BrowserMessage>) -> Self {
Self {
sent: Mutex::new(Vec::new()),
responses: Mutex::new(VecDeque::from(responses)),
}
}
// Snapshot of every message sent so far (cloned for assertion convenience).
fn sent_messages(&self) -> Vec<AgentMessage> {
self.sent.lock().unwrap().clone()
}
}
impl Transport for MockWorkflowTransport {
fn send(&self, message: &AgentMessage) -> Result<(), PipeError> {
self.sent.lock().unwrap().push(message.clone());
Ok(())
}
fn recv_timeout(&self, _timeout: Duration) -> Result<BrowserMessage, PipeError> {
// Replay the script in order; an exhausted script behaves like a timeout.
self.responses
.lock()
.unwrap()
.pop_front()
.ok_or(PipeError::Timeout)
}
}
// Minimal MAC policy allowing only www.zhihu.com and the three pipe actions
// this workflow needs (navigate, getText, eval).
fn zhihu_test_policy() -> MacPolicy {
MacPolicy::from_json_str(
&json!({
"version": "1.0",
"domains": { "allowed": ["www.zhihu.com"] },
"pipe_actions": {
"allowed": ["navigate", "getText", "eval"],
"blocked": []
}
})
.to_string(),
)
.unwrap()
}
// Convenience constructor for a successful browser response with fixed,
// arbitrary timing values.
fn success_browser_response(seq: u64, data: Value) -> BrowserMessage {
BrowserMessage::Response {
seq,
success: true,
data,
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 10,
},
}
}
// When the task context says we are already on the hot page and the first
// GetText poll finds readable hotlist text, no Navigate should be issued.
#[test]
fn collect_hotlist_items_skips_navigation_when_hot_page_is_already_readable() {
let transport = Arc::new(MockWorkflowTransport::new(vec![
// Readiness poll: body text already shows ranked entries with heat.
success_browser_response(
1,
json!({ "text": "知乎热榜\n1 问题一 344万热度\n2 问题二 266万热度" }),
),
// Extraction script (Eval) result: two parsed rows.
success_browser_response(
2,
json!({
"text": {
"source": "https://www.zhihu.com/hot",
"sheet_name": "知乎热榜",
"columns": ["rank", "title", "heat"],
"rows": [[1, "问题一", "344万"], [2, "问题二", "266万"]]
}
}),
),
]));
let browser_tool =
BrowserPipeTool::new(transport.clone(), zhihu_test_policy(), vec![1, 2, 3, 4])
.with_response_timeout(Duration::from_secs(1));
// Context claims the browser already sits on the hotlist page.
let task_context = CompatTaskContext {
page_url: Some("https://www.zhihu.com/hot".to_string()),
page_title: Some("知乎热榜".to_string()),
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
.expect("hotlist collection should succeed");
assert_eq!(items.len(), 2);
let sent = transport.sent_messages();
// A readiness GetText and the extraction Eval must have been issued...
assert!(sent.iter().any(|message| {
matches!(
message,
AgentMessage::Command { action, .. } if action == &Action::GetText
)
}));
assert!(sent.iter().any(|message| {
matches!(
message,
AgentMessage::Command { action, .. } if action == &Action::Eval
)
}));
// ...but no Navigate, since the page was already readable.
assert!(!sent.iter().any(|message| {
matches!(
message,
AgentMessage::Command { action, .. } if action == &Action::Navigate
)
}));
}
// After one Navigate, readiness polling should retry GetText (not re-navigate)
// until the hotlist text appears, then run the extraction Eval.
#[test]
fn collect_hotlist_items_polls_after_navigation_before_retrying_navigation() {
let transport = Arc::new(MockWorkflowTransport::new(vec![
// Navigate succeeds.
success_browser_response(1, json!({ "navigated": true })),
// Two empty polls before the page becomes readable on the third.
success_browser_response(2, json!({ "text": "" })),
success_browser_response(3, json!({ "text": "" })),
success_browser_response(4, json!({ "text": "知乎热榜\n1 问题一 344万热度" })),
// Extraction result: one row.
success_browser_response(
5,
json!({
"text": {
"source": "https://www.zhihu.com/hot",
"sheet_name": "知乎热榜",
"columns": ["rank", "title", "heat"],
"rows": [[1, "问题一", "344万"]]
}
}),
),
]));
let browser_tool =
BrowserPipeTool::new(transport.clone(), zhihu_test_policy(), vec![1, 2, 3, 4, 5])
.with_response_timeout(Duration::from_secs(1));
// Context starts off the hotlist page, forcing an initial navigation.
let task_context = CompatTaskContext {
page_url: Some("https://www.zhihu.com/".to_string()),
page_title: Some("知乎".to_string()),
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
.expect("hotlist collection should succeed after readiness polling");
assert_eq!(items.len(), 1);
let sent = transport.sent_messages();
let actions = sent
.iter()
.filter_map(|message| match message {
AgentMessage::Command { action, .. } => Some(action.clone()),
_ => None,
})
.collect::<Vec<_>>();
// Exactly one Navigate, three polls, then the extraction Eval.
assert_eq!(
actions,
vec![
Action::Navigate,
Action::GetText,
Action::GetText,
Action::GetText,
Action::Eval
]
);
}
// If the readiness poll budget (HOTLIST_READY_POLL_ATTEMPTS) is exhausted
// without readable text, a second Navigate attempt should be made.
#[test]
fn collect_hotlist_items_retries_navigation_after_short_readiness_budget_expires() {
let transport = Arc::new(MockWorkflowTransport::new(vec![
// First Navigate succeeds, but all subsequent polls come back empty...
success_browser_response(1, json!({ "navigated": true })),
success_browser_response(2, json!({ "text": "" })),
success_browser_response(3, json!({ "text": "" })),
success_browser_response(4, json!({ "text": "" })),
success_browser_response(5, json!({ "text": "" })),
success_browser_response(6, json!({ "text": "" })),
success_browser_response(7, json!({ "text": "" })),
success_browser_response(8, json!({ "text": "" })),
success_browser_response(9, json!({ "text": "" })),
success_browser_response(10, json!({ "text": "" })),
success_browser_response(11, json!({ "text": "" })),
// ...so the workflow navigates again; this time the page is ready.
success_browser_response(12, json!({ "navigated": true })),
success_browser_response(13, json!({ "text": "知乎热榜\n1 问题一 344万热度" })),
success_browser_response(
14,
json!({
"text": {
"source": "https://www.zhihu.com/hot",
"sheet_name": "知乎热榜",
"columns": ["rank", "title", "heat"],
"rows": [[1, "问题一", "344万"]]
}
}),
),
]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
zhihu_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
)
.with_response_timeout(Duration::from_secs(1));
let task_context = CompatTaskContext {
page_url: Some("https://www.zhihu.com/".to_string()),
page_title: Some("知乎".to_string()),
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
.expect("hotlist collection should succeed after one navigation retry");
assert_eq!(items.len(), 1);
let sent = transport.sent_messages();
let navigate_count = sent
.iter()
.filter(|message| {
matches!(
message,
AgentMessage::Command { action, .. } if action == &Action::Navigate
)
})
.count();
// Exactly two navigation attempts: the original plus one retry.
assert_eq!(navigate_count, 2);
}
}
fn load_browser_skill_script(
skill_name: &str,
script_name: &str,
@@ -563,8 +891,7 @@ fn load_browser_skill_script(
})?;
Ok(format!(
"(function() {{\nconst args = {};\n{}\n}})()",
args,
script
args, script
))
}
@@ -632,11 +959,11 @@ fn build_publish_confirmation_message(article: &ArticleDraft) -> String {
fn has_explicit_publish_confirmation(instruction: &str) -> bool {
let trimmed = instruction.trim();
trimmed.contains("确认发布") ||
trimmed.contains("确认发表") ||
trimmed.contains("现在发布") ||
trimmed.contains("立即发布") ||
trimmed.contains("可以发布")
trimmed.contains("确认发布")
|| trimmed.contains("确认发表")
|| trimmed.contains("现在发布")
|| trimmed.contains("立即发布")
|| trimmed.contains("可以发布")
}
fn task_requests_zhihu_article_entry(
@@ -649,17 +976,17 @@ fn task_requests_zhihu_article_entry(
}
let normalized = instruction.to_ascii_lowercase();
let asks_to_open = normalized.contains("open") ||
normalized.contains("goto") ||
normalized.contains("go to") ||
instruction.contains("打开") ||
instruction.contains("进入") ||
instruction.contains("");
let mentions_entry = instruction.contains("页面") ||
instruction.contains("入口") ||
instruction.contains("创作中心") ||
instruction.contains("写文章") ||
instruction.contains("发文章");
let asks_to_open = normalized.contains("open")
|| normalized.contains("goto")
|| normalized.contains("go to")
|| instruction.contains("打开")
|| instruction.contains("进入")
|| instruction.contains("");
let mentions_entry = instruction.contains("页面")
|| instruction.contains("入口")
|| instruction.contains("创作中心")
|| instruction.contains("写文章")
|| instruction.contains("发文章");
let has_article_inputs = parse_article_draft(instruction).is_some();
asks_to_open && mentions_entry && !has_article_inputs
@@ -681,12 +1008,11 @@ fn extract_article_draft(
fn parse_article_draft(text: &str) -> Option<ArticleDraft> {
let normalized = normalize_article_draft_input(text);
let title_re = Regex::new(r"(?m)^标题[:]\s*(.+?)\s*$").expect("valid zhihu title regex");
let body_re =
Regex::new(r"(?s)正文[:]\s*(.+)$").expect("valid zhihu body regex");
let inline_title_re = Regex::new(r"标题(?:是|为)\s*([^,\n]+)")
.expect("valid inline zhihu title regex");
let inline_body_re = Regex::new(r"(?s)正文(?:是|为)\s*(.+)$")
.expect("valid inline zhihu body regex");
let body_re = Regex::new(r"(?s)正文[:]\s*(.+)$").expect("valid zhihu body regex");
let inline_title_re =
Regex::new(r"标题(?:是|为)\s*([^,\n]+)").expect("valid inline zhihu title regex");
let inline_body_re =
Regex::new(r"(?s)正文(?:是|为)\s*(.+)$").expect("valid inline zhihu body regex");
let title = title_re
.captures(&normalized)
@@ -718,9 +1044,9 @@ fn parse_article_draft(text: &str) -> Option<ArticleDraft> {
fn normalize_article_draft_input(text: &str) -> String {
let trimmed = text.trim();
let unquoted = if trimmed.len() >= 2 &&
((trimmed.starts_with('"') && trimmed.ends_with('"')) ||
(trimmed.starts_with('\'') && trimmed.ends_with('\'')))
let unquoted = if trimmed.len() >= 2
&& ((trimmed.starts_with('"') && trimmed.ends_with('"'))
|| (trimmed.starts_with('\'') && trimmed.ends_with('\'')))
{
&trimmed[1..trimmed.len() - 1]
} else {