feat: route staged scene skills through runtime
Add registry-driven scene routing and multi-root skill loading so fault-details and 95598 scene skills can be triggered from natural language while still running through the browser-backed runtime. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,25 +12,31 @@ use zeroclaw::tools::{Tool, ToolResult};
|
||||
use crate::browser::BrowserBackend;
|
||||
use crate::pipe::Action;
|
||||
|
||||
pub struct BrowserScriptInvocation<'a> {
|
||||
pub tool: &'a SkillTool,
|
||||
pub skill_root: &'a Path,
|
||||
}
|
||||
|
||||
pub struct BrowserScriptSkillTool {
|
||||
tool_name: String,
|
||||
tool_description: String,
|
||||
script_path: PathBuf,
|
||||
tool: SkillTool,
|
||||
skill_root: PathBuf,
|
||||
args: HashMap<String, String>,
|
||||
browser_tool: Arc<dyn BrowserBackend>,
|
||||
}
|
||||
|
||||
impl BrowserScriptSkillTool {
|
||||
pub fn new(
|
||||
skill_name: &str,
|
||||
tool: &SkillTool,
|
||||
skill_root: &Path,
|
||||
browser_tool: Arc<dyn BrowserBackend>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let script_path = skill_root.join(&tool.command);
|
||||
let canonical_skill_root = skill_root
|
||||
impl BrowserScriptInvocation<'_> {
|
||||
fn script_path(&self) -> PathBuf {
|
||||
self.skill_root.join(&self.tool.command)
|
||||
}
|
||||
|
||||
fn canonical_script_path(&self) -> anyhow::Result<PathBuf> {
|
||||
let script_path = self.script_path();
|
||||
let canonical_skill_root = self
|
||||
.skill_root
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| skill_root.to_path_buf());
|
||||
.unwrap_or_else(|_| self.skill_root.to_path_buf());
|
||||
let canonical_script_path = script_path.canonicalize().map_err(|err| {
|
||||
anyhow::anyhow!(
|
||||
"failed to resolve browser script {}: {err}",
|
||||
@@ -43,11 +49,25 @@ impl BrowserScriptSkillTool {
|
||||
canonical_script_path.display()
|
||||
);
|
||||
}
|
||||
Ok(canonical_script_path)
|
||||
}
|
||||
}
|
||||
|
||||
impl BrowserScriptSkillTool {
|
||||
pub fn new(
|
||||
skill_name: &str,
|
||||
tool: &SkillTool,
|
||||
skill_root: &Path,
|
||||
browser_tool: Arc<dyn BrowserBackend>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let invocation = BrowserScriptInvocation { tool, skill_root };
|
||||
invocation.canonical_script_path()?;
|
||||
|
||||
Ok(Self {
|
||||
tool_name: format!("{}.{}", skill_name, tool.name),
|
||||
tool_description: tool.description.clone(),
|
||||
script_path: canonical_script_path,
|
||||
tool: tool.clone(),
|
||||
skill_root: skill_root.to_path_buf(),
|
||||
args: tool.args.clone(),
|
||||
browser_tool,
|
||||
})
|
||||
@@ -99,81 +119,12 @@ impl Tool for BrowserScriptSkillTool {
|
||||
}
|
||||
|
||||
async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
|
||||
let mut args = match args {
|
||||
Value::Object(args) => args,
|
||||
other => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected object arguments, got {other}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let raw_expected_domain = match args.remove("expected_domain") {
|
||||
Some(Value::String(value)) if !value.trim().is_empty() => value,
|
||||
Some(other) => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected_domain must be a non-empty string, got {other}"
|
||||
)))
|
||||
}
|
||||
None => {
|
||||
return Ok(failed_tool_result(
|
||||
"missing required field expected_domain".to_string(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let expected_domain = match normalize_domain_like(&raw_expected_domain) {
|
||||
Some(value) => value,
|
||||
None => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected_domain must resolve to a hostname, got {raw_expected_domain:?}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
for required_arg in self.args.keys() {
|
||||
if !args.contains_key(required_arg) {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"missing required field {required_arg}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let script_body = match fs::read_to_string(&self.script_path) {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"failed to read browser script {}: {err}",
|
||||
self.script_path.display()
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let wrapped_script = wrap_browser_script(&script_body, &Value::Object(args.clone()));
|
||||
let result = match self.browser_tool.invoke(
|
||||
Action::Eval,
|
||||
json!({ "script": wrapped_script }),
|
||||
&expected_domain,
|
||||
) {
|
||||
Ok(result) => result,
|
||||
Err(err) => return Ok(failed_tool_result(err.to_string())),
|
||||
};
|
||||
|
||||
if !result.success {
|
||||
return Ok(failed_tool_result(format_browser_script_error(
|
||||
&result.data,
|
||||
)));
|
||||
}
|
||||
|
||||
let payload = result
|
||||
.data
|
||||
.get("text")
|
||||
.cloned()
|
||||
.unwrap_or_else(|| result.data.clone());
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: stringify_tool_payload(&payload)?,
|
||||
error: None,
|
||||
})
|
||||
execute_browser_script_impl(
|
||||
&self.tool,
|
||||
&self.skill_root,
|
||||
self.browser_tool.clone(),
|
||||
args,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,6 +162,99 @@ pub fn build_browser_script_skill_tools(
|
||||
Ok(tools)
|
||||
}
|
||||
|
||||
pub async fn execute_browser_script_tool(
|
||||
tool: &SkillTool,
|
||||
skill_root: &Path,
|
||||
browser_tool: Arc<dyn BrowserBackend>,
|
||||
args: Value,
|
||||
) -> anyhow::Result<ToolResult> {
|
||||
execute_browser_script_impl(tool, skill_root, browser_tool, args)
|
||||
}
|
||||
|
||||
fn execute_browser_script_impl(
|
||||
tool: &SkillTool,
|
||||
skill_root: &Path,
|
||||
browser_tool: Arc<dyn BrowserBackend>,
|
||||
args: Value,
|
||||
) -> anyhow::Result<ToolResult> {
|
||||
let invocation = BrowserScriptInvocation { tool, skill_root };
|
||||
let script_path = invocation.canonical_script_path()?;
|
||||
|
||||
let mut args = match args {
|
||||
Value::Object(args) => args,
|
||||
other => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected object arguments, got {other}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let raw_expected_domain = match args.remove("expected_domain") {
|
||||
Some(Value::String(value)) if !value.trim().is_empty() => value,
|
||||
Some(other) => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected_domain must be a non-empty string, got {other}"
|
||||
)))
|
||||
}
|
||||
None => {
|
||||
return Ok(failed_tool_result(
|
||||
"missing required field expected_domain".to_string(),
|
||||
))
|
||||
}
|
||||
};
|
||||
let expected_domain = match normalize_domain_like(&raw_expected_domain) {
|
||||
Some(value) => value,
|
||||
None => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected_domain must resolve to a hostname, got {raw_expected_domain:?}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
for required_arg in tool.args.keys() {
|
||||
if !args.contains_key(required_arg) {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"missing required field {required_arg}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let script_body = match fs::read_to_string(&script_path) {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"failed to read browser script {}: {err}",
|
||||
script_path.display()
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let wrapped_script = wrap_browser_script(&script_body, &Value::Object(args.clone()));
|
||||
let result = match browser_tool.invoke(
|
||||
Action::Eval,
|
||||
json!({ "script": wrapped_script }),
|
||||
&expected_domain,
|
||||
) {
|
||||
Ok(result) => result,
|
||||
Err(err) => return Ok(failed_tool_result(err.to_string())),
|
||||
};
|
||||
|
||||
if !result.success {
|
||||
return Ok(failed_tool_result(format_browser_script_error(&result.data)));
|
||||
}
|
||||
|
||||
let payload = result
|
||||
.data
|
||||
.get("text")
|
||||
.cloned()
|
||||
.unwrap_or_else(|| result.data.clone());
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: stringify_tool_payload(&payload)?,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn wrap_browser_script(script_body: &str, args: &Value) -> String {
|
||||
format!(
|
||||
"(function() {{\nconst args = {};\n{}\n}})()",
|
||||
|
||||
@@ -12,6 +12,7 @@ use crate::runtime::RuntimeProfile;
|
||||
|
||||
const SGCLAW_ZEROCLAW_WORKSPACE_DIR: &str = ".sgclaw-zeroclaw-workspace";
|
||||
const SKILLS_DIR_NAME: &str = "skills";
|
||||
const STAGED_SKILLS_DIR_NAME: &str = "skill_staging";
|
||||
|
||||
pub fn build_zeroclaw_config(
|
||||
workspace_root: &Path,
|
||||
@@ -87,15 +88,41 @@ pub fn zeroclaw_default_skills_dir(workspace_root: &Path) -> PathBuf {
|
||||
zeroclaw_workspace_dir(workspace_root).join(SKILLS_DIR_NAME)
|
||||
}
|
||||
|
||||
pub fn resolve_skills_dir(workspace_root: &Path, settings: &DeepSeekSettings) -> PathBuf {
|
||||
resolve_skills_dir_path(workspace_root, settings.skills_dir.as_deref())
|
||||
pub fn resolve_skills_dir(workspace_root: &Path, settings: &DeepSeekSettings) -> Vec<PathBuf> {
|
||||
resolve_skills_dir_paths(workspace_root, &settings.skills_dir)
|
||||
}
|
||||
|
||||
pub fn resolve_skills_dir_from_sgclaw_settings(
|
||||
workspace_root: &Path,
|
||||
settings: &SgClawSettings,
|
||||
) -> PathBuf {
|
||||
resolve_skills_dir_path(workspace_root, settings.skills_dir.as_deref())
|
||||
) -> Vec<PathBuf> {
|
||||
resolve_skills_dir_paths(workspace_root, &settings.skills_dir)
|
||||
}
|
||||
|
||||
pub fn resolve_scene_skills_dir_from_sgclaw_settings(
|
||||
workspace_root: &Path,
|
||||
settings: &SgClawSettings,
|
||||
) -> Vec<PathBuf> {
|
||||
resolve_skills_dir_from_sgclaw_settings(workspace_root, settings)
|
||||
.into_iter()
|
||||
.flat_map(|dir| {
|
||||
let scene_dir = resolve_scene_skills_dir_path(dir.clone());
|
||||
if scene_dir != dir {
|
||||
vec![dir, scene_dir]
|
||||
} else {
|
||||
vec![dir]
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn resolve_scene_skills_dir_path(skills_dir: PathBuf) -> PathBuf {
|
||||
let staged_skills_dir = skills_dir.join(STAGED_SKILLS_DIR_NAME).join(SKILLS_DIR_NAME);
|
||||
if staged_skills_dir.is_dir() {
|
||||
staged_skills_dir
|
||||
} else {
|
||||
skills_dir
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_configured_skills_dir(configured_dir: &Path) -> PathBuf {
|
||||
@@ -111,8 +138,13 @@ fn normalize_configured_skills_dir(configured_dir: &Path) -> PathBuf {
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_skills_dir_path(workspace_root: &Path, configured_dir: Option<&Path>) -> PathBuf {
|
||||
configured_dir
|
||||
.map(normalize_configured_skills_dir)
|
||||
.unwrap_or_else(|| zeroclaw_default_skills_dir(workspace_root))
|
||||
fn resolve_skills_dir_paths(workspace_root: &Path, configured_dirs: &[PathBuf]) -> Vec<PathBuf> {
|
||||
if configured_dirs.is_empty() {
|
||||
vec![zeroclaw_default_skills_dir(workspace_root)]
|
||||
} else {
|
||||
configured_dirs
|
||||
.iter()
|
||||
.map(|d| normalize_configured_skills_dir(d))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,7 +146,7 @@ pub async fn execute_task_with_provider(
|
||||
instruction: &str,
|
||||
task_context: &CompatTaskContext,
|
||||
config: ZeroClawConfig,
|
||||
skills_dir: PathBuf,
|
||||
skills_dir: Vec<PathBuf>,
|
||||
settings: SgClawSettings,
|
||||
) -> Result<String, PipeError> {
|
||||
let engine = RuntimeEngine::new(settings.runtime_profile);
|
||||
|
||||
@@ -5,11 +5,15 @@ use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use regex::Regex;
|
||||
use reqwest::Url;
|
||||
use serde_json::{json, Value};
|
||||
use zeroclaw::skills::load_skills_from_directory;
|
||||
use zeroclaw::tools::Tool;
|
||||
|
||||
use crate::browser::{BrowserBackend, PipeBrowserBackend};
|
||||
use crate::compat::artifact_open::{open_exported_xlsx, open_local_dashboard, PostExportOpen};
|
||||
use crate::compat::browser_script_skill_tool::execute_browser_script_tool;
|
||||
use crate::compat::config_adapter::resolve_scene_skills_dir_from_sgclaw_settings;
|
||||
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
|
||||
use crate::compat::runtime::CompatTaskContext;
|
||||
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
|
||||
@@ -23,17 +27,19 @@ const ZHIHU_EDITOR_DOMAIN: &str = "zhuanlan.zhihu.com";
|
||||
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
|
||||
const ZHIHU_CREATOR_URL: &str = "https://www.zhihu.com/creator";
|
||||
const ZHIHU_EDITOR_URL: &str = "https://zhuanlan.zhihu.com/write";
|
||||
const FAULT_DETAILS_SCENE_ID: &str = "fault-details-report";
|
||||
const HOTLIST_READY_POLL_ATTEMPTS: usize = 10;
|
||||
const HOTLIST_READY_POLL_INTERVAL: Duration = Duration::from_millis(500);
|
||||
// Simplified readiness pattern: only checks that *some* heat metric exists
|
||||
// (e.g. "3440万热度", "2.1亿"). The full rank-title-heat structure is validated
|
||||
// later by the extraction script. Using a simple pattern avoids problems with
|
||||
// the multi-line innerText format where rank, title, and heat are on separate
|
||||
// lines (`.` does not cross newlines by default).
|
||||
const EDITOR_READY_POLL_ATTEMPTS: usize = 12;
|
||||
const EDITOR_READY_POLL_INTERVAL: Duration = Duration::from_millis(500);
|
||||
// Readiness pattern: requires the "热度" suffix so that sidebar "大家都在搜"
|
||||
// entries (which show bare "414万" without "热度") do NOT trigger a premature
|
||||
// readiness signal. The main hotlist always renders "538万热度".
|
||||
const HOTLIST_TEXT_READY_PATTERN: &str =
|
||||
r"\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)\s*(?:热度)?";
|
||||
r"\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)\s*热度";
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum WorkflowRoute {
|
||||
FaultDetailsReport,
|
||||
ZhihuHotlistExportXlsx,
|
||||
ZhihuHotlistScreen,
|
||||
ZhihuArticleEntry,
|
||||
@@ -60,6 +66,13 @@ pub fn detect_route(
|
||||
page_url: Option<&str>,
|
||||
page_title: Option<&str>,
|
||||
) -> Option<WorkflowRoute> {
|
||||
if let Some(scene) = crate::runtime::match_scene_instruction(instruction) {
|
||||
if scene.id == FAULT_DETAILS_SCENE_ID
|
||||
&& matches!(scene.dispatch_mode, crate::runtime::DispatchMode::DirectBrowser)
|
||||
{
|
||||
return Some(WorkflowRoute::FaultDetailsReport);
|
||||
}
|
||||
}
|
||||
if crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
|
||||
let normalized = instruction.to_ascii_lowercase();
|
||||
if normalized.contains("dashboard")
|
||||
@@ -93,7 +106,8 @@ pub fn detect_route(
|
||||
pub fn prefers_direct_execution(route: &WorkflowRoute) -> bool {
|
||||
matches!(
|
||||
route,
|
||||
WorkflowRoute::ZhihuHotlistExportXlsx
|
||||
WorkflowRoute::FaultDetailsReport
|
||||
| WorkflowRoute::ZhihuHotlistExportXlsx
|
||||
| WorkflowRoute::ZhihuHotlistScreen
|
||||
| WorkflowRoute::ZhihuArticleEntry
|
||||
| WorkflowRoute::ZhihuArticleDraft
|
||||
@@ -119,7 +133,8 @@ pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bo
|
||||
looks_like_denial
|
||||
|| matches!(
|
||||
route,
|
||||
WorkflowRoute::ZhihuHotlistExportXlsx
|
||||
WorkflowRoute::FaultDetailsReport
|
||||
| WorkflowRoute::ZhihuHotlistExportXlsx
|
||||
| WorkflowRoute::ZhihuHotlistScreen
|
||||
| WorkflowRoute::ZhihuArticleEntry
|
||||
| WorkflowRoute::ZhihuArticleDraft
|
||||
@@ -138,6 +153,13 @@ pub fn execute_route_with_browser_backend(
|
||||
settings: &SgClawSettings,
|
||||
) -> Result<String, PipeError> {
|
||||
match route {
|
||||
WorkflowRoute::FaultDetailsReport => execute_fault_details_route(
|
||||
browser_backend.clone(),
|
||||
instruction,
|
||||
workspace_root,
|
||||
settings,
|
||||
task_context.page_url.as_deref(),
|
||||
),
|
||||
WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen => {
|
||||
let top_n = extract_top_n(instruction);
|
||||
let items = collect_hotlist_items(transport, browser_backend.as_ref(), top_n, task_context)?;
|
||||
@@ -210,6 +232,157 @@ pub fn execute_route<T: Transport + 'static>(
|
||||
)
|
||||
}
|
||||
|
||||
fn execute_fault_details_route(
|
||||
browser_backend: Arc<dyn BrowserBackend>,
|
||||
instruction: &str,
|
||||
workspace_root: &Path,
|
||||
settings: &SgClawSettings,
|
||||
page_url: Option<&str>,
|
||||
) -> Result<String, PipeError> {
|
||||
let scene = crate::runtime::match_scene_instruction(instruction).ok_or_else(|| {
|
||||
PipeError::Protocol("故障明细直连路由失败:未找到场景元数据。".to_string())
|
||||
})?;
|
||||
if scene.id != FAULT_DETAILS_SCENE_ID {
|
||||
return Err(PipeError::Protocol(format!(
|
||||
"故障明细直连路由失败:场景不匹配,got {}",
|
||||
scene.id
|
||||
)));
|
||||
}
|
||||
|
||||
let period = derive_fault_details_period(instruction).ok_or_else(|| {
|
||||
PipeError::Protocol(
|
||||
"故障明细直连路由失败:无法从当前指令安全推导必填参数 period,请明确提供例如“导出 2026-04 故障明细”。"
|
||||
.to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let skills_dirs = resolve_scene_skills_dir_from_sgclaw_settings(workspace_root, settings);
|
||||
let skill = skills_dirs
|
||||
.iter()
|
||||
.flat_map(|dir| load_skills_from_directory(dir, true))
|
||||
.find(|skill| skill.name == scene.skill_package)
|
||||
.ok_or_else(|| {
|
||||
PipeError::Protocol(format!(
|
||||
"故障明细直连路由失败:未找到技能包 {} in [{}]",
|
||||
scene.skill_package,
|
||||
skills_dirs.iter().map(|d| d.display().to_string()).collect::<Vec<_>>().join(", ")
|
||||
))
|
||||
})?;
|
||||
let skill_root = skill
|
||||
.location
|
||||
.as_deref()
|
||||
.and_then(Path::parent)
|
||||
.ok_or_else(|| {
|
||||
PipeError::Protocol(format!(
|
||||
"故障明细直连路由失败:技能包 {} 缺少有效位置元数据",
|
||||
scene.skill_package
|
||||
))
|
||||
})?;
|
||||
let tool = skill
|
||||
.tools
|
||||
.iter()
|
||||
.find(|tool| tool.name == scene.skill_tool)
|
||||
.ok_or_else(|| {
|
||||
PipeError::Protocol(format!(
|
||||
"故障明细直连路由失败:技能包 {} 缺少工具 {}",
|
||||
scene.skill_package, scene.skill_tool
|
||||
))
|
||||
})?;
|
||||
if tool.kind != "browser_script" {
|
||||
return Err(PipeError::Protocol(format!(
|
||||
"故障明细直连路由失败:工具 {} 必须是 browser_script,当前为 {}",
|
||||
scene.skill_tool, tool.kind
|
||||
)));
|
||||
}
|
||||
|
||||
let expected_domain = fault_details_expected_domain(page_url, &scene.expected_domain)
|
||||
.ok_or_else(|| {
|
||||
PipeError::Protocol(
|
||||
"故障明细直连路由失败:无法从当前页面上下文解析可用域名。".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
let runtime = tokio::runtime::Runtime::new()
|
||||
.map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
|
||||
let result = runtime
|
||||
.block_on(execute_browser_script_tool(
|
||||
tool,
|
||||
skill_root,
|
||||
browser_backend,
|
||||
json!({
|
||||
"expected_domain": expected_domain,
|
||||
"period": period,
|
||||
}),
|
||||
))
|
||||
.map_err(|err| PipeError::Protocol(err.to_string()))?;
|
||||
if !result.success {
|
||||
return Err(PipeError::Protocol(
|
||||
result
|
||||
.error
|
||||
.unwrap_or_else(|| "fault-details-report browser script failed".to_string()),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(result.output)
|
||||
}
|
||||
|
||||
fn fault_details_expected_domain(page_url: Option<&str>, fallback: &str) -> Option<String> {
|
||||
page_url
|
||||
.and_then(host_from_url)
|
||||
.or_else(|| host_from_url(fallback))
|
||||
}
|
||||
|
||||
fn host_from_url(raw: &str) -> Option<String> {
|
||||
let trimmed = raw.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Ok(url) = Url::parse(trimmed) {
|
||||
return url.host_str().map(|host| host.to_ascii_lowercase());
|
||||
}
|
||||
|
||||
let host = trimmed
|
||||
.trim_start_matches("https://")
|
||||
.trim_start_matches("http://")
|
||||
.split(['/', '?', '#'])
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.split(':')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
(!host.is_empty()).then_some(host)
|
||||
}
|
||||
|
||||
fn derive_fault_details_period(instruction: &str) -> Option<String> {
|
||||
let month_re = Regex::new(r"(20\d{2})[-/年](0?[1-9]|1[0-2])").expect("valid fault details month regex");
|
||||
let derived = month_re.captures_iter(instruction).find_map(|capture| {
|
||||
let matched = capture.get(0)?;
|
||||
let before_is_digit = instruction[..matched.start()]
|
||||
.chars()
|
||||
.next_back()
|
||||
.is_some_and(|ch| ch.is_ascii_digit());
|
||||
let after_is_digit = instruction[matched.end()..]
|
||||
.chars()
|
||||
.next()
|
||||
.is_some_and(|ch| ch.is_ascii_digit());
|
||||
if before_is_digit || after_is_digit {
|
||||
return None;
|
||||
}
|
||||
|
||||
let year = capture.get(1).map(|m| m.as_str()).unwrap_or_default();
|
||||
let month = capture
|
||||
.get(2)
|
||||
.and_then(|m| m.as_str().parse::<u32>().ok())
|
||||
.unwrap_or(1);
|
||||
Some(format!("{year}-{month:02}"))
|
||||
});
|
||||
derived
|
||||
}
|
||||
|
||||
fn collect_hotlist_items(
|
||||
transport: &dyn crate::agent::AgentEventSink,
|
||||
browser_tool: &dyn BrowserBackend,
|
||||
@@ -258,10 +431,16 @@ fn ensure_hotlist_page_ready(
|
||||
.as_deref()
|
||||
.is_some_and(|title| title.contains("热榜"));
|
||||
|
||||
if starts_on_hotlist && poll_for_hotlist_readiness(browser_tool)? {
|
||||
return Ok(None);
|
||||
}
|
||||
// Always validate via probe_hotlist_extractor rather than returning
|
||||
// Ok(None) on a bare readiness pass. The readiness poll uses getText(body)
|
||||
// which can be triggered by sidebar / nav-bar content before the main
|
||||
// hotlist DOM has rendered. probe_hotlist_extractor runs the full
|
||||
// extraction script and returns None when no valid rows are found,
|
||||
// allowing the retry loop to kick in.
|
||||
if starts_on_hotlist {
|
||||
// Best-effort wait for content to appear; ignore the boolean result –
|
||||
// we always follow up with the probe.
|
||||
let _ = poll_for_hotlist_readiness(browser_tool);
|
||||
if let Some(items) = probe_hotlist_extractor(transport, browser_tool, top_n)? {
|
||||
return Ok(Some(items));
|
||||
}
|
||||
@@ -270,19 +449,77 @@ fn ensure_hotlist_page_ready(
|
||||
let mut last_error = None;
|
||||
for attempt in 0..2 {
|
||||
navigate_hotlist_page(transport, browser_tool)?;
|
||||
if poll_for_hotlist_readiness(browser_tool)? {
|
||||
return Ok(None);
|
||||
}
|
||||
let _ = poll_for_hotlist_readiness(browser_tool);
|
||||
if let Some(items) = probe_hotlist_extractor(transport, browser_tool, top_n)? {
|
||||
return Ok(Some(items));
|
||||
}
|
||||
last_error = Some(PipeError::Protocol(format!(
|
||||
last_error = Some(format!(
|
||||
"知乎热榜页面已打开,但在短轮询窗口内仍未出现可读热榜内容(attempt={})",
|
||||
attempt + 1
|
||||
)));
|
||||
));
|
||||
}
|
||||
|
||||
Err(last_error.unwrap_or_else(|| PipeError::Protocol("知乎热榜页面未就绪".to_string())))
|
||||
// Log the last failure for diagnostics, then let caller try one final
|
||||
// extraction as a last resort.
|
||||
if let Some(msg) = last_error {
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "warn".to_string(),
|
||||
message: msg,
|
||||
}).ok();
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Poll the Zhihu write page until `prepare_article_editor.js` reports
|
||||
/// "editor_ready" or a terminal state (login_required). The editor page
|
||||
/// is a React SPA whose title textarea and Draft.js body take noticeable
|
||||
/// time to mount after navigation, so a single immediate check frequently
|
||||
/// reports "editor_unavailable".
|
||||
fn poll_for_editor_readiness(
|
||||
browser_tool: &dyn BrowserBackend,
|
||||
desired_mode: &str,
|
||||
) -> Result<Value, PipeError> {
|
||||
let args = json!({ "desired_mode": desired_mode });
|
||||
let mut last_state: Option<Value> = None;
|
||||
|
||||
for attempt in 0..EDITOR_READY_POLL_ATTEMPTS {
|
||||
match execute_browser_skill_script(
|
||||
browser_tool,
|
||||
"zhihu-write",
|
||||
"prepare_article_editor.js",
|
||||
args.clone(),
|
||||
ZHIHU_EDITOR_DOMAIN,
|
||||
) {
|
||||
Ok(state) => {
|
||||
let status = payload_status(&state);
|
||||
if status == Some("editor_ready") || status == Some("login_required") {
|
||||
return Ok(state);
|
||||
}
|
||||
last_state = Some(state);
|
||||
}
|
||||
Err(PipeError::PipeClosed) => return Err(PipeError::PipeClosed),
|
||||
Err(_) => {
|
||||
// Script may fail while the page is still navigating; tolerate.
|
||||
}
|
||||
}
|
||||
|
||||
if attempt + 1 < EDITOR_READY_POLL_ATTEMPTS {
|
||||
thread::sleep(EDITOR_READY_POLL_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the last observed state so the caller can surface the
|
||||
// "editor_unavailable" message; or make one final attempt.
|
||||
match last_state {
|
||||
Some(state) => Ok(state),
|
||||
None => execute_browser_skill_script(
|
||||
browser_tool,
|
||||
"zhihu-write",
|
||||
"prepare_article_editor.js",
|
||||
args,
|
||||
ZHIHU_EDITOR_DOMAIN,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn probe_hotlist_extractor(
|
||||
@@ -516,12 +753,9 @@ fn execute_zhihu_article_route(
|
||||
level: "info".to_string(),
|
||||
message: "call zhihu-write.prepare_article_editor".to_string(),
|
||||
})?;
|
||||
let editor_state = execute_browser_skill_script(
|
||||
let editor_state = poll_for_editor_readiness(
|
||||
browser_tool,
|
||||
"zhihu-write",
|
||||
"prepare_article_editor.js",
|
||||
json!({ "desired_mode": if publish_mode { "publish" } else { "draft" } }),
|
||||
ZHIHU_EDITOR_DOMAIN,
|
||||
if publish_mode { "publish" } else { "draft" },
|
||||
)?;
|
||||
if is_login_required_payload(&editor_state) {
|
||||
return Ok(build_login_block_message(payload_current_url(
|
||||
@@ -669,12 +903,9 @@ fn execute_zhihu_article_entry_route(
|
||||
level: "info".to_string(),
|
||||
message: "call zhihu-write.prepare_article_editor".to_string(),
|
||||
})?;
|
||||
let editor_state = execute_browser_skill_script(
|
||||
let editor_state = poll_for_editor_readiness(
|
||||
browser_tool,
|
||||
"zhihu-write",
|
||||
"prepare_article_editor.js",
|
||||
json!({ "desired_mode": "draft" }),
|
||||
ZHIHU_EDITOR_DOMAIN,
|
||||
"draft",
|
||||
)?;
|
||||
if is_login_required_payload(&editor_state) {
|
||||
return Ok(build_login_block_message(payload_current_url(
|
||||
@@ -1044,7 +1275,7 @@ mod tests {
|
||||
"test-key".to_string(),
|
||||
"http://127.0.0.1:9".to_string(),
|
||||
"deepseek-chat".to_string(),
|
||||
None,
|
||||
Vec::new(),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user