feat: add browser script skill execution
This commit is contained in:
254
src/compat/browser_script_skill_tool.rs
Normal file
254
src/compat/browser_script_skill_tool.rs
Normal file
@@ -0,0 +1,254 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use reqwest::Url;
|
||||
use serde_json::{json, Value};
|
||||
use zeroclaw::skills::{Skill, SkillTool};
|
||||
use zeroclaw::tools::{Tool, ToolResult};
|
||||
|
||||
use crate::pipe::{Action, BrowserPipeTool, Transport};
|
||||
|
||||
pub struct BrowserScriptSkillTool<T: Transport> {
|
||||
tool_name: String,
|
||||
tool_description: String,
|
||||
script_path: PathBuf,
|
||||
args: HashMap<String, String>,
|
||||
browser_tool: BrowserPipeTool<T>,
|
||||
}
|
||||
|
||||
impl<T: Transport> BrowserScriptSkillTool<T> {
|
||||
pub fn new(
|
||||
skill_name: &str,
|
||||
tool: &SkillTool,
|
||||
skill_root: &Path,
|
||||
browser_tool: BrowserPipeTool<T>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let script_path = skill_root.join(&tool.command);
|
||||
let canonical_skill_root = skill_root.canonicalize().unwrap_or_else(|_| skill_root.to_path_buf());
|
||||
let canonical_script_path = script_path
|
||||
.canonicalize()
|
||||
.map_err(|err| anyhow::anyhow!("failed to resolve browser script {}: {err}", script_path.display()))?;
|
||||
if !canonical_script_path.starts_with(&canonical_skill_root) {
|
||||
anyhow::bail!(
|
||||
"browser script path escapes skill root: {}",
|
||||
canonical_script_path.display()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
tool_name: format!("{}.{}", skill_name, tool.name),
|
||||
tool_description: tool.description.clone(),
|
||||
script_path: canonical_script_path,
|
||||
args: tool.args.clone(),
|
||||
browser_tool,
|
||||
})
|
||||
}
|
||||
|
||||
fn build_parameters_schema(&self) -> Value {
|
||||
let mut properties = serde_json::Map::new();
|
||||
let mut required = vec![Value::String("expected_domain".to_string())];
|
||||
|
||||
properties.insert(
|
||||
"expected_domain".to_string(),
|
||||
json!({
|
||||
"type": "string",
|
||||
"description": "Bare hostname for the current page, for example www.zhihu.com."
|
||||
}),
|
||||
);
|
||||
|
||||
for (name, description) in &self.args {
|
||||
properties.insert(
|
||||
name.clone(),
|
||||
json!({
|
||||
"type": "string",
|
||||
"description": description
|
||||
}),
|
||||
);
|
||||
required.push(Value::String(name.clone()));
|
||||
}
|
||||
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": properties,
|
||||
"required": required
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<T: Transport + 'static> Tool for BrowserScriptSkillTool<T> {
|
||||
fn name(&self) -> &str {
|
||||
&self.tool_name
|
||||
}
|
||||
|
||||
fn description(&self) -> &str {
|
||||
&self.tool_description
|
||||
}
|
||||
|
||||
fn parameters_schema(&self) -> Value {
|
||||
self.build_parameters_schema()
|
||||
}
|
||||
|
||||
async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
|
||||
let mut args = match args {
|
||||
Value::Object(args) => args,
|
||||
other => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected object arguments, got {other}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let raw_expected_domain = match args.remove("expected_domain") {
|
||||
Some(Value::String(value)) if !value.trim().is_empty() => value,
|
||||
Some(other) => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected_domain must be a non-empty string, got {other}"
|
||||
)))
|
||||
}
|
||||
None => return Ok(failed_tool_result("missing required field expected_domain".to_string())),
|
||||
};
|
||||
let expected_domain = match normalize_domain_like(&raw_expected_domain) {
|
||||
Some(value) => value,
|
||||
None => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"expected_domain must resolve to a hostname, got {raw_expected_domain:?}"
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
for required_arg in self.args.keys() {
|
||||
if !args.contains_key(required_arg) {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"missing required field {required_arg}"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let script_body = match fs::read_to_string(&self.script_path) {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
return Ok(failed_tool_result(format!(
|
||||
"failed to read browser script {}: {err}",
|
||||
self.script_path.display()
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
let wrapped_script = wrap_browser_script(&script_body, &Value::Object(args.clone()));
|
||||
let result = match self.browser_tool.invoke(
|
||||
Action::Eval,
|
||||
json!({ "script": wrapped_script }),
|
||||
&expected_domain,
|
||||
) {
|
||||
Ok(result) => result,
|
||||
Err(err) => return Ok(failed_tool_result(err.to_string())),
|
||||
};
|
||||
|
||||
if !result.success {
|
||||
return Ok(failed_tool_result(format_browser_script_error(&result.data)));
|
||||
}
|
||||
|
||||
let payload = result
|
||||
.data
|
||||
.get("text")
|
||||
.cloned()
|
||||
.unwrap_or_else(|| result.data.clone());
|
||||
Ok(ToolResult {
|
||||
success: true,
|
||||
output: stringify_tool_payload(&payload)?,
|
||||
error: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_browser_script_skill_tools<T: Transport + 'static>(
|
||||
skills: &[Skill],
|
||||
browser_tool: BrowserPipeTool<T>,
|
||||
) -> Result<Vec<Box<dyn Tool>>, anyhow::Error> {
|
||||
let mut tools: Vec<Box<dyn Tool>> = Vec::new();
|
||||
|
||||
for skill in skills {
|
||||
let Some(location) = skill.location.as_ref() else {
|
||||
continue;
|
||||
};
|
||||
let Some(skill_root) = location.parent() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for tool in &skill.tools {
|
||||
if tool.kind != "browser_script" {
|
||||
continue;
|
||||
}
|
||||
tools.push(Box::new(BrowserScriptSkillTool::new(
|
||||
&skill.name,
|
||||
tool,
|
||||
skill_root,
|
||||
browser_tool.clone(),
|
||||
)?));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tools)
|
||||
}
|
||||
|
||||
fn wrap_browser_script(script_body: &str, args: &Value) -> String {
|
||||
format!(
|
||||
"(function() {{\nconst args = {};\n{}\n}})()",
|
||||
serde_json::to_string(args).unwrap_or_else(|_| "{}".to_string()),
|
||||
script_body
|
||||
)
|
||||
}
|
||||
|
||||
fn stringify_tool_payload(payload: &Value) -> anyhow::Result<String> {
|
||||
Ok(match payload {
|
||||
Value::String(value) => value.clone(),
|
||||
Value::Null => "null".to_string(),
|
||||
Value::Bool(_) | Value::Number(_) | Value::Array(_) | Value::Object(_) => {
|
||||
serde_json::to_string(payload)?
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn failed_tool_result(error: String) -> ToolResult {
|
||||
ToolResult {
|
||||
success: false,
|
||||
output: String::new(),
|
||||
error: Some(error),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_browser_script_error(data: &Value) -> String {
|
||||
data.get("error")
|
||||
.and_then(|value| value.get("message"))
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
.unwrap_or_else(|| format!("browser script failed: {data}"))
|
||||
}
|
||||
|
||||
fn normalize_domain_like(raw: &str) -> Option<String> {
|
||||
let trimmed = raw.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Ok(url) = Url::parse(trimmed) {
|
||||
return url.host_str().map(|host| host.to_ascii_lowercase());
|
||||
}
|
||||
|
||||
let host = trimmed
|
||||
.trim_start_matches("https://")
|
||||
.trim_start_matches("http://")
|
||||
.split(['/', '?', '#'])
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.split(':')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.trim()
|
||||
.to_ascii_lowercase();
|
||||
|
||||
(!host.is_empty()).then_some(host)
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod browser_script_skill_tool;
|
||||
pub mod browser_tool_adapter;
|
||||
pub mod config_adapter;
|
||||
pub mod cron_adapter;
|
||||
|
||||
@@ -12,6 +12,7 @@ use zeroclaw::providers::traits::{
|
||||
ProviderCapabilities, StreamEvent, StreamOptions, StreamResult,
|
||||
};
|
||||
|
||||
use crate::compat::browser_script_skill_tool::build_browser_script_skill_tools;
|
||||
use crate::compat::browser_tool_adapter::ZeroClawBrowserTool;
|
||||
use crate::compat::config_adapter::{
|
||||
build_zeroclaw_config_from_sgclaw_settings,
|
||||
@@ -120,6 +121,7 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
|
||||
message: format!("loaded skills: {}", loaded_skill_labels.join(", ")),
|
||||
})?;
|
||||
}
|
||||
let browser_tool_for_scripts = browser_tool.clone();
|
||||
let mut tools: Vec<Box<dyn zeroclaw::tools::Tool>> = if browser_surface_present {
|
||||
vec![
|
||||
Box::new(ZeroClawBrowserTool::new_superrpa(browser_tool.clone())),
|
||||
@@ -128,6 +130,12 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
if browser_surface_present {
|
||||
tools.extend(
|
||||
build_browser_script_skill_tools(&loaded_skills, browser_tool_for_scripts)
|
||||
.map_err(map_anyhow_to_pipe_error)?,
|
||||
);
|
||||
}
|
||||
if matches!(settings.office_backend, OfficeBackend::OpenXml) &&
|
||||
engine.should_attach_openxml_office_tool(instruction)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use regex::Regex;
|
||||
@@ -11,8 +11,6 @@ use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
|
||||
|
||||
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
|
||||
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
|
||||
const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"];
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum WorkflowRoute {
|
||||
ZhihuHotlistExportXlsx,
|
||||
@@ -87,28 +85,28 @@ fn collect_hotlist_items<T: Transport + 'static>(
|
||||
top_n: usize,
|
||||
) -> Result<Vec<HotlistItem>, PipeError> {
|
||||
navigate_hotlist_with_retry(transport, browser_tool)?;
|
||||
|
||||
for selector in HOTLIST_ROOT_SELECTORS {
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: format!("getText {selector}"),
|
||||
})?;
|
||||
let response = browser_tool.invoke(
|
||||
Action::GetText,
|
||||
json!({ "selector": selector }),
|
||||
ZHIHU_DOMAIN,
|
||||
)?;
|
||||
if !response.success {
|
||||
continue;
|
||||
}
|
||||
let text = response.data["text"].as_str().unwrap_or_default();
|
||||
let items = parse_hotlist_items(text, top_n);
|
||||
if !items.is_empty() {
|
||||
return Ok(items);
|
||||
}
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: "call zhihu-hotlist.extract_hotlist".to_string(),
|
||||
})?;
|
||||
let response = browser_tool.invoke(
|
||||
Action::Eval,
|
||||
json!({ "script": load_hotlist_extractor_script(top_n)? }),
|
||||
ZHIHU_DOMAIN,
|
||||
)?;
|
||||
if !response.success {
|
||||
return Err(PipeError::Protocol(format!(
|
||||
"知乎热榜采集失败:{}",
|
||||
response
|
||||
.data
|
||||
.get("error")
|
||||
.and_then(|value| value.get("message"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("browser script execution failed")
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Vec::new())
|
||||
parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data))
|
||||
}
|
||||
|
||||
fn navigate_hotlist_with_retry<T: Transport + 'static>(
|
||||
@@ -212,130 +210,71 @@ fn export_screen<T: Transport>(
|
||||
Ok(format!("已生成知乎热榜大屏 {output_path}"))
|
||||
}
|
||||
|
||||
fn parse_hotlist_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
|
||||
let mut items = parse_single_line_items(text, top_n);
|
||||
if !items.is_empty() {
|
||||
return items;
|
||||
}
|
||||
|
||||
let lines = normalize_lines(text);
|
||||
let mut seen_ranks = BTreeSet::new();
|
||||
let mut idx = 0usize;
|
||||
|
||||
while idx < lines.len() && items.len() < top_n {
|
||||
let Some(rank) = parse_rank(&lines[idx]) else {
|
||||
idx += 1;
|
||||
continue;
|
||||
};
|
||||
if !seen_ranks.insert(rank) {
|
||||
idx += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut title = None;
|
||||
let mut heat = None;
|
||||
for candidate in lines.iter().skip(idx + 1).take(6) {
|
||||
if parse_rank(candidate).is_some() {
|
||||
break;
|
||||
}
|
||||
if heat.is_none() && looks_like_heat(candidate) {
|
||||
heat = Some(normalize_heat(candidate));
|
||||
continue;
|
||||
}
|
||||
if title.is_none() && !is_noise_line(candidate) {
|
||||
title = Some(candidate.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let (Some(title), Some(heat)) = (title, heat) {
|
||||
items.push(HotlistItem { rank, title, heat });
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
items.sort_by_key(|item| item.rank);
|
||||
items.truncate(top_n);
|
||||
items
|
||||
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
|
||||
let script_path = Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR")))
|
||||
.join("skill_lib")
|
||||
.join("skills")
|
||||
.join("zhihu-hotlist")
|
||||
.join("scripts")
|
||||
.join("extract_hotlist.js");
|
||||
let script = fs::read_to_string(&script_path).map_err(|err| {
|
||||
PipeError::Protocol(format!(
|
||||
"failed to read zhihu hotlist extractor script {}: {err}",
|
||||
script_path.display()
|
||||
))
|
||||
})?;
|
||||
Ok(format!(
|
||||
"(function() {{\nconst args = {};\n{}\n}})()",
|
||||
json!({ "top_n": top_n.to_string() }),
|
||||
script
|
||||
))
|
||||
}
|
||||
|
||||
fn parse_single_line_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
|
||||
let re = Regex::new(
|
||||
r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$",
|
||||
)
|
||||
.expect("valid hotlist single-line regex");
|
||||
let mut items = Vec::new();
|
||||
let mut seen_ranks = BTreeSet::new();
|
||||
fn parse_hotlist_items_payload(payload: &Value) -> Result<Vec<HotlistItem>, PipeError> {
|
||||
let normalized_payload = if let Some(text) = payload.as_str() {
|
||||
serde_json::from_str::<Value>(text).unwrap_or_else(|_| Value::String(text.to_string()))
|
||||
} else {
|
||||
payload.clone()
|
||||
};
|
||||
|
||||
for capture in re.captures_iter(text) {
|
||||
let rank = capture
|
||||
.get(1)
|
||||
.and_then(|value| value.as_str().parse::<u64>().ok())
|
||||
.unwrap_or_default();
|
||||
if rank == 0 || !seen_ranks.insert(rank) {
|
||||
let rows = normalized_payload
|
||||
.get("rows")
|
||||
.and_then(Value::as_array)
|
||||
.ok_or_else(|| {
|
||||
PipeError::Protocol("知乎热榜采集失败:浏览器脚本未返回 rows".to_string())
|
||||
})?;
|
||||
|
||||
let mut items = Vec::new();
|
||||
for row in rows {
|
||||
let Some(cells) = row.as_array() else {
|
||||
continue;
|
||||
};
|
||||
if cells.len() != 3 {
|
||||
continue;
|
||||
}
|
||||
let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or("");
|
||||
let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or("");
|
||||
|
||||
let rank = cells[0]
|
||||
.as_u64()
|
||||
.or_else(|| cells[0].as_str().and_then(|value| value.parse::<u64>().ok()))
|
||||
.unwrap_or((items.len() + 1) as u64);
|
||||
let title = cells[1].as_str().unwrap_or_default().trim().to_string();
|
||||
let heat = cells[2].as_str().unwrap_or_default().trim().to_string();
|
||||
if title.is_empty() || heat.is_empty() {
|
||||
continue;
|
||||
}
|
||||
items.push(HotlistItem {
|
||||
rank,
|
||||
title: title.to_string(),
|
||||
heat: normalize_heat(heat),
|
||||
});
|
||||
if items.len() >= top_n {
|
||||
break;
|
||||
}
|
||||
items.push(HotlistItem { rank, title, heat });
|
||||
}
|
||||
|
||||
items
|
||||
}
|
||||
|
||||
fn normalize_lines(text: &str) -> Vec<String> {
|
||||
text.lines()
|
||||
.map(str::trim)
|
||||
.filter(|line| !line.is_empty())
|
||||
.map(|line| line.split_whitespace().collect::<Vec<_>>().join(" "))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn parse_rank(line: &str) -> Option<u64> {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if trimmed.chars().all(|ch| ch.is_ascii_digit()) {
|
||||
return trimmed.parse::<u64>().ok().filter(|value| *value > 0);
|
||||
if items.is_empty() {
|
||||
return Err(PipeError::Protocol(
|
||||
"知乎热榜采集失败:浏览器脚本未返回有效热榜条目".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let rank_re = Regex::new(r"^(\d{1,2})[\.、\s]").expect("valid rank regex");
|
||||
rank_re
|
||||
.captures(trimmed)
|
||||
.and_then(|capture| capture.get(1))
|
||||
.and_then(|value| value.as_str().parse::<u64>().ok())
|
||||
.filter(|value| *value > 0)
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn looks_like_heat(line: &str) -> bool {
|
||||
let compact = line.replace(' ', "");
|
||||
let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex");
|
||||
heat_re.is_match(compact.as_str())
|
||||
}
|
||||
|
||||
fn normalize_heat(line: &str) -> String {
|
||||
line.replace(' ', "")
|
||||
.trim_end_matches("热度")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn is_noise_line(line: &str) -> bool {
|
||||
matches!(
|
||||
line,
|
||||
"知乎" | "知乎热榜" | "热榜" | "首页" | "发现" | "等你来答" | "更多内容"
|
||||
)
|
||||
}
|
||||
|
||||
fn extract_top_n(instruction: &str) -> usize {
|
||||
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
|
||||
re.captures(&instruction.to_ascii_lowercase())
|
||||
|
||||
Reference in New Issue
Block a user