feat: add browser script skill execution

2026-03-30 02:15:07 +08:00
parent f7e2ff256e
commit d2c9902966
22 changed files with 1775 additions and 249 deletions
--- a/src/runtime/engine.rs
+++ b/src/runtime/engine.rs
@@ -21,7 +21,7 @@ const READ_SKILL_TOOL_NAME: &str = "read_skill";
 const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office";
 const SCREEN_HTML_EXPORT_TOOL_NAME: &str = "screen_html_export";
 const BROWSER_TOOL_CONTRACT_PROMPT: &str = "SuperRPA browser interface contract:\n- Use superrpa_browser as the preferred dedicated SuperRPA interface inside this browser host.\n- browser_action is a legacy alias with the same contract; prefer superrpa_browser when choosing between them.\n- Browser actions allowed by policy are already approved by the user inside this BrowserAttached host.\n- Do not claim a browser action was denied, blocked, or rejected unless an actual tool call returns an error.\n- expected_domain must be the bare hostname only, for example www.zhihu.com.\n- Never include scheme, path, query, fragment, or port in expected_domain.\n- selector values are executed with document.querySelector(...), so they must be valid CSS selectors only.\n- Never use XPath selectors or jQuery-style :contains().\n- Prefer direct navigation to canonical URLs when they are known, instead of clicking text links to reach common pages.\n- If you need broad page content, use getText with a valid CSS selector such as body or a stable container.\n- If a task matches an installed skill, load that skill first and then execute it through the SuperRPA interface.";
-const ZHIHU_HOTLIST_EXECUTION_PROMPT: &str = "Zhihu hotlist execution contract:\n- Treat Zhihu hotlist export/presentation requests as a real browser workflow, not as a text-only summarization task.\n- You must attempt the browser workflow before concluding failure; a prose-only answer is invalid for this workflow.\n- If the current page is not already `https://www.zhihu.com/hot`, navigate there first.\n- Collect the live list with superrpa_browser using `getText` on `main` first; only fall back to `body` or `html` if `main` is unavailable.\n- Extract ordered rows containing `rank`, `title`, and `heat` from the live page text.\n- Do not use shell, web_fetch, web_search_tool, or fabricated sample data for this workflow.\n- Do not repeat the same sentence or section in your final answer.";
+const ZHIHU_HOTLIST_EXECUTION_PROMPT: &str = "Zhihu hotlist execution contract:\n- Treat Zhihu hotlist export/presentation requests as a real browser workflow, not as a text-only summarization task.\n- You must attempt the browser workflow before concluding failure; a prose-only answer is invalid for this workflow.\n- If the current page is not already `https://www.zhihu.com/hot`, navigate there first.\n- If the `zhihu-hotlist.extract_hotlist` skill tool is available, call it before any generic browser probing.\n- Use generic `getText` only as a last-resort fallback when the packaged extractor fails.\n- Extract ordered rows containing `rank`, `title`, and `heat` as structured data.\n- Do not use shell, web_fetch, web_search_tool, or fabricated sample data for this workflow.\n- Do not repeat the same sentence or section in your final answer.";
 const OFFICE_EXPORT_COMPLETION_PROMPT: &str = "Export completion contract:\n- This task requires a real Excel export.\n- After the Zhihu rows are available, you must call openxml_office before finishing.\n- Never fabricate, simulate, or invent substitute hotlist data when a live collection/export task fails.\n- If live collection fails, report the failure concisely instead of producing fake rows.\n- Do not stop after describing how you will parse or export the data.\n- Do not repeat the same sentence or section in your final answer.\n- Your final answer must include the generated local .xlsx path.";
 const SCREEN_EXPORT_COMPLETION_PROMPT: &str = "Presentation completion contract:\n- This task requires a real dashboard artifact.\n- After the Zhihu rows are available, you must call screen_html_export before finishing.\n- Do not stop after describing how you will render or present the data.\n- Do not repeat the same sentence or section in your final answer.\n- Your final answer must include the local .html path and the presentation object.";

@@ -125,6 +125,7 @@ impl RuntimeEngine {
            .skills_prompt_mode(config.skills.prompt_injection_mode)
            .allowed_tools(self.allowed_tools_for_config(
                config,
+                skills_dir,
                browser_surface_present,
                instruction,
            ))
@@ -204,6 +205,7 @@ impl RuntimeEngine {
    fn allowed_tools_for_config(
        &self,
        config: &ZeroClawConfig,
+        skills_dir: &Path,
        browser_surface_present: bool,
        instruction: &str,
    ) -> Option<Vec<String>> {
@@ -228,6 +230,11 @@ impl RuntimeEngine {
        if task_needs_local_file_read(instruction) {
            allowed_tools.push("file_read".to_string());
        }
+        if browser_surface_present {
+            allowed_tools.extend(browser_script_tool_names(&load_runtime_skills(
+                config, skills_dir,
+            )));
+        }
        allowed_tools.dedup();

        if matches!(self.profile, RuntimeProfile::GeneralAssistant) &&
@@ -240,6 +247,20 @@ impl RuntimeEngine {
    }
 }

+/// Collects the `<skill.name>.<tool.name>` id of every tool in `skills` whose kind is `browser_script`.
+fn browser_script_tool_names(skills: &[zeroclaw::skills::Skill]) -> Vec<String> {
+    skills
+        .iter()
+        .flat_map(|skill| {
+            skill
+                .tools
+                .iter()
+                .filter(|tool| tool.kind == "browser_script")
+                .map(move |tool| format!("{}.{}", skill.name, tool.name))
+        })
+        .collect()
+}
+
 fn task_needs_local_file_read(instruction: &str) -> bool {
    let normalized = instruction.trim();
    normalized.contains("/home/") ||