From d2c9902966fb1ec54b837aaf089aa3195fc59e12 Mon Sep 17 00:00:00 2001 From: zyl Date: Mon, 30 Mar 2026 02:15:07 +0800 Subject: [PATCH] feat: add browser script skill execution --- ...30-browser-script-skill-capability-plan.md | 175 ++++++ resources/rules.json | 4 +- scripts/validate_skill_lib.py | 532 ++++++++++++++++++ src/compat/browser_script_skill_tool.rs | 254 +++++++++ src/compat/mod.rs | 1 + src/compat/runtime.rs | 8 + src/compat/workflow_executor.rs | 207 +++---- src/pipe/browser_tool.rs | 8 +- src/pipe/handshake.rs | 17 +- src/pipe/mod.rs | 4 +- src/pipe/protocol.rs | 74 +++ src/runtime/engine.rs | 23 +- src/security/mac_policy.rs | 9 +- tests/browser_script_skill_tool_test.rs | 127 +++++ tests/browser_tool_test.rs | 18 +- tests/compat_browser_tool_test.rs | 70 ++- tests/compat_runtime_test.rs | 364 ++++++++---- tests/pipe_handshake_test.rs | 25 +- tests/pipe_protocol_test.rs | 33 +- tests/skill_lib_validation_test.py | 21 +- third_party/zeroclaw/src/skills/mod.rs | 33 +- third_party/zeroclaw/src/tools/read_skill.rs | 17 + 22 files changed, 1775 insertions(+), 249 deletions(-) create mode 100644 docs/plans/2026-03-30-browser-script-skill-capability-plan.md create mode 100644 scripts/validate_skill_lib.py create mode 100644 src/compat/browser_script_skill_tool.rs create mode 100644 tests/browser_script_skill_tool_test.rs diff --git a/docs/plans/2026-03-30-browser-script-skill-capability-plan.md b/docs/plans/2026-03-30-browser-script-skill-capability-plan.md new file mode 100644 index 0000000..ffbafe4 --- /dev/null +++ b/docs/plans/2026-03-30-browser-script-skill-capability-plan.md @@ -0,0 +1,175 @@ +# Browser Script Skill Capability Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Let zeroclaw-backed sgclaw skills call deterministic browser-context scripts through existing SuperRPA page-execution capabilities. 
+ +**Architecture:** Reuse SuperRPA's existing page script execution path instead of inventing a parallel workflow executor. Extend sgclaw's browser pipe and zeroclaw skill-tool mapping so a skill can declare a browser script tool, execute a packaged script file with parameters in the current page context, and receive structured JSON back. + +**Tech Stack:** Rust (`sgclaw`, vendored `zeroclaw`), Chromium/SuperRPA C++, existing sgclaw browser pipe protocol, skill package `SKILL.toml` / `scripts/`. + +### Task 1: Define the browser-script contract + +**Files:** +- Modify: `third_party/zeroclaw/src/skills/mod.rs` +- Modify: `third_party/zeroclaw/src/tools/mod.rs` +- Create: `third_party/zeroclaw/src/tools/browser_script_skill.rs` +- Test: `tests/compat_browser_tool_test.rs` + +**Step 1: Write the failing test** + +Add a sgclaw-side test proving the browser tool schema accepts a script execution action with required browser-script fields and rejects a missing script payload. + +**Step 2: Run test to verify it fails** + +Run: `cargo test --manifest-path /home/zyl/projects/sgClaw/claw/Cargo.toml --test compat_browser_tool_test` +Expected: FAIL because the browser tool still exposes only `click/type/navigate/getText`. + +**Step 3: Write minimal implementation** + +Extend the browser tool adapter and protocol-facing action parsing to support a deterministic script execution action carrying: +- `action` +- `expected_domain` +- `script` +- optional `args` + +**Step 4: Run test to verify it passes** + +Run: `cargo test --manifest-path /home/zyl/projects/sgClaw/claw/Cargo.toml --test compat_browser_tool_test` +Expected: PASS for the new browser script action coverage. 
+ +**Step 5: Commit** + +```bash +git add tests/compat_browser_tool_test.rs src/compat/browser_tool_adapter.rs src/pipe/protocol.rs third_party/zeroclaw/src/skills/mod.rs third_party/zeroclaw/src/tools/mod.rs third_party/zeroclaw/src/tools/browser_script_skill.rs +git commit -m "feat: add browser script skill tool support" +``` + +### Task 2: Wire browser script execution through the sgclaw host bridge + +**Files:** +- Modify: `src/pipe/protocol.rs` +- Modify: `/home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_pipe_protocol.cc` +- Modify: `/home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_security_gate.cc` +- Test: `/home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_pipe_protocol_mainline_unittest.cc` +- Test: `/home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_process_host_mainline_unittest.cc` + +**Step 1: Write the failing tests** + +Add host-side tests proving: +- sgclaw protocol accepts the new browser script action +- handshake advertises the action +- the security gate allows it only through the sgclaw host allowlist + +**Step 2: Run tests to verify they fail** + +Run the relevant SuperRPA unit tests. +Expected: FAIL because the host protocol and allowlist do not yet include browser script execution. + +**Step 3: Write minimal implementation** + +Reuse existing SuperRPA router support by bridging the new sgclaw action to the host's page script execution capability instead of inventing a second execution path. + +**Step 4: Run tests to verify they pass** + +Run the same SuperRPA unit tests. +Expected: PASS with the new action accepted end-to-end. 
+ +**Step 5: Commit** + +```bash +git add src/pipe/protocol.rs /home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_pipe_protocol.cc /home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_security_gate.cc /home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_pipe_protocol_mainline_unittest.cc /home/zyl/projects/superRpa/src/chrome/browser/superrpa/sgclaw/sgclaw_process_host_mainline_unittest.cc +git commit -m "feat: expose browser script execution to sgclaw" +``` + +### Task 3: Convert Zhihu hotlist collection into a script-backed skill path + +**Files:** +- Create: `/home/zyl/projects/sgClaw/skill_lib/skills/zhihu-hotlist/SKILL.toml` +- Create: `/home/zyl/projects/sgClaw/skill_lib/skills/zhihu-hotlist/scripts/extract_hotlist.js` +- Modify: `/home/zyl/projects/sgClaw/skill_lib/skills/zhihu-hotlist/SKILL.md` +- Modify: `tests/compat_runtime_test.rs` + +**Step 1: Write the failing test** + +Add runtime coverage proving the Zhihu hotlist export flow uses the skill-backed browser script tool instead of repeated `getText` probing. + +**Step 2: Run test to verify it fails** + +Run: `cargo test --manifest-path /home/zyl/projects/sgClaw/claw/Cargo.toml --test compat_runtime_test` +Expected: FAIL because the current flow still uses `read_skill` plus generic `getText`. + +**Step 3: Write minimal implementation** + +Package the hotlist extractor as a deterministic browser script tool returning structured rows and update the skill description to require that tool before export. + +**Step 4: Run test to verify it passes** + +Run: `cargo test --manifest-path /home/zyl/projects/sgClaw/claw/Cargo.toml --test compat_runtime_test` +Expected: PASS with the script-backed tool visible in logs and the generic repeated `getText` fallback removed from the primary path. 
+ +**Step 5: Commit** + +```bash +git add /home/zyl/projects/sgClaw/skill_lib/skills/zhihu-hotlist/SKILL.toml /home/zyl/projects/sgClaw/skill_lib/skills/zhihu-hotlist/scripts/extract_hotlist.js /home/zyl/projects/sgClaw/skill_lib/skills/zhihu-hotlist/SKILL.md tests/compat_runtime_test.rs +git commit -m "feat: make zhihu hotlist extraction script-backed" +``` + +### Task 4: Fix plan visibility in the sgclaw frontend + +**Files:** +- Modify: `/home/zyl/projects/superRpa/src/chrome/browser/resources/superrpa/devtools/functions/sgclaw-chat/sgclaw-chat_state.ts` +- Modify: `/home/zyl/projects/superRpa/src/chrome/browser/resources/superrpa/devtools/functions/sgclaw-chat/sgclaw-chat.ts` +- Test: frontend or host tests covering `[sgclaw/plan]` + +**Step 1: Write the failing test** + +Add coverage proving `[sgclaw/plan]` logs are treated as planner logs and preserve multiline task-step rendering. + +**Step 2: Run test to verify it fails** + +Run the relevant frontend or browser-side tests. +Expected: FAIL because the current frontend only recognizes `[browser/plan]` or `[plan]`. + +**Step 3: Write minimal implementation** + +Accept `[sgclaw/plan]` as a planner log source and keep the plan text rendered as multiple visible lines. + +**Step 4: Run test to verify it passes** + +Run the same tests. +Expected: PASS with sgclaw planner logs rendered correctly. + +**Step 5: Commit** + +```bash +git add /home/zyl/projects/superRpa/src/chrome/browser/resources/superrpa/devtools/functions/sgclaw-chat/sgclaw-chat_state.ts /home/zyl/projects/superRpa/src/chrome/browser/resources/superrpa/devtools/functions/sgclaw-chat/sgclaw-chat.ts +git commit -m "fix: render sgclaw planner logs in chat ui" +``` + +### Task 5: Full verification + +**Files:** +- Verify only + +**Step 1: Run sgclaw Rust tests** + +Run: `cargo test --manifest-path /home/zyl/projects/sgClaw/claw/Cargo.toml` +Expected: PASS. + +**Step 2: Run relevant SuperRPA unit tests** + +Run the updated sgclaw host and frontend tests. 
+Expected: PASS. + +**Step 3: Run a manual browser acceptance flow** + +Verify that: +- startup logs include runtime and skill versions +- `[sgclaw/plan]` is shown as a real multi-line plan +- Zhihu hotlist export invokes the script-backed skill tool +- the flow returns a real `.xlsx` path without repeated self-referential output + +**Step 4: Commit verification artifacts if needed** + +Keep verification commits separate from behavior changes. diff --git a/resources/rules.json b/resources/rules.json index dce381e..2dc4b31 100644 --- a/resources/rules.json +++ b/resources/rules.json @@ -13,7 +13,7 @@ ] }, "pipe_actions": { - "allowed": ["click", "type", "navigate", "getText"], - "blocked": ["eval", "executeJsInPage"] + "allowed": ["click", "type", "navigate", "getText", "eval"], + "blocked": ["executeJsInPage"] } } diff --git a/scripts/validate_skill_lib.py b/scripts/validate_skill_lib.py new file mode 100644 index 0000000..9df158a --- /dev/null +++ b/scripts/validate_skill_lib.py @@ -0,0 +1,532 @@ +import argparse +import re +import sys +import tomllib +from pathlib import Path +from typing import NamedTuple + + +MAX_TEXT_FILE_BYTES = 512 * 1024 +SCRIPT_SUFFIXES = ( + ".sh", + ".bash", + ".zsh", + ".ksh", + ".fish", + ".ps1", + ".bat", + ".cmd", +) +HIGH_RISK_PATTERNS = ( + (re.compile(r"(?im)\bcurl\b[^\n|]{0,200}\|\s*(?:sh|bash|zsh)\b"), "curl-pipe-shell"), + (re.compile(r"(?im)\bwget\b[^\n|]{0,200}\|\s*(?:sh|bash|zsh)\b"), "wget-pipe-shell"), + (re.compile(r"(?im)\b(?:invoke-expression|iex)\b"), "powershell-iex"), + (re.compile(r"(?im)\brm\s+-rf\s+/"), "destructive-rm-rf-root"), + (re.compile(r"(?im)\bnc(?:at)?\b[^\n]{0,120}\s-e\b"), "netcat-remote-exec"), + (re.compile(r"(?im)\bdd\s+if="), "disk-overwrite-dd"), + (re.compile(r"(?im)\bmkfs(?:\.[a-z0-9]+)?\b"), "filesystem-format"), + (re.compile(r"(?im):\(\)\s*\{\s*:\|\:&\s*\};:"), "fork-bomb"), +) +MARKDOWN_LINK_RE = re.compile(r"\[[^\]]*\]\(([^)]+)\)") + +REPO_ROOT = Path(__file__).resolve().parents[1] 
+SKILL_LIB_ROOT = REPO_ROOT.parent / "skill_lib" +SKILLS_DIR = SKILL_LIB_ROOT / "skills" + + +class SkillRecord(NamedTuple): + name: str + description: str + version: str + author: str | None + tags: list[str] + prompt_body: str + location: Path + + +class AuditReport(NamedTuple): + files_scanned: int + findings: list[str] + + +class ValidationResult(NamedTuple): + record: SkillRecord + report: AuditReport + ok: bool + + +def discover_skill_dirs(skills_dir: Path | None = None) -> list[Path]: + root = skills_dir or SKILLS_DIR + if not root.exists(): + return [] + return sorted(path for path in root.iterdir() if path.is_dir()) + + +def load_skill(skill_dir: Path) -> SkillRecord: + manifest_path = skill_dir / "SKILL.toml" + markdown_path = skill_dir / "SKILL.md" + + if manifest_path.is_file(): + manifest = tomllib.loads(manifest_path.read_text(encoding="utf-8")) + skill_meta = manifest.get("skill", {}) + prompts = manifest.get("prompts", []) + body = "" + if markdown_path.is_file(): + _, body = parse_skill_markdown(markdown_path.read_text(encoding="utf-8")) + elif prompts: + body = "\n\n".join(str(prompt) for prompt in prompts) + + description = skill_meta.get("description") + if not description or not str(description).strip(): + description = extract_description(body) + + return SkillRecord( + name=skill_meta.get("name") or skill_dir.name, + description=str(description), + version=str(skill_meta.get("version") or "0.1.0"), + author=skill_meta.get("author") or None, + tags=list(skill_meta.get("tags", [])), + prompt_body=body, + location=manifest_path, + ) + + skill_path = markdown_path + content = skill_path.read_text(encoding="utf-8") + meta, body = parse_skill_markdown(content) + + name = meta["name"] or skill_dir.name + description = meta["description"] + if not description or not description.strip(): + description = extract_description(body) + + version = meta["version"] or "0.1.0" + author = meta["author"] or None + tags = list(meta["tags"]) + + return 
SkillRecord( + name=name, + description=description, + version=version, + author=author, + tags=tags, + prompt_body=body, + location=skill_path, + ) + + +def validate_all_skills(allow_scripts: bool = False) -> list[ValidationResult]: + results = [] + for skill_dir in discover_skill_dirs(): + record = load_skill(skill_dir) + report = audit_skill_directory(skill_dir, allow_scripts=allow_scripts) + results.append(ValidationResult(record=record, report=report, ok=not report.findings)) + return results + + +def parse_skill_markdown(content: str) -> tuple[dict[str, object], str]: + frontmatter = split_skill_frontmatter(content) + if frontmatter is None: + return empty_meta(), content + raw_frontmatter, body = frontmatter + return parse_simple_frontmatter(raw_frontmatter), body + + +def split_skill_frontmatter(content: str) -> tuple[str, str] | None: + normalized = content.replace("\r\n", "\n") + if not normalized.startswith("---\n"): + return None + rest = normalized[len("---\n") :] + marker = "\n---\n" + idx = rest.find(marker) + if idx != -1: + return rest[:idx], rest[idx + len(marker) :] + if rest.endswith("\n---"): + return rest[:-4], "" + return None + + +def parse_simple_frontmatter(frontmatter: str) -> dict[str, object]: + meta = empty_meta() + collecting_tags = False + + for raw_line in frontmatter.splitlines(): + if collecting_tags: + trimmed = raw_line.strip() + if trimmed.startswith("- "): + tag = trimmed[2:].strip().strip('"').strip("'") + if tag: + meta["tags"].append(tag) + continue + collecting_tags = False + + if ":" not in raw_line: + continue + key, value = raw_line.split(":", 1) + key = key.strip() + value = value.strip().strip('"').strip("'") + + if key == "name": + meta["name"] = value + elif key == "description": + meta["description"] = value + elif key == "version": + meta["version"] = value + elif key == "author": + meta["author"] = value + elif key == "tags": + if not value: + collecting_tags = True + else: + cleaned = 
value.lstrip("[").rstrip("]") + meta["tags"] = [ + item.strip().strip('"').strip("'") + for item in cleaned.split(",") + if item.strip().strip('"').strip("'") + ] + + return meta + + +def empty_meta() -> dict[str, object]: + return { + "name": None, + "description": None, + "version": None, + "author": None, + "tags": [], + } + + +def extract_description(body: str) -> str: + for line in body.splitlines(): + if line.startswith("#"): + continue + if not line.strip(): + continue + return line.strip() + return "No description" + + +def audit_skill_directory(skill_dir: Path, allow_scripts: bool = False) -> AuditReport: + if not skill_dir.exists(): + raise FileNotFoundError(f"Skill source does not exist: {skill_dir}") + if not skill_dir.is_dir(): + raise NotADirectoryError(f"Skill source must be a directory: {skill_dir}") + + canonical_root = skill_dir.resolve() + findings: list[str] = [] + files_scanned = 0 + + has_manifest = (canonical_root / "SKILL.md").is_file() or (canonical_root / "SKILL.toml").is_file() + if not has_manifest: + findings.append( + "Skill root must include SKILL.md or SKILL.toml for deterministic auditing." 
+ ) + + for path in collect_paths_depth_first(canonical_root): + files_scanned += 1 + findings.extend(audit_path(canonical_root, path, allow_scripts=allow_scripts)) + + return AuditReport(files_scanned=files_scanned, findings=findings) + + +def collect_paths_depth_first(root: Path) -> list[Path]: + stack = [root] + discovered: list[Path] = [] + + while stack: + current = stack.pop() + discovered.append(current) + if not current.is_dir(): + continue + children = sorted(current.iterdir()) + for child in reversed(children): + stack.append(child) + + return discovered + + +def audit_path(root: Path, path: Path, allow_scripts: bool) -> list[str]: + findings: list[str] = [] + metadata = path.lstat() + rel = relative_display(root, path) + + if path.is_symlink(): + findings.append(f"{rel}: symlinks are not allowed in installed skills.") + return findings + + if path.is_dir(): + return findings + + if not allow_scripts and is_unsupported_script_file(path): + findings.append(f"{rel}: script-like files are blocked by skill security policy.") + + if metadata.st_size > MAX_TEXT_FILE_BYTES and (is_markdown_file(path) or is_toml_file(path)): + findings.append(f"{rel}: file is too large for static audit (>{MAX_TEXT_FILE_BYTES} bytes).") + return findings + + if is_markdown_file(path): + findings.extend(audit_markdown_file(root, path)) + elif is_toml_file(path): + findings.extend(audit_manifest_file(root, path)) + + return findings + + +def audit_markdown_file(root: Path, path: Path) -> list[str]: + findings: list[str] = [] + content = path.read_text(encoding="utf-8") + rel = relative_display(root, path) + + pattern = detect_high_risk_snippet(content) + if pattern: + findings.append(f"{rel}: detected high-risk command pattern ({pattern}).") + + for target in extract_markdown_links(content): + findings.extend(audit_markdown_link_target(root, path, target)) + + return findings + + +def audit_manifest_file(root: Path, path: Path) -> list[str]: + findings: list[str] = [] + content = 
path.read_text(encoding="utf-8") + rel = relative_display(root, path) + + pattern = detect_high_risk_snippet(content) + if pattern: + findings.append(f"{rel}: detected high-risk command pattern ({pattern}).") + + if any(operator in content for operator in ("&&", "||", ";", "`", "$(")): + findings.append(f"{rel}: manifest content uses shell chaining operators, which are blocked.") + + return findings + + +def extract_markdown_links(content: str) -> list[str]: + return [match.group(1).strip() for match in MARKDOWN_LINK_RE.finditer(content)] + + +def audit_markdown_link_target(root: Path, source: Path, raw_target: str) -> list[str]: + findings: list[str] = [] + normalized = normalize_markdown_target(raw_target) + if not normalized or normalized.startswith("#"): + return findings + + rel = relative_display(root, source) + scheme = url_scheme(normalized) + if scheme: + if scheme in {"http", "https", "mailto"}: + if has_markdown_suffix(normalized): + findings.append( + f"{rel}: remote markdown links are blocked by skill security audit ({normalized})." 
+ ) + return findings + findings.append(f"{rel}: unsupported URL scheme in markdown link ({normalized}).") + return findings + + stripped = strip_query_and_fragment(normalized) + if not stripped: + return findings + + if looks_like_absolute_path(stripped): + findings.append(f"{rel}: absolute markdown link paths are not allowed ({normalized}).") + return findings + + if has_script_suffix(stripped): + findings.append(f"{rel}: markdown links to script files are blocked ({normalized}).") + + if not has_markdown_suffix(stripped): + return findings + + base_dir = source.parent + linked_path = base_dir / stripped + + try: + canonical_target = linked_path.resolve(strict=True) + except FileNotFoundError: + if is_cross_skill_reference(stripped): + return findings + findings.append(f"{rel}: markdown link points to a missing file ({normalized}).") + return findings + + if not is_subpath(canonical_target, root): + skills_root = skills_root_for(root) + if skills_root and is_subpath(canonical_target, skills_root): + if not canonical_target.is_file(): + findings.append(f"{rel}: markdown link must point to a file ({normalized}).") + return findings + findings.append(f"{rel}: markdown link escapes skill root ({normalized}).") + return findings + + if not canonical_target.is_file(): + findings.append(f"{rel}: markdown link must point to a file ({normalized}).") + + return findings + + +def detect_high_risk_snippet(content: str) -> str | None: + for pattern, label in HIGH_RISK_PATTERNS: + if pattern.search(content): + return label + return None + + +def normalize_markdown_target(raw_target: str) -> str: + trimmed = raw_target.strip() + if trimmed.startswith("<"): + trimmed = trimmed[1:] + if trimmed.endswith(">"): + trimmed = trimmed[:-1] + parts = trimmed.split() + return parts[0] if parts else "" + + +def strip_query_and_fragment(target: str) -> str: + end = len(target) + hash_idx = target.find("#") + if hash_idx != -1: + end = min(end, hash_idx) + query_idx = target.find("?") + if 
query_idx != -1: + end = min(end, query_idx) + return target[:end] + + +def url_scheme(target: str) -> str | None: + if ":" not in target: + return None + scheme, rest = target.split(":", 1) + if not scheme or not rest: + return None + if not all(ch.isalnum() or ch in "+-." for ch in scheme): + return None + return scheme + + +def looks_like_absolute_path(target: str) -> bool: + if Path(target).is_absolute(): + return True + if len(target) >= 3 and target[0].isalpha() and target[1] == ":" and target[2] in "\\/": + return True + return target.startswith("~/") + + +def is_cross_skill_reference(target: str) -> bool: + normalized = target[2:] if target.startswith("./") else target + path = Path(target) + + if ".." in path.parts: + return True + + return "/" not in normalized and "\\" not in normalized and has_markdown_suffix(normalized) + + +def skills_root_for(root: Path) -> Path | None: + current = root + while True: + if current.name == "skills": + return current + if current.parent == current: + return None + current = current.parent + + +def relative_display(root: Path, path: Path) -> str: + try: + rel = path.relative_to(root) + except ValueError: + return str(path) + return "." if str(rel) == "." 
else str(rel) + + +def is_markdown_file(path: Path) -> bool: + return path.suffix.lower() in {".md", ".markdown"} + + +def is_toml_file(path: Path) -> bool: + return path.suffix.lower() == ".toml" + + +def is_unsupported_script_file(path: Path) -> bool: + return has_script_suffix(str(path).lower()) or has_shell_shebang(path) + + +def has_script_suffix(raw: str) -> bool: + lowered = raw.lower() + return any(lowered.endswith(suffix) for suffix in SCRIPT_SUFFIXES) + + +def has_shell_shebang(path: Path) -> bool: + try: + prefix = path.read_bytes()[:128] + except OSError: + return False + + first_line = prefix.decode("utf-8", errors="ignore").splitlines()[0].strip().lower() if prefix else "" + interpreter = shebang_interpreter(first_line) + return interpreter in {"sh", "bash", "zsh", "ksh", "fish", "pwsh", "powershell"} + + +def shebang_interpreter(line: str) -> str | None: + if not line.startswith("#!"): + return None + + shebang = line[2:].strip() + if not shebang: + return None + + parts = shebang.split() + first = Path(parts[0]).name + + if first == "env": + for part in parts[1:]: + if part.startswith("-"): + continue + return Path(part).name + return None + + return first + + +def has_markdown_suffix(target: str) -> bool: + lowered = target.lower() + return lowered.endswith(".md") or lowered.endswith(".markdown") + + +def is_subpath(path: Path, root: Path) -> bool: + try: + path.relative_to(root) + return True + except ValueError: + return False + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Validate the sibling skill_lib against ZeroClaw-like rules.") + parser.add_argument( + "--allow-scripts", + action="store_true", + help="Allow shell-script files during auditing.", + ) + args = parser.parse_args(argv) + + results = validate_all_skills(allow_scripts=args.allow_scripts) + if not results: + print(f"FAIL no skills discovered under {SKILLS_DIR}") + return 1 + + all_ok = True + for result in results: + status = 
"PASS" if result.ok else "FAIL" + print(f"{status} {result.record.name}") + for finding in result.report.findings: + print(f" - {finding}") + all_ok = all_ok and result.ok + + print(f"Checked {len(results)} skills in {SKILL_LIB_ROOT}") + return 0 if all_ok else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/compat/browser_script_skill_tool.rs b/src/compat/browser_script_skill_tool.rs new file mode 100644 index 0000000..5e9164b --- /dev/null +++ b/src/compat/browser_script_skill_tool.rs @@ -0,0 +1,254 @@ +use std::collections::HashMap; +use std::fs; +use std::path::{Path, PathBuf}; + +use async_trait::async_trait; +use reqwest::Url; +use serde_json::{json, Value}; +use zeroclaw::skills::{Skill, SkillTool}; +use zeroclaw::tools::{Tool, ToolResult}; + +use crate::pipe::{Action, BrowserPipeTool, Transport}; + +pub struct BrowserScriptSkillTool { + tool_name: String, + tool_description: String, + script_path: PathBuf, + args: HashMap, + browser_tool: BrowserPipeTool, +} + +impl BrowserScriptSkillTool { + pub fn new( + skill_name: &str, + tool: &SkillTool, + skill_root: &Path, + browser_tool: BrowserPipeTool, + ) -> anyhow::Result { + let script_path = skill_root.join(&tool.command); + let canonical_skill_root = skill_root.canonicalize().unwrap_or_else(|_| skill_root.to_path_buf()); + let canonical_script_path = script_path + .canonicalize() + .map_err(|err| anyhow::anyhow!("failed to resolve browser script {}: {err}", script_path.display()))?; + if !canonical_script_path.starts_with(&canonical_skill_root) { + anyhow::bail!( + "browser script path escapes skill root: {}", + canonical_script_path.display() + ); + } + + Ok(Self { + tool_name: format!("{}.{}", skill_name, tool.name), + tool_description: tool.description.clone(), + script_path: canonical_script_path, + args: tool.args.clone(), + browser_tool, + }) + } + + fn build_parameters_schema(&self) -> Value { + let mut properties = serde_json::Map::new(); + let mut required = 
vec![Value::String("expected_domain".to_string())]; + + properties.insert( + "expected_domain".to_string(), + json!({ + "type": "string", + "description": "Bare hostname for the current page, for example www.zhihu.com." + }), + ); + + for (name, description) in &self.args { + properties.insert( + name.clone(), + json!({ + "type": "string", + "description": description + }), + ); + required.push(Value::String(name.clone())); + } + + json!({ + "type": "object", + "properties": properties, + "required": required + }) + } +} + +#[async_trait] +impl Tool for BrowserScriptSkillTool { + fn name(&self) -> &str { + &self.tool_name + } + + fn description(&self) -> &str { + &self.tool_description + } + + fn parameters_schema(&self) -> Value { + self.build_parameters_schema() + } + + async fn execute(&self, args: Value) -> anyhow::Result { + let mut args = match args { + Value::Object(args) => args, + other => { + return Ok(failed_tool_result(format!( + "expected object arguments, got {other}" + ))) + } + }; + + let raw_expected_domain = match args.remove("expected_domain") { + Some(Value::String(value)) if !value.trim().is_empty() => value, + Some(other) => { + return Ok(failed_tool_result(format!( + "expected_domain must be a non-empty string, got {other}" + ))) + } + None => return Ok(failed_tool_result("missing required field expected_domain".to_string())), + }; + let expected_domain = match normalize_domain_like(&raw_expected_domain) { + Some(value) => value, + None => { + return Ok(failed_tool_result(format!( + "expected_domain must resolve to a hostname, got {raw_expected_domain:?}" + ))) + } + }; + + for required_arg in self.args.keys() { + if !args.contains_key(required_arg) { + return Ok(failed_tool_result(format!( + "missing required field {required_arg}" + ))); + } + } + + let script_body = match fs::read_to_string(&self.script_path) { + Ok(value) => value, + Err(err) => { + return Ok(failed_tool_result(format!( + "failed to read browser script {}: {err}", + 
self.script_path.display() + ))) + } + }; + + let wrapped_script = wrap_browser_script(&script_body, &Value::Object(args.clone())); + let result = match self.browser_tool.invoke( + Action::Eval, + json!({ "script": wrapped_script }), + &expected_domain, + ) { + Ok(result) => result, + Err(err) => return Ok(failed_tool_result(err.to_string())), + }; + + if !result.success { + return Ok(failed_tool_result(format_browser_script_error(&result.data))); + } + + let payload = result + .data + .get("text") + .cloned() + .unwrap_or_else(|| result.data.clone()); + Ok(ToolResult { + success: true, + output: stringify_tool_payload(&payload)?, + error: None, + }) + } +} + +pub fn build_browser_script_skill_tools( + skills: &[Skill], + browser_tool: BrowserPipeTool, +) -> Result>, anyhow::Error> { + let mut tools: Vec> = Vec::new(); + + for skill in skills { + let Some(location) = skill.location.as_ref() else { + continue; + }; + let Some(skill_root) = location.parent() else { + continue; + }; + + for tool in &skill.tools { + if tool.kind != "browser_script" { + continue; + } + tools.push(Box::new(BrowserScriptSkillTool::new( + &skill.name, + tool, + skill_root, + browser_tool.clone(), + )?)); + } + } + + Ok(tools) +} + +fn wrap_browser_script(script_body: &str, args: &Value) -> String { + format!( + "(function() {{\nconst args = {};\n{}\n}})()", + serde_json::to_string(args).unwrap_or_else(|_| "{}".to_string()), + script_body + ) +} + +fn stringify_tool_payload(payload: &Value) -> anyhow::Result { + Ok(match payload { + Value::String(value) => value.clone(), + Value::Null => "null".to_string(), + Value::Bool(_) | Value::Number(_) | Value::Array(_) | Value::Object(_) => { + serde_json::to_string(payload)? 
+ } + }) +} + +fn failed_tool_result(error: String) -> ToolResult { + ToolResult { + success: false, + output: String::new(), + error: Some(error), + } +} + +fn format_browser_script_error(data: &Value) -> String { + data.get("error") + .and_then(|value| value.get("message")) + .and_then(Value::as_str) + .map(str::to_string) + .unwrap_or_else(|| format!("browser script failed: {data}")) +} + +fn normalize_domain_like(raw: &str) -> Option { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return None; + } + + if let Ok(url) = Url::parse(trimmed) { + return url.host_str().map(|host| host.to_ascii_lowercase()); + } + + let host = trimmed + .trim_start_matches("https://") + .trim_start_matches("http://") + .split(['/', '?', '#']) + .next() + .unwrap_or_default() + .split(':') + .next() + .unwrap_or_default() + .trim() + .to_ascii_lowercase(); + + (!host.is_empty()).then_some(host) +} diff --git a/src/compat/mod.rs b/src/compat/mod.rs index 7687b55..39eef2e 100644 --- a/src/compat/mod.rs +++ b/src/compat/mod.rs @@ -1,3 +1,4 @@ +pub mod browser_script_skill_tool; pub mod browser_tool_adapter; pub mod config_adapter; pub mod cron_adapter; diff --git a/src/compat/runtime.rs b/src/compat/runtime.rs index c2afdb4..782a540 100644 --- a/src/compat/runtime.rs +++ b/src/compat/runtime.rs @@ -12,6 +12,7 @@ use zeroclaw::providers::traits::{ ProviderCapabilities, StreamEvent, StreamOptions, StreamResult, }; +use crate::compat::browser_script_skill_tool::build_browser_script_skill_tools; use crate::compat::browser_tool_adapter::ZeroClawBrowserTool; use crate::compat::config_adapter::{ build_zeroclaw_config_from_sgclaw_settings, @@ -120,6 +121,7 @@ pub async fn execute_task_with_provider( message: format!("loaded skills: {}", loaded_skill_labels.join(", ")), })?; } + let browser_tool_for_scripts = browser_tool.clone(); let mut tools: Vec> = if browser_surface_present { vec![ Box::new(ZeroClawBrowserTool::new_superrpa(browser_tool.clone())), @@ -128,6 +130,12 @@ pub async fn 
execute_task_with_provider( } else { Vec::new() }; + if browser_surface_present { + tools.extend( + build_browser_script_skill_tools(&loaded_skills, browser_tool_for_scripts) + .map_err(map_anyhow_to_pipe_error)?, + ); + } if matches!(settings.office_backend, OfficeBackend::OpenXml) && engine.should_attach_openxml_office_tool(instruction) { diff --git a/src/compat/workflow_executor.rs b/src/compat/workflow_executor.rs index 933e175..12f0a8b 100644 --- a/src/compat/workflow_executor.rs +++ b/src/compat/workflow_executor.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeSet; +use std::fs; use std::path::Path; use regex::Regex; @@ -11,8 +11,6 @@ use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport}; const ZHIHU_DOMAIN: &str = "www.zhihu.com"; const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot"; -const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"]; - #[derive(Debug, Clone, PartialEq, Eq)] pub enum WorkflowRoute { ZhihuHotlistExportXlsx, @@ -87,28 +85,28 @@ fn collect_hotlist_items( top_n: usize, ) -> Result, PipeError> { navigate_hotlist_with_retry(transport, browser_tool)?; - - for selector in HOTLIST_ROOT_SELECTORS { - transport.send(&AgentMessage::LogEntry { - level: "info".to_string(), - message: format!("getText {selector}"), - })?; - let response = browser_tool.invoke( - Action::GetText, - json!({ "selector": selector }), - ZHIHU_DOMAIN, - )?; - if !response.success { - continue; - } - let text = response.data["text"].as_str().unwrap_or_default(); - let items = parse_hotlist_items(text, top_n); - if !items.is_empty() { - return Ok(items); - } + transport.send(&AgentMessage::LogEntry { + level: "info".to_string(), + message: "call zhihu-hotlist.extract_hotlist".to_string(), + })?; + let response = browser_tool.invoke( + Action::Eval, + json!({ "script": load_hotlist_extractor_script(top_n)? 
}), + ZHIHU_DOMAIN, + )?; + if !response.success { + return Err(PipeError::Protocol(format!( + "知乎热榜采集失败:{}", + response + .data + .get("error") + .and_then(|value| value.get("message")) + .and_then(Value::as_str) + .unwrap_or("browser script execution failed") + ))); } - Ok(Vec::new()) + parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data)) } fn navigate_hotlist_with_retry( @@ -212,130 +210,71 @@ fn export_screen( Ok(format!("已生成知乎热榜大屏 {output_path}")) } -fn parse_hotlist_items(text: &str, top_n: usize) -> Vec { - let mut items = parse_single_line_items(text, top_n); - if !items.is_empty() { - return items; - } - - let lines = normalize_lines(text); - let mut seen_ranks = BTreeSet::new(); - let mut idx = 0usize; - - while idx < lines.len() && items.len() < top_n { - let Some(rank) = parse_rank(&lines[idx]) else { - idx += 1; - continue; - }; - if !seen_ranks.insert(rank) { - idx += 1; - continue; - } - - let mut title = None; - let mut heat = None; - for candidate in lines.iter().skip(idx + 1).take(6) { - if parse_rank(candidate).is_some() { - break; - } - if heat.is_none() && looks_like_heat(candidate) { - heat = Some(normalize_heat(candidate)); - continue; - } - if title.is_none() && !is_noise_line(candidate) { - title = Some(candidate.clone()); - } - } - - if let (Some(title), Some(heat)) = (title, heat) { - items.push(HotlistItem { rank, title, heat }); - } - idx += 1; - } - - items.sort_by_key(|item| item.rank); - items.truncate(top_n); - items +fn load_hotlist_extractor_script(top_n: usize) -> Result { + let script_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR"))) + .join("skill_lib") + .join("skills") + .join("zhihu-hotlist") + .join("scripts") + .join("extract_hotlist.js"); + let script = fs::read_to_string(&script_path).map_err(|err| { + PipeError::Protocol(format!( + "failed to read zhihu hotlist extractor script {}: {err}", + script_path.display() + )) + })?; + 
Ok(format!( + "(function() {{\nconst args = {};\n{}\n}})()", + json!({ "top_n": top_n.to_string() }), + script + )) } -fn parse_single_line_items(text: &str, top_n: usize) -> Vec { - let re = Regex::new( - r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$", - ) - .expect("valid hotlist single-line regex"); - let mut items = Vec::new(); - let mut seen_ranks = BTreeSet::new(); +fn parse_hotlist_items_payload(payload: &Value) -> Result, PipeError> { + let normalized_payload = if let Some(text) = payload.as_str() { + serde_json::from_str::(text).unwrap_or_else(|_| Value::String(text.to_string())) + } else { + payload.clone() + }; - for capture in re.captures_iter(text) { - let rank = capture - .get(1) - .and_then(|value| value.as_str().parse::().ok()) - .unwrap_or_default(); - if rank == 0 || !seen_ranks.insert(rank) { + let rows = normalized_payload + .get("rows") + .and_then(Value::as_array) + .ok_or_else(|| { + PipeError::Protocol("知乎热榜采集失败:浏览器脚本未返回 rows".to_string()) + })?; + + let mut items = Vec::new(); + for row in rows { + let Some(cells) = row.as_array() else { + continue; + }; + if cells.len() != 3 { continue; } - let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or(""); - let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or(""); + + let rank = cells[0] + .as_u64() + .or_else(|| cells[0].as_str().and_then(|value| value.parse::().ok())) + .unwrap_or((items.len() + 1) as u64); + let title = cells[1].as_str().unwrap_or_default().trim().to_string(); + let heat = cells[2].as_str().unwrap_or_default().trim().to_string(); if title.is_empty() || heat.is_empty() { continue; } - items.push(HotlistItem { - rank, - title: title.to_string(), - heat: normalize_heat(heat), - }); - if items.len() >= top_n { - break; - } + items.push(HotlistItem { rank, title, heat }); } - items -} - -fn normalize_lines(text: &str) -> Vec { - text.lines() - .map(str::trim) - .filter(|line| !line.is_empty()) - .map(|line| 
line.split_whitespace().collect::>().join(" ")) - .collect() -} - -fn parse_rank(line: &str) -> Option { - let trimmed = line.trim(); - if trimmed.is_empty() { - return None; - } - if trimmed.chars().all(|ch| ch.is_ascii_digit()) { - return trimmed.parse::().ok().filter(|value| *value > 0); + if items.is_empty() { + return Err(PipeError::Protocol( + "知乎热榜采集失败:浏览器脚本未返回有效热榜条目".to_string(), + )); } - let rank_re = Regex::new(r"^(\d{1,2})[\.、\s]").expect("valid rank regex"); - rank_re - .captures(trimmed) - .and_then(|capture| capture.get(1)) - .and_then(|value| value.as_str().parse::().ok()) - .filter(|value| *value > 0) + Ok(items) } - -fn looks_like_heat(line: &str) -> bool { - let compact = line.replace(' ', ""); - let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex"); - heat_re.is_match(compact.as_str()) -} - -fn normalize_heat(line: &str) -> String { - line.replace(' ', "") - .trim_end_matches("热度") - .to_string() -} - -fn is_noise_line(line: &str) -> bool { - matches!( - line, - "知乎" | "知乎热榜" | "热榜" | "首页" | "发现" | "等你来答" | "更多内容" - ) -} - fn extract_top_n(instruction: &str) -> usize { let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex"); re.captures(&instruction.to_ascii_lowercase()) diff --git a/src/pipe/browser_tool.rs b/src/pipe/browser_tool.rs index 12210f5..6b94a20 100644 --- a/src/pipe/browser_tool.rs +++ b/src/pipe/browser_tool.rs @@ -4,7 +4,9 @@ use std::time::{Duration, Instant}; use serde_json::Value; -use crate::pipe::protocol::{Action, AgentMessage, BrowserMessage, SecurityFields, Timing}; +use crate::pipe::protocol::{ + Action, AgentMessage, BrowserMessage, ExecutionSurfaceMetadata, SecurityFields, Timing, +}; use crate::pipe::{PipeError, Transport}; use crate::security::{sign_command, MacPolicy}; @@ -53,6 +55,10 @@ impl BrowserPipeTool { self } + pub fn surface_metadata(&self) -> ExecutionSurfaceMetadata { + self.mac_policy.privileged_surface_metadata() + } + pub fn invoke( &self, 
action: Action, diff --git a/src/pipe/handshake.rs b/src/pipe/handshake.rs index 08803b6..7ae34a4 100644 --- a/src/pipe/handshake.rs +++ b/src/pipe/handshake.rs @@ -2,7 +2,9 @@ use std::time::Duration; use uuid::Uuid; -use crate::pipe::protocol::{supported_actions, AgentMessage, BrowserMessage, PROTOCOL_VERSION}; +use crate::pipe::protocol::{ + supported_actions, AgentMessage, BrowserMessage, ExecutionSurfaceMetadata, PROTOCOL_VERSION, +}; use crate::pipe::{PipeError, Transport}; use crate::security::derive_session_key; @@ -13,6 +15,19 @@ pub struct HandshakeResult { pub capabilities: Vec, } +impl HandshakeResult { + pub fn browser_surface_metadata(&self) -> Option { + self.capabilities + .iter() + .any(|capability| capability == "browser_action") + .then(|| { + ExecutionSurfaceMetadata::privileged_browser_pipe( + "browser_host_and_mac_policy", + ) + }) + } +} + pub fn perform_handshake( transport: &T, timeout: Duration, diff --git a/src/pipe/mod.rs b/src/pipe/mod.rs index 133fa22..6fa7e32 100644 --- a/src/pipe/mod.rs +++ b/src/pipe/mod.rs @@ -5,8 +5,8 @@ pub mod protocol; pub use browser_tool::{BrowserPipeTool, CommandOutput}; pub use handshake::{perform_handshake, HandshakeResult}; pub use protocol::{ - supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage, - SecurityFields, Timing, + supported_actions, Action, AgentMessage, BrowserContext, BrowserMessage, + ConversationMessage, ExecutionSurfaceKind, ExecutionSurfaceMetadata, SecurityFields, Timing, }; use std::io::{BufRead, BufReader, Read, Write}; diff --git a/src/pipe/protocol.rs b/src/pipe/protocol.rs index c98cbae..e8298da 100644 --- a/src/pipe/protocol.rs +++ b/src/pipe/protocol.rs @@ -3,6 +3,49 @@ use serde_json::{json, Value}; pub const PROTOCOL_VERSION: &str = "1.0"; +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ExecutionSurfaceKind { + PrivilegedBrowserPipe, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, 
Deserialize, Default)] +pub struct BrowserContext { + pub page_url: String, + pub page_title: String, +} + +impl BrowserContext { + pub fn is_empty(&self) -> bool { + self.page_url.trim().is_empty() && self.page_title.trim().is_empty() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ExecutionSurfaceMetadata { + pub kind: ExecutionSurfaceKind, + pub privileged: bool, + pub defines_runtime_identity: bool, + pub guard: String, + #[serde(default)] + pub allowed_domains: Vec, + #[serde(default)] + pub allowed_actions: Vec, +} + +impl ExecutionSurfaceMetadata { + pub fn privileged_browser_pipe(guard: impl Into) -> Self { + Self { + kind: ExecutionSurfaceKind::PrivilegedBrowserPipe, + privileged: true, + defines_runtime_identity: false, + guard: guard.into(), + allowed_domains: Vec::new(), + allowed_actions: Vec::new(), + } + } +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum BrowserMessage { @@ -34,6 +77,34 @@ pub enum BrowserMessage { }, } +impl BrowserMessage { + pub fn browser_context(&self) -> Option { + match self { + Self::SubmitTask { + page_url, + page_title, + .. + } => { + let context = BrowserContext { + page_url: page_url.clone(), + page_title: page_title.clone(), + }; + (!context.is_empty()).then_some(context) + } + _ => None, + } + } + + pub fn requested_surface_metadata(&self) -> Option { + match self { + Self::SubmitTask { .. 
} => Some(ExecutionSurfaceMetadata::privileged_browser_pipe( + "browser_host_and_mac_policy", + )), + _ => None, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ConversationMessage { pub role: String, @@ -71,6 +142,7 @@ pub enum Action { Type, Navigate, GetText, + Eval, GetHtml, WaitForSelector, PageScreenshot, @@ -90,6 +162,7 @@ impl Action { Action::Type => "type", Action::Navigate => "navigate", Action::GetText => "getText", + Action::Eval => "eval", Action::GetHtml => "getHtml", Action::WaitForSelector => "waitForSelector", Action::PageScreenshot => "pageScreenshot", @@ -122,6 +195,7 @@ pub fn supported_actions() -> Vec { Action::Type, Action::Navigate, Action::GetText, + Action::Eval, Action::GetHtml, Action::WaitForSelector, Action::PageScreenshot, diff --git a/src/runtime/engine.rs b/src/runtime/engine.rs index 1007789..1c08a2e 100644 --- a/src/runtime/engine.rs +++ b/src/runtime/engine.rs @@ -21,7 +21,7 @@ const READ_SKILL_TOOL_NAME: &str = "read_skill"; const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office"; const SCREEN_HTML_EXPORT_TOOL_NAME: &str = "screen_html_export"; const BROWSER_TOOL_CONTRACT_PROMPT: &str = "SuperRPA browser interface contract:\n- Use superrpa_browser as the preferred dedicated SuperRPA interface inside this browser host.\n- browser_action is a legacy alias with the same contract; prefer superrpa_browser when choosing between them.\n- Browser actions allowed by policy are already approved by the user inside this BrowserAttached host.\n- Do not claim a browser action was denied, blocked, or rejected unless an actual tool call returns an error.\n- expected_domain must be the bare hostname only, for example www.zhihu.com.\n- Never include scheme, path, query, fragment, or port in expected_domain.\n- selector values are executed with document.querySelector(...), so they must be valid CSS selectors only.\n- Never use XPath selectors or jQuery-style :contains().\n- Prefer direct navigation to 
canonical URLs when they are known, instead of clicking text links to reach common pages.\n- If you need broad page content, use getText with a valid CSS selector such as body or a stable container.\n- If a task matches an installed skill, load that skill first and then execute it through the SuperRPA interface."; -const ZHIHU_HOTLIST_EXECUTION_PROMPT: &str = "Zhihu hotlist execution contract:\n- Treat Zhihu hotlist export/presentation requests as a real browser workflow, not as a text-only summarization task.\n- You must attempt the browser workflow before concluding failure; a prose-only answer is invalid for this workflow.\n- If the current page is not already `https://www.zhihu.com/hot`, navigate there first.\n- Collect the live list with superrpa_browser using `getText` on `main` first; only fall back to `body` or `html` if `main` is unavailable.\n- Extract ordered rows containing `rank`, `title`, and `heat` from the live page text.\n- Do not use shell, web_fetch, web_search_tool, or fabricated sample data for this workflow.\n- Do not repeat the same sentence or section in your final answer."; +const ZHIHU_HOTLIST_EXECUTION_PROMPT: &str = "Zhihu hotlist execution contract:\n- Treat Zhihu hotlist export/presentation requests as a real browser workflow, not as a text-only summarization task.\n- You must attempt the browser workflow before concluding failure; a prose-only answer is invalid for this workflow.\n- If the current page is not already `https://www.zhihu.com/hot`, navigate there first.\n- If the `zhihu-hotlist.extract_hotlist` skill tool is available, call it before any generic browser probing.\n- Use generic `getText` only as a last-resort fallback when the packaged extractor fails.\n- Extract ordered rows containing `rank`, `title`, and `heat` as structured data.\n- Do not use shell, web_fetch, web_search_tool, or fabricated sample data for this workflow.\n- Do not repeat the same sentence or section in your final answer."; const 
OFFICE_EXPORT_COMPLETION_PROMPT: &str = "Export completion contract:\n- This task requires a real Excel export.\n- After the Zhihu rows are available, you must call openxml_office before finishing.\n- Never fabricate, simulate, or invent substitute hotlist data when a live collection/export task fails.\n- If live collection fails, report the failure concisely instead of producing fake rows.\n- Do not stop after describing how you will parse or export the data.\n- Do not repeat the same sentence or section in your final answer.\n- Your final answer must include the generated local .xlsx path."; const SCREEN_EXPORT_COMPLETION_PROMPT: &str = "Presentation completion contract:\n- This task requires a real dashboard artifact.\n- After the Zhihu rows are available, you must call screen_html_export before finishing.\n- Do not stop after describing how you will render or present the data.\n- Do not repeat the same sentence or section in your final answer.\n- Your final answer must include the local .html path and the presentation object."; @@ -125,6 +125,7 @@ impl RuntimeEngine { .skills_prompt_mode(config.skills.prompt_injection_mode) .allowed_tools(self.allowed_tools_for_config( config, + skills_dir, browser_surface_present, instruction, )) @@ -204,6 +205,7 @@ impl RuntimeEngine { fn allowed_tools_for_config( &self, config: &ZeroClawConfig, + skills_dir: &Path, browser_surface_present: bool, instruction: &str, ) -> Option> { @@ -228,6 +230,11 @@ impl RuntimeEngine { if task_needs_local_file_read(instruction) { allowed_tools.push("file_read".to_string()); } + if browser_surface_present { + allowed_tools.extend(browser_script_tool_names(&load_runtime_skills( + config, skills_dir, + ))); + } allowed_tools.dedup(); if matches!(self.profile, RuntimeProfile::GeneralAssistant) && @@ -240,6 +247,20 @@ impl RuntimeEngine { } } +fn browser_script_tool_names(skills: &[zeroclaw::skills::Skill]) -> Vec { + skills + .iter() + .flat_map(|skill| { + skill + .tools + .iter() + 
.filter(|tool| tool.kind == "browser_script") + .map(|tool| format!("{}.{}", skill.name, tool.name)) + .collect::>() + }) + .collect() +} + fn task_needs_local_file_read(instruction: &str) -> bool { let normalized = instruction.trim(); normalized.contains("/home/") || diff --git a/src/security/mac_policy.rs b/src/security/mac_policy.rs index ed628ee..3942fc4 100644 --- a/src/security/mac_policy.rs +++ b/src/security/mac_policy.rs @@ -3,7 +3,7 @@ use std::path::Path; use serde::{Deserialize, Serialize}; -use crate::pipe::Action; +use crate::pipe::{Action, ExecutionSurfaceMetadata}; use crate::security::SecurityError; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -77,6 +77,13 @@ impl MacPolicy { Ok(()) } + pub fn privileged_surface_metadata(&self) -> ExecutionSurfaceMetadata { + let mut metadata = ExecutionSurfaceMetadata::privileged_browser_pipe("mac_policy"); + metadata.allowed_domains = self.domains.allowed.clone(); + metadata.allowed_actions = self.pipe_actions.allowed.clone(); + metadata + } + fn validate_rules(&self) -> Result<(), SecurityError> { if self.version.trim().is_empty() { return Err(SecurityError::InvalidRules( diff --git a/tests/browser_script_skill_tool_test.rs b/tests/browser_script_skill_tool_test.rs new file mode 100644 index 0000000..17b01b4 --- /dev/null +++ b/tests/browser_script_skill_tool_test.rs @@ -0,0 +1,127 @@ +mod common; + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; +use std::time::{SystemTime, UNIX_EPOCH}; +use std::fs; + +use common::MockTransport; +use serde_json::json; +use sgclaw::compat::browser_script_skill_tool::BrowserScriptSkillTool; +use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing}; +use sgclaw::security::MacPolicy; +use zeroclaw::skills::SkillTool; +use zeroclaw::tools::Tool; + +fn test_policy() -> MacPolicy { + MacPolicy::from_json_str( + r#"{ + "version": "1.0", + "domains": { "allowed": ["www.zhihu.com"] }, + 
"pipe_actions": { + "allowed": ["click", "type", "navigate", "getText", "eval"], + "blocked": [] + } + }"#, + ) + .unwrap() +} + +#[tokio::test] +async fn browser_script_skill_tool_executes_packaged_script_via_eval() { + let skill_dir = unique_temp_dir("sgclaw-browser-script-skill"); + let scripts_dir = skill_dir.join("scripts"); + fs::create_dir_all(&scripts_dir).unwrap(); + fs::write( + scripts_dir.join("extract_hotlist.js"), + r#" +const topN = Number(args.top_n || 10); +return { + sheet_name: "知乎热榜", + rows: [[1, "标题", `${topN}条`]] +}; +"#, + ) + .unwrap(); + + let transport = Arc::new(MockTransport::new(vec![BrowserMessage::Response { + seq: 1, + success: true, + data: json!({ + "text": { + "sheet_name": "知乎热榜", + "rows": [[1, "标题", "10条"]] + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 5, + }, + }])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + let mut args = HashMap::new(); + args.insert("top_n".to_string(), "How many rows to extract".to_string()); + let skill_tool = SkillTool { + name: "extract_hotlist".to_string(), + description: "Extract structured hotlist rows".to_string(), + kind: "browser_script".to_string(), + command: "scripts/extract_hotlist.js".to_string(), + args, + }; + let tool = BrowserScriptSkillTool::new( + "zhihu-hotlist", + &skill_tool, + &skill_dir, + browser_tool, + ) + .unwrap(); + + let result = tool + .execute(json!({ + "expected_domain": "https://www.zhihu.com/hot", + "top_n": "10" + })) + .await + .unwrap(); + + let sent = transport.sent_messages(); + assert!(result.success); + assert_eq!( + serde_json::from_str::(&result.output).unwrap(), + json!({ + "sheet_name": "知乎热榜", + "rows": [[1, "标题", "10条"]] + }) + ); + assert!(matches!( + &sent[0], + AgentMessage::Command { + action, + params, + security, + .. 
+ } if action == &Action::Eval + && security.expected_domain == "www.zhihu.com" + && params["script"].as_str().unwrap().contains("const args = {\"top_n\":\"10\"};") + && params["script"].as_str().unwrap().contains("return {") + )); +} + +fn unique_temp_dir(prefix: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let path = std::env::temp_dir().join(format!("{prefix}-{nanos}")); + fs::create_dir_all(&path).unwrap(); + path +} diff --git a/tests/browser_tool_test.rs b/tests/browser_tool_test.rs index 25a07f0..ee88a6d 100644 --- a/tests/browser_tool_test.rs +++ b/tests/browser_tool_test.rs @@ -5,7 +5,9 @@ use std::sync::Arc; use std::time::Duration; use common::MockTransport; -use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing}; +use sgclaw::pipe::{ + Action, AgentMessage, BrowserMessage, BrowserPipeTool, ExecutionSurfaceKind, Timing, +}; use sgclaw::security::MacPolicy; fn test_policy() -> MacPolicy { @@ -84,6 +86,20 @@ fn browser_tool_rejects_action_when_mac_policy_blocks_it() { assert!(err.to_string().contains("action is not allowed")); } +#[test] +fn browser_tool_exposes_privileged_surface_metadata_backed_by_mac_policy() { + let transport = Arc::new(MockTransport::new(vec![])); + let tool = BrowserPipeTool::new(transport, test_policy(), vec![1, 2, 3, 4]); + let metadata = tool.surface_metadata(); + + assert_eq!(metadata.kind, ExecutionSurfaceKind::PrivilegedBrowserPipe); + assert!(metadata.privileged); + assert!(!metadata.defines_runtime_identity); + assert_eq!(metadata.guard, "mac_policy"); + assert_eq!(metadata.allowed_domains, vec!["oa.example.com", "erp.example.com"]); + assert_eq!(metadata.allowed_actions, vec!["click", "type", "navigate", "getText"]); +} + #[test] fn default_rules_allow_zhihu_navigation() { let rules_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) diff --git a/tests/compat_browser_tool_test.rs b/tests/compat_browser_tool_test.rs index 485769a..8524480 
100644 --- a/tests/compat_browser_tool_test.rs +++ b/tests/compat_browser_tool_test.rs @@ -8,7 +8,7 @@ use serde_json::{json, Value}; use sgclaw::security::MacPolicy; use sgclaw::{ compat::browser_tool_adapter::ZeroClawBrowserTool, - pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing}, + pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, ExecutionSurfaceKind, Timing}, }; use zeroclaw::tools::Tool; @@ -51,6 +51,17 @@ fn zeroclaw_browser_tool_schema_exposes_only_supported_safe_actions() { assert_eq!(schema["required"], json!(["action", "expected_domain"])); } +#[test] +fn zeroclaw_browser_tool_marks_browser_action_as_privileged_surface() { + let (_, tool) = build_adapter(vec![]); + let metadata = tool.surface_metadata(); + + assert_eq!(metadata.kind, ExecutionSurfaceKind::PrivilegedBrowserPipe); + assert!(metadata.privileged); + assert!(!metadata.defines_runtime_identity); + assert_eq!(metadata.guard, "mac_policy"); +} + #[tokio::test] async fn zeroclaw_browser_tool_executes_supported_actions_and_returns_observation_payload() { let (transport, tool) = build_adapter(vec![ @@ -202,6 +213,63 @@ async fn zeroclaw_browser_tool_keeps_domain_validation_in_mac_policy() { ); } +#[tokio::test] +async fn zeroclaw_browser_tool_normalizes_expected_domain_before_sending_command() { + let (transport, tool) = build_adapter(vec![ + BrowserMessage::Response { + seq: 1, + success: true, + data: json!({ "navigated": true }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 11, + }, + }, + BrowserMessage::Response { + seq: 2, + success: true, + data: json!({ "clicked": true }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 2, + exec_ms: 12, + }, + }, + ]); + + let navigate = tool + .execute(json!({ + "action": "navigate", + "expected_domain": "https://www.baidu.com/s?wd=天气", + "url": "https://www.baidu.com/s?wd=天气" + })) + .await + .unwrap(); + let click = tool + .execute(json!({ + "action": "click", + "expected_domain": 
"https://www.baidu.com/s?wd=天气", + "selector": "#su" + })) + .await + .unwrap(); + + let sent = transport.sent_messages(); + assert!(navigate.success); + assert!(click.success); + assert!(matches!( + &sent[0], + AgentMessage::Command { security, .. } + if security.expected_domain == "www.baidu.com" + )); + assert!(matches!( + &sent[1], + AgentMessage::Command { security, .. } + if security.expected_domain == "www.baidu.com" + )); +} + #[tokio::test] async fn zeroclaw_browser_tool_rejects_missing_required_action_parameters() { let (transport, tool) = build_adapter(vec![]); diff --git a/tests/compat_runtime_test.rs b/tests/compat_runtime_test.rs index ac3f4ae..f0e7210 100644 --- a/tests/compat_runtime_test.rs +++ b/tests/compat_runtime_test.rs @@ -47,7 +47,7 @@ fn policy_for_domains(domains: &[&str]) -> MacPolicy { "version": "1.0", "domains": { "allowed": domains }, "pipe_actions": { - "allowed": ["click", "type", "navigate", "getText", "waitForSelector"], + "allowed": ["click", "type", "navigate", "getText", "waitForSelector", "eval"], "blocked": [] } }) @@ -97,6 +97,25 @@ fn write_skill_package(skills_dir: &std::path::Path, skill_name: &str, body: &st fs::write(skill_dir.join("SKILL.md"), body).unwrap(); } +fn write_skill_manifest_package( + skills_dir: &std::path::Path, + skill_name: &str, + manifest: &str, +) -> PathBuf { + let skill_dir = skills_dir.join(skill_name); + fs::create_dir_all(&skill_dir).unwrap(); + fs::write(skill_dir.join("SKILL.toml"), manifest).unwrap(); + skill_dir +} + +fn write_skill_script(skill_dir: &std::path::Path, relative_path: &str, body: &str) { + let script_path = skill_dir.join(relative_path); + if let Some(parent) = script_path.parent() { + fs::create_dir_all(parent).unwrap(); + } + fs::write(script_path, body).unwrap(); +} + fn real_skill_lib_root() -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) .parent() @@ -1271,6 +1290,209 @@ fn compat_runtime_allows_read_skill_under_compact_mode_policy() { 
assert!(tool_names.contains(&"browser_action".to_string())); assert!(tool_names.contains(&"superrpa_browser".to_string())); assert!(tool_names.contains(&"read_skill".to_string())); + assert!(tool_names.contains(&"zhihu-hotlist.extract_hotlist".to_string())); +} + +#[test] +fn compat_runtime_exposes_browser_script_skill_tools_in_browser_attached_mode() { + let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner()); + + let response = json!({ + "choices": [{ + "message": { + "content": "已看到 browser_script skill 工具" + } + }] + }); + let (base_url, requests, server_handle) = start_fake_deepseek_server(vec![response]); + + let workspace_root = temp_workspace_root(); + let default_skills_dir = workspace_root.join(".sgclaw-zeroclaw-workspace").join("skills"); + let skill_dir = write_skill_manifest_package( + &default_skills_dir, + "workspace-zhihu-skill", + r#" +[skill] +name = "workspace-zhihu-skill" +description = "Extract Zhihu hotlist rows with a packaged browser script." +version = "0.1.0" + +[[tools]] +name = "extract_hotlist" +description = "Extract structured hotlist rows from the current Zhihu page." +kind = "browser_script" +command = "scripts/extract_hotlist.js" + +[tools.args] +top_n = "How many hotlist rows to extract." 
+"#, + ); + write_skill_script( + &skill_dir, + "scripts/extract_hotlist.js", + "return { rows: [] };", + ); + + let mut settings = SgClawSettings::from_legacy_deepseek_fields( + "deepseek-test-key".to_string(), + base_url, + "deepseek-chat".to_string(), + None, + ) + .unwrap(); + settings.runtime_profile = RuntimeProfile::BrowserAttached; + + let transport = Arc::new(MockTransport::new(vec![])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + zhihu_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + let summary = execute_task_with_sgclaw_settings( + transport.as_ref(), + browser_tool, + "告诉我当前有哪些知乎热榜工具", + &CompatTaskContext::default(), + &workspace_root, + &settings, + ) + .unwrap(); + server_handle.join().unwrap(); + + let request_bodies = requests.lock().unwrap().clone(); + let tool_names = request_tool_names(&request_bodies[0]); + + assert_eq!(summary, "已看到 browser_script skill 工具"); + assert!(tool_names.contains(&"browser_action".to_string())); + assert!(tool_names.contains(&"superrpa_browser".to_string())); + assert!(tool_names.contains(&"read_skill".to_string())); + assert!(tool_names.contains(&"workspace-zhihu-skill.extract_hotlist".to_string())); +} + +#[test] +fn compat_runtime_executes_browser_script_skill_via_eval_without_gettext_probing() { + let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner()); + + let first_response = json!({ + "choices": [{ + "message": { + "content": "", + "tool_calls": [{ + "id": "call_1", + "type": "function", + "function": { + "name": "workspace-zhihu-skill.extract_hotlist", + "arguments": serde_json::to_string(&json!({ + "expected_domain": "www.zhihu.com", + "top_n": "10" + })).unwrap() + } + }] + } + }] + }); + let second_response = json!({ + "choices": [{ + "message": { + "content": "已执行 browser_script skill" + } + }] + }); + let (base_url, requests, server_handle) = + start_fake_deepseek_server(vec![first_response, second_response]); + + 
let workspace_root = temp_workspace_root(); + let default_skills_dir = workspace_root.join(".sgclaw-zeroclaw-workspace").join("skills"); + let skill_dir = write_skill_manifest_package( + &default_skills_dir, + "workspace-zhihu-skill", + r#" +[skill] +name = "workspace-zhihu-skill" +description = "Extract Zhihu hotlist rows with a packaged browser script." +version = "0.1.0" + +[[tools]] +name = "extract_hotlist" +description = "Extract structured hotlist rows from the current Zhihu page." +kind = "browser_script" +command = "scripts/extract_hotlist.js" + +[tools.args] +top_n = "How many hotlist rows to extract." +"#, + ); + write_skill_script( + &skill_dir, + "scripts/extract_hotlist.js", + r#" +const topN = Number(args.top_n || 10); +return { + source: "https://www.zhihu.com/hot", + sheet_name: "知乎热榜", + columns: ["rank", "title", "heat"], + rows: [[1, "标题", `${topN}条`]] +}; +"#, + ); + + let mut settings = SgClawSettings::from_legacy_deepseek_fields( + "deepseek-test-key".to_string(), + base_url, + "deepseek-chat".to_string(), + None, + ) + .unwrap(); + settings.runtime_profile = RuntimeProfile::BrowserAttached; + + let transport = Arc::new(MockTransport::new(vec![success_browser_response( + 1, + json!({ + "text": { + "source": "https://www.zhihu.com/hot", + "sheet_name": "知乎热榜", + "columns": ["rank", "title", "heat"], + "rows": [[1, "标题", "10条"]] + } + }), + )])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + zhihu_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + let summary = execute_task_with_sgclaw_settings( + transport.as_ref(), + browser_tool, + "用知乎热榜 skill 提取前十条结构化数据", + &CompatTaskContext::default(), + &workspace_root, + &settings, + ) + .unwrap(); + server_handle.join().unwrap(); + + let sent = transport.sent_messages(); + let request_bodies = requests.lock().unwrap().clone(); + let tool_names = request_tool_names(&request_bodies[0]); + + assert_eq!(summary, "已执行 browser_script 
skill"); + assert!(tool_names.contains(&"workspace-zhihu-skill.extract_hotlist".to_string())); + assert!(sent.iter().any(|message| { + matches!(message, AgentMessage::LogEntry { level, message } + if level == "info" && message == "call workspace-zhihu-skill.extract_hotlist") + })); + assert!(sent.iter().any(|message| { + matches!(message, AgentMessage::Command { action, .. } if action == &Action::Eval) + })); + assert!(!sent.iter().any(|message| { + matches!(message, AgentMessage::LogEntry { level, message } + if level == "info" && message.starts_with("getText ")) + })); } #[test] @@ -1322,6 +1544,7 @@ fn zhihu_hotlist_browser_skill_flow_does_not_expose_shell_or_glob_tools() { assert!(tool_names.contains(&"superrpa_browser".to_string())); assert!(tool_names.contains(&"browser_action".to_string())); assert!(tool_names.contains(&"read_skill".to_string())); + assert!(tool_names.contains(&"zhihu-hotlist.extract_hotlist".to_string())); assert!(!tool_names.contains(&"shell".to_string())); assert!(!tool_names.contains(&"glob_search".to_string())); } @@ -1426,6 +1649,7 @@ fn browser_attached_export_flow_exposes_browser_and_office_tools_only() { assert!(tool_names.contains(&"superrpa_browser".to_string())); assert!(tool_names.contains(&"browser_action".to_string())); assert!(tool_names.contains(&"read_skill".to_string())); + assert!(tool_names.contains(&"zhihu-hotlist.extract_hotlist".to_string())); assert!(tool_names.contains(&"openxml_office".to_string())); assert!(!tool_names.contains(&"shell".to_string())); assert!(!tool_names.contains(&"glob_search".to_string())); @@ -1480,6 +1704,7 @@ fn compat_runtime_allows_zhihu_hotlist_screen_export_tool_in_browser_profile() { assert!(tool_names.contains(&"superrpa_browser".to_string())); assert!(tool_names.contains(&"browser_action".to_string())); assert!(tool_names.contains(&"read_skill".to_string())); + assert!(tool_names.contains(&"zhihu-hotlist.extract_hotlist".to_string())); 
assert!(tool_names.contains(&"screen_html_export".to_string())); assert!(!tool_names.contains(&"shell".to_string())); assert!(!tool_names.contains(&"glob_search".to_string())); @@ -1706,9 +1931,10 @@ fn handle_browser_message_executes_real_zhihu_hotlist_skill_flow() { "id": "call_1", "type": "function", "function": { - "name": "read_skill", + "name": "zhihu-hotlist.extract_hotlist", "arguments": serde_json::to_string(&json!({ - "name": "zhihu-hotlist" + "expected_domain": "www.zhihu.com", + "top_n": "10" })).unwrap() } }] @@ -1716,50 +1942,14 @@ fn handle_browser_message_executes_real_zhihu_hotlist_skill_flow() { }] }); let second_response = json!({ - "choices": [{ - "message": { - "content": "", - "tool_calls": [ - { - "id": "call_2", - "type": "function", - "function": { - "name": "browser_action", - "arguments": serde_json::to_string(&json!({ - "action": "navigate", - "expected_domain": "www.zhihu.com", - "url": "https://www.zhihu.com/hot" - })).unwrap() - } - }, - { - "id": "call_3", - "type": "function", - "function": { - "name": "browser_action", - "arguments": serde_json::to_string(&json!({ - "action": "getText", - "expected_domain": "www.zhihu.com", - "selector": ".HotList-list .HotItem" - })).unwrap() - } - } - ] - } - }] - }); - let third_response = json!({ "choices": [{ "message": { "content": "已完成知乎热榜采集" } }] }); - let (base_url, requests, server_handle) = start_fake_deepseek_server(vec![ - first_response, - second_response, - third_response, - ]); + let (base_url, requests, server_handle) = + start_fake_deepseek_server(vec![first_response, second_response]); let workspace_root = temp_workspace_root(); let skills_dir = real_skill_lib_root(); @@ -1772,13 +1962,14 @@ fn handle_browser_message_executes_real_zhihu_hotlist_skill_flow() { ); let runtime_context = AgentRuntimeContext::new(Some(config_path), workspace_root.clone()); - let transport = Arc::new(MockTransport::new(vec![ - success_browser_response(1, json!({ "navigated": true })), - 
success_browser_response( - 2, - json!({ "text": "热榜项目 1\n热榜项目 2\n热榜项目 3" }), - ), - ])); + let transport = Arc::new(MockTransport::new(vec![success_browser_response(1, json!({ + "text": { + "source": "https://www.zhihu.com/hot", + "sheet_name": "知乎热榜", + "columns": ["rank", "title", "heat"], + "rows": [[1, "热榜项目 1", "1707万"], [2, "热榜项目 2", "1150万"]] + } + }))])); let browser_tool = BrowserPipeTool::new( transport.clone(), zhihu_test_policy(), @@ -1816,32 +2007,22 @@ fn handle_browser_message_executes_real_zhihu_hotlist_skill_flow() { matches!( message, AgentMessage::LogEntry { level, message } - if level == "info" && message == "read_skill zhihu-hotlist@0.1.0" + if level == "info" && message == "call zhihu-hotlist.extract_hotlist" ) })); assert!(sent.iter().any(|message| { matches!( message, AgentMessage::Command { action, params, .. } - if action == &Action::Navigate && - params["url"].as_str() == Some("https://www.zhihu.com/hot") + if action == &Action::Eval && + params["script"].as_str().unwrap_or_default().contains("columns: ['rank', 'title', 'heat']") ) })); - assert!(sent.iter().any(|message| { - matches!( - message, - AgentMessage::Command { action, params, .. 
} - if action == &Action::GetText && - params["selector"].as_str() == Some(".HotList-list .HotItem") - ) - })); - assert_eq!(request_bodies.len(), 3); - assert!(tool_content.len() > 100); - assert!(tool_content.contains("hot list items")); - assert!(tool_content.contains("Export Artifact")); - assert!(tool_content.contains("\"sheet_name\": \"知乎热榜\"")); - assert!(tool_content.contains("\"columns\": [\"rank\", \"title\", \"heat\"]")); - assert!(tool_content.contains("structured artifact is primary")); + assert_eq!(request_bodies.len(), 2); + assert!(tool_content.contains("知乎热榜")); + assert!(tool_content.contains("rank")); + assert!(tool_content.contains("heat")); + assert!(tool_content.contains("热榜项目 1")); } #[test] @@ -1859,30 +2040,10 @@ fn handle_browser_message_chains_hotlist_skill_into_office_export_tool() { "id": "call_1", "type": "function", "function": { - "name": "superrpa_browser", + "name": "zhihu-hotlist.extract_hotlist", "arguments": serde_json::to_string(&json!({ - "action": "navigate", "expected_domain": "www.zhihu.com", - "url": "https://www.zhihu.com/hot" - })).unwrap() - } - }] - } - }] - }); - let second_response = json!({ - "choices": [{ - "message": { - "content": "", - "tool_calls": [{ - "id": "call_2", - "type": "function", - "function": { - "name": "superrpa_browser", - "arguments": serde_json::to_string(&json!({ - "action": "getText", - "expected_domain": "www.zhihu.com", - "selector": "main" + "top_n": "10" })).unwrap() } }] @@ -1921,7 +2082,6 @@ fn handle_browser_message_chains_hotlist_skill_into_office_export_tool() { }); let (base_url, _requests, server_handle) = start_fake_deepseek_server(vec![ first_response, - second_response, third_response, fourth_response, ]); @@ -1935,11 +2095,14 @@ fn handle_browser_message_chains_hotlist_skill_into_office_export_tool() { let runtime_context = AgentRuntimeContext::new(Some(config_path), workspace_root.clone()); let transport = Arc::new(MockTransport::new(vec![ - success_browser_response(1, json!({ 
"navigated": true })), - success_browser_response( - 2, - json!({ "text": "知乎热榜\n1\n问题一\n344万热度\n2\n问题二\n266万热度" }), - ), + success_browser_response(1, json!({ + "text": { + "source": "https://www.zhihu.com/hot", + "sheet_name": "知乎热榜", + "columns": ["rank", "title", "heat"], + "rows": [[1, "问题一", "344万"], [2, "问题二", "266万"]] + } + })), ])); let browser_tool = BrowserPipeTool::new( transport.clone(), @@ -1979,6 +2142,19 @@ fn handle_browser_message_chains_hotlist_skill_into_office_export_tool() { if level == "mode" && message == "zeroclaw_process_message_primary" ) })); + assert!(sent.iter().any(|message| { + matches!( + message, + AgentMessage::LogEntry { level, message } + if level == "info" && message == "call zhihu-hotlist.extract_hotlist" + ) + })); + assert!(sent.iter().any(|message| { + matches!( + message, + AgentMessage::Command { action, .. } if action == &Action::Eval + ) + })); assert!(!sent.iter().any(|message| { matches!( message, diff --git a/tests/pipe_handshake_test.rs b/tests/pipe_handshake_test.rs index 84d5f5d..55ee267 100644 --- a/tests/pipe_handshake_test.rs +++ b/tests/pipe_handshake_test.rs @@ -3,7 +3,7 @@ mod common; use std::time::Duration; use common::MockTransport; -use sgclaw::pipe::{perform_handshake, AgentMessage, BrowserMessage}; +use sgclaw::pipe::{perform_handshake, AgentMessage, BrowserMessage, ExecutionSurfaceKind}; #[test] fn handshake_reads_init_and_writes_init_ack() { @@ -24,7 +24,10 @@ fn handshake_reads_init_and_writes_init_ack() { version, agent_id, supported_actions - } if version == "1.0" && !agent_id.is_empty() && supported_actions.len() >= 4 + } if version == "1.0" && + !agent_id.is_empty() && + supported_actions.iter().any(|action| action == &sgclaw::pipe::Action::Click) && + supported_actions.iter().any(|action| action.as_str() == "eval") )); } @@ -39,3 +42,21 @@ fn handshake_rejects_version_mismatch() { let err = perform_handshake(&transport, Duration::from_secs(5)).unwrap_err(); 
assert!(err.to_string().contains("unsupported protocol version")); } + +#[test] +fn handshake_capabilities_report_browser_surface_without_redefining_runtime() { + let transport = MockTransport::new(vec![BrowserMessage::Init { + version: "1.0".to_string(), + hmac_seed: "0123456789abcdef".to_string(), + capabilities: vec!["browser_action".to_string()], + }]); + + let result = perform_handshake(&transport, Duration::from_secs(5)).unwrap(); + let metadata = result + .browser_surface_metadata() + .expect("expected browser surface metadata"); + + assert_eq!(metadata.kind, ExecutionSurfaceKind::PrivilegedBrowserPipe); + assert!(metadata.privileged); + assert!(!metadata.defines_runtime_identity); +} diff --git a/tests/pipe_protocol_test.rs b/tests/pipe_protocol_test.rs index 6613a81..2acdc6c 100644 --- a/tests/pipe_protocol_test.rs +++ b/tests/pipe_protocol_test.rs @@ -1,4 +1,6 @@ -use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, SecurityFields, Timing}; +use sgclaw::pipe::{ + Action, AgentMessage, BrowserMessage, ExecutionSurfaceKind, SecurityFields, Timing, +}; #[test] fn browser_init_round_trip_uses_frozen_wire_format() { @@ -57,3 +59,32 @@ fn response_deserializes_timing_and_payload() { } ); } + +#[test] +fn submit_task_exposes_browser_context_without_implying_browser_only_runtime() { + let message = BrowserMessage::SubmitTask { + instruction: "统计一下知乎热榜".to_string(), + conversation_id: "conversation-1".to_string(), + messages: vec![], + page_url: "https://www.zhihu.com/hot".to_string(), + page_title: "知乎热榜".to_string(), + }; + + let context = message.browser_context().expect("browser context"); + let surface = message + .requested_surface_metadata() + .expect("surface metadata"); + + assert_eq!(context.page_url, "https://www.zhihu.com/hot"); + assert_eq!(context.page_title, "知乎热榜"); + assert_eq!(surface.kind, ExecutionSurfaceKind::PrivilegedBrowserPipe); + assert!(surface.privileged); + assert!(!surface.defines_runtime_identity); +} + +#[test] +fn 
supported_actions_include_browser_script_execution() { + let supported = sgclaw::pipe::supported_actions(); + + assert!(supported.iter().any(|action| action.as_str() == "eval")); +} diff --git a/tests/skill_lib_validation_test.py b/tests/skill_lib_validation_test.py index 61644dc..f256b7a 100644 --- a/tests/skill_lib_validation_test.py +++ b/tests/skill_lib_validation_test.py @@ -53,13 +53,18 @@ class SkillLibValidationTest(unittest.TestCase): if name == "office-export-xlsx": self.assertIn("office", record.tags) self.assertIn("xlsx", record.tags) - self.assertEqual(record.location, SKILLS_DIR / name / "SKILL.md") + expected_location = ( + SKILLS_DIR / name / "SKILL.toml" + if name == "zhihu-hotlist" + else SKILLS_DIR / name / "SKILL.md" + ) + self.assertEqual(record.location, expected_location) self.assertTrue(record.prompt_body.lstrip().startswith("# ")) self.assertNotIn("\n---\n", record.prompt_body) - def test_each_skill_passes_audit_without_scripts(self): + def test_each_skill_passes_audit_with_current_script_policy(self): for skill_dir in self.validator.discover_skill_dirs(): - report = self.validator.audit_skill_directory(skill_dir, allow_scripts=False) + report = self.validator.audit_skill_directory(skill_dir, allow_scripts=True) self.assertEqual( report.findings, [], @@ -69,9 +74,15 @@ class SkillLibValidationTest(unittest.TestCase): def test_current_packages_keep_required_structure(self): for name in EXPECTED_SKILL_NAMES: skill_dir = SKILLS_DIR / name - self.assertTrue((skill_dir / "SKILL.md").is_file()) + self.assertTrue( + (skill_dir / "SKILL.md").is_file() or (skill_dir / "SKILL.toml").is_file() + ) self.assertTrue((skill_dir / "references").is_dir()) self.assertTrue((skill_dir / "assets").is_dir()) + self.assertTrue((SKILLS_DIR / "zhihu-hotlist" / "SKILL.toml").is_file()) + self.assertTrue( + (SKILLS_DIR / "zhihu-hotlist" / "scripts" / "extract_hotlist.js").is_file() + ) def test_each_skill_declares_superrpa_browser_contract(self): for name in [name 
for name in EXPECTED_SKILL_NAMES if name.startswith("zhihu-")]: @@ -106,7 +117,7 @@ class SkillLibValidationTest(unittest.TestCase): self.assertIn("presentation", content) def test_validate_all_skills_reports_pass(self): - results = self.validator.validate_all_skills(allow_scripts=False) + results = self.validator.validate_all_skills(allow_scripts=True) self.assertEqual([result.record.name for result in results], EXPECTED_SKILL_NAMES) self.assertTrue(all(result.ok for result in results)) diff --git a/third_party/zeroclaw/src/skills/mod.rs b/third_party/zeroclaw/src/skills/mod.rs index b12fd89..a76fc49 100644 --- a/third_party/zeroclaw/src/skills/mod.rs +++ b/third_party/zeroclaw/src/skills/mod.rs @@ -810,18 +810,18 @@ pub fn skills_to_prompt_with_mode( } if !skill.tools.is_empty() { - // Tools with known kinds (shell, script, http) are registered as + // Tools with known kinds (shell, script, http, browser_script) are registered as // callable tool specs and can be invoked directly via function calling. // We note them here for context but mark them as callable. 
let registered: Vec<_> = skill .tools .iter() - .filter(|t| matches!(t.kind.as_str(), "shell" | "script" | "http")) + .filter(|t| matches!(t.kind.as_str(), "shell" | "script" | "http" | "browser_script")) .collect(); let unregistered: Vec<_> = skill .tools .iter() - .filter(|t| !matches!(t.kind.as_str(), "shell" | "script" | "http")) + .filter(|t| !matches!(t.kind.as_str(), "shell" | "script" | "http" | "browser_script")) .collect(); if !registered.is_empty() { @@ -887,6 +887,7 @@ pub fn skills_to_tools( tool, ))); } + "browser_script" => {} other => { tracing::warn!( "Unknown skill tool kind '{}' for {}.{}, skipping", @@ -1900,6 +1901,32 @@ description = "Bare minimum" assert!(prompt.contains("Fetch forecast")); } + #[test] + fn skills_to_prompt_marks_browser_script_tools_as_callable() { + let skills = vec![Skill { + name: "zhihu-hotlist".to_string(), + description: "Collect hotlist rows".to_string(), + version: "1.0.0".to_string(), + author: None, + tags: vec![], + tools: vec![SkillTool { + name: "extract_hotlist".to_string(), + description: "Extract structured hotlist rows from the current page".to_string(), + kind: "browser_script".to_string(), + command: "scripts/extract_hotlist.js".to_string(), + args: HashMap::new(), + }], + prompts: vec![], + location: None, + }]; + + let prompt = skills_to_prompt(&skills, Path::new("/tmp")); + + assert!(prompt.contains("zhihu-hotlist.extract_hotlist")); + assert!(!prompt.contains("browser_script")); + } + #[test] fn skills_to_prompt_escapes_xml_content() { let skills = vec![Skill { diff --git a/third_party/zeroclaw/src/tools/read_skill.rs b/third_party/zeroclaw/src/tools/read_skill.rs index 0316e1c..657840c 100644 --- a/third_party/zeroclaw/src/tools/read_skill.rs +++ b/third_party/zeroclaw/src/tools/read_skill.rs @@ -168,6 +168,23 @@ pub async fn read_skill_bundle(location: &Path) -> std::io::Result { &mut pending, ); + if location.file_name().and_then(|name| name.to_str()) == Some("SKILL.toml") { + let sibling_markdown = 
skill_root.join("SKILL.md"); + if sibling_markdown.exists() { + if let Ok(markdown) = tokio::fs::read_to_string(&sibling_markdown).await { + output.push_str("\n\n## Referenced File: SKILL.md\n\n"); + output.push_str(&markdown); + enqueue_reference_paths( + &markdown, + sibling_markdown.parent().unwrap_or(skill_root.as_path()), + &skill_root, + &mut queued, + &mut pending, + ); + } + } + } + while let Some(path) = pending.pop_front() { let canonical = path.canonicalize().unwrap_or(path.clone()); if !canonical.starts_with(&skill_root) || !appended.insert(canonical.clone()) {