wip: checkpoint 2026-03-29 runtime work
This commit is contained in:
402
tools/live_acceptance/run_zhihu_hotlist_excel_acceptance.py
Normal file
402
tools/live_acceptance/run_zhihu_hotlist_excel_acceptance.py
Normal file
@@ -0,0 +1,402 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Repository root: two directories above tools/live_acceptance/.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Debug binary produced by `cargo build --bin sgclaw` (see ensure_binary).
SGCLAW_BIN = REPO_ROOT / "target" / "debug" / "sgclaw"
# Real provider config; selected fields are copied into each temp workspace.
REAL_CONFIG_PATH = Path("/home/zyl/.config/superrpa/Default/superrpa/sgclaw_config.json")
# Markdown evidence document written at the end of every acceptance run.
ACCEPTANCE_DOC = REPO_ROOT / "docs" / "acceptance" / "2026-03-29-zhihu-hotlist-excel.md"
# Live Zhihu hot-list endpoint (top 10 entries, desktop variant).
ZH_HOTLIST_API = "https://www.zhihu.com/api/v3/feed/topstory/hot-list-web?limit=10&desktop=true"
# Fixed hmac_seed sent in the init handshake.
# NOTE(review): hard-coded seed — presumably acceptable for acceptance runs
# since it is not a production secret; confirm against the protocol spec.
HANDSHAKE_SEED = "00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff"
|
||||
|
||||
|
||||
@dataclass
class HotItem:
    """One entry of the Zhihu hot list as consumed by this script."""

    # 1-based position on the hot list.
    rank: int
    # Headline text of the entry.
    title: str
    # Heat metric text after normalize_heat_text (whitespace collapsed,
    # trailing "热度" suffix removed).
    heat: str
|
||||
|
||||
|
||||
def main() -> int:
    """Run the full live acceptance flow and print the rubric score.

    Returns:
        0 when the total score reaches the 85-point passing bar, 1 otherwise.
    """
    ensure_binary()
    items = fetch_live_hotlist()
    outcome = run_live_acceptance(items)
    rubric = score_acceptance(outcome, items)
    write_acceptance_doc(outcome, items, rubric)
    print(json.dumps(rubric, ensure_ascii=False, indent=2))
    print(f"evidence written to {ACCEPTANCE_DOC}")
    passed = rubric["total_score"] >= 85
    return 0 if passed else 1
|
||||
|
||||
|
||||
def ensure_binary() -> None:
    """Build the sgclaw debug binary unless it is already present."""
    if not SGCLAW_BIN.exists():
        subprocess.run(["cargo", "build", "--bin", "sgclaw"], cwd=REPO_ROOT, check=True)
|
||||
|
||||
|
||||
def fetch_live_hotlist() -> list[HotItem]:
    """Fetch the top-10 entries of the live Zhihu hot list.

    Returns:
        Ranked HotItem records (rank starts at 1).

    Raises:
        RuntimeError: when an entry is missing its title or heat text.
        requests.HTTPError: when the API responds with an error status.
    """
    headers = {"User-Agent": "Mozilla/5.0", "Referer": "https://www.zhihu.com/hot"}
    response = requests.get(ZH_HOTLIST_API, headers=headers, timeout=20)
    response.raise_for_status()
    entries = response.json()["data"][:10]

    items: list[HotItem] = []
    for rank, entry in enumerate(entries, start=1):
        target = entry.get("target", {})
        title = target.get("title_area", {}).get("text", "").strip()
        heat = normalize_heat_text(target.get("metrics_area", {}).get("text", "").strip())
        if not (title and heat):
            raise RuntimeError(f"missing title/heat in live hotlist entry {rank}")
        items.append(HotItem(rank=rank, title=title, heat=heat))
    return items
|
||||
|
||||
|
||||
def normalize_heat_text(text: str) -> str:
    """Collapse all whitespace in *text* and drop a trailing "热度" suffix."""
    collapsed = re.sub(r"\s+", "", text)
    if collapsed.endswith("热度"):
        collapsed = collapsed[: -len("热度")]
    return collapsed
|
||||
|
||||
|
||||
def build_hotlist_text(items: list[HotItem]) -> str:
    """Render hot-list items as the plain text a browser getText would return.

    Each item contributes two lines: "<rank>. <title>" then "热度 <heat>".
    """
    parts: list[str] = []
    for entry in items:
        parts.extend((f"{entry.rank}. {entry.title}", f"热度 {entry.heat}"))
    return "\n".join(parts)
|
||||
|
||||
|
||||
def write_temp_config(workspace_root: Path) -> Path:
    """Write a trimmed copy of the real provider config into the workspace.

    Only apiKey/baseUrl/model/skillsDir are carried over so the acceptance
    run does not depend on any other local settings.

    Returns:
        Path of the config file created inside *workspace_root*.
    """
    source = json.loads(REAL_CONFIG_PATH.read_text(encoding="utf-8"))
    trimmed = {key: source[key] for key in ("apiKey", "baseUrl", "model", "skillsDir")}
    config_path = workspace_root / "sgclaw_config.json"
    config_path.write_text(json.dumps(trimmed, ensure_ascii=False, indent=2), encoding="utf-8")
    return config_path
|
||||
|
||||
|
||||
def run_live_acceptance(items: list[HotItem]) -> dict:
    """Drive one sgclaw run end-to-end over the stdio JSON-line protocol.

    Spawns the sgclaw binary with a trimmed temp config, performs the init
    handshake, submits the hot-list export task, and answers every browser
    command with simulated responses (navigation, clicks, and the live
    hot-list text built from *items*).

    Returns:
        dict with workspace path, init ack, collected log entries, the
        final task message (None on timeout), stderr lines, and the .xlsx
        files created during the run.

    Raises:
        RuntimeError: when the init handshake is not acknowledged.
        queue.Empty: when the init response does not arrive within 10s.
    """
    workspace_root = Path(tempfile.mkdtemp(prefix="sgclaw-live-acceptance-"))
    config_path = write_temp_config(workspace_root)
    # Snapshot pre-existing exports so only files created by this run count.
    existing_exports = set(workspace_root.rglob("*.xlsx"))
    hotlist_text = build_hotlist_text(items)

    child = subprocess.Popen(
        [str(SGCLAW_BIN), "--config-path", str(config_path)],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=REPO_ROOT,
        bufsize=1,  # line-buffered so each protocol line flushes promptly
    )

    stdout_queue: queue.Queue[str] = queue.Queue()
    stderr_lines: list[str] = []
    start_reader(child.stdout, stdout_queue)
    start_reader(child.stderr, None, stderr_lines)

    logs: list[dict] = []
    final_task = None
    init_ack: dict = {}

    # FIX: the whole protocol exchange runs inside try/finally so the child
    # process is always terminated — previously a handshake timeout
    # (queue.Empty) or the RuntimeError below skipped the shutdown code and
    # leaked a running sgclaw process.
    try:
        send_line(
            child,
            {
                "type": "init",
                "version": "1.0",
                "hmac_seed": HANDSHAKE_SEED,
                "capabilities": ["browser_action"],
            },
        )
        init_ack = read_json_line(stdout_queue, timeout=10)
        if init_ack.get("type") != "init_ack":
            raise RuntimeError(f"unexpected init response: {init_ack}")

        send_line(
            child,
            {
                "type": "submit_task",
                "instruction": "读取知乎热榜数据,并导出 excel 文件",
                "conversation_id": "",
                "messages": [],
                "page_url": "https://www.zhihu.com/",
                "page_title": "知乎",
            },
        )

        current_page = "https://www.zhihu.com/"
        deadline = time.time() + 180  # hard cap on the whole task

        while time.time() < deadline:
            try:
                message = read_json_line(stdout_queue, timeout=5)
            except queue.Empty:
                # No output yet: stop if the child died, otherwise keep waiting.
                if child.poll() is not None:
                    break
                continue
            msg_type = message.get("type")
            if msg_type == "log_entry":
                logs.append(message)
                continue
            if msg_type == "command":
                action = message["action"]
                params = message.get("params", {})
                seq = message["seq"]
                if action == "navigate":
                    current_page = params.get("url", current_page)
                    respond_browser(child, seq, {"navigated": True, "url": current_page})
                    continue
                if action == "click":
                    selector = params.get("selector", "")
                    # Clicking anything hot-related lands on the hot-list page.
                    if "hot" in selector:
                        current_page = "https://www.zhihu.com/hot"
                    respond_browser(child, seq, {"clicked": True, "selector": selector})
                    continue
                if action == "getText":
                    # Serve the live hot-list text only while "on" a Zhihu page.
                    text = hotlist_text if "zhihu.com" in current_page else ""
                    respond_browser(child, seq, {"text": text})
                    continue
                if action == "type":
                    respond_browser(child, seq, {"typed": True})
                    continue
                respond_browser(child, seq, {"unsupported_action": action}, success=False)
                continue
            if msg_type == "task_complete":
                final_task = message
                break
    finally:
        # Best-effort shutdown: polite terminate first, hard kill on timeout.
        try:
            child.terminate()
            child.wait(timeout=5)
        except subprocess.TimeoutExpired:
            child.kill()
            child.wait(timeout=5)

    exports = sorted(set(workspace_root.rglob("*.xlsx")) - existing_exports)
    return {
        "workspace_root": str(workspace_root),
        "init_ack": init_ack,
        "logs": logs,
        "final_task": final_task,
        "stderr": stderr_lines,
        "exports": [str(path) for path in exports],
    }
|
||||
|
||||
|
||||
def start_reader(stream, output_queue: queue.Queue[str] | None, collector: list[str] | None = None) -> None:
|
||||
def _reader() -> None:
|
||||
try:
|
||||
for line in stream:
|
||||
if collector is not None:
|
||||
collector.append(line.rstrip("\n"))
|
||||
if output_queue is not None:
|
||||
output_queue.put(line)
|
||||
finally:
|
||||
stream.close()
|
||||
|
||||
thread = threading.Thread(target=_reader, daemon=True)
|
||||
thread.start()
|
||||
|
||||
|
||||
def send_line(child: subprocess.Popen, payload: dict) -> None:
    """Serialize *payload* as one JSON line on the child's stdin and flush."""
    stdin = child.stdin
    assert stdin is not None
    encoded = json.dumps(payload, ensure_ascii=False)
    stdin.write(encoded + "\n")
    stdin.flush()
|
||||
|
||||
|
||||
def respond_browser(child: subprocess.Popen, seq: int, data: dict, success: bool = True) -> None:
    """Answer a browser command with a canned protocol response.

    Wraps *data* in the response envelope (empty AOM snapshot, fixed fake
    timings) and writes it to the child's stdin.
    """
    envelope = {
        "type": "response",
        "seq": seq,
        "success": success,
        "data": data,
        "aom_snapshot": [],
        "timing": {"queue_ms": 1, "exec_ms": 10},
    }
    send_line(child, envelope)
|
||||
|
||||
|
||||
def read_json_line(output_queue: queue.Queue[str], timeout: int) -> dict:
    """Pop the next line from *output_queue* and parse it as JSON.

    Raises:
        queue.Empty: when nothing arrives within *timeout* seconds.
        json.JSONDecodeError: when the line is not valid JSON.
    """
    line = output_queue.get(timeout=timeout)
    return json.loads(line)
|
||||
|
||||
|
||||
def score_acceptance(result: dict, items: list[HotItem]) -> dict:
    """Score one acceptance run against the 100-point rubric.

    Buckets: skill selection (30), tool discipline (25), hotlist data
    correctness (20), xlsx export success (20), final response quality (5).
    Returns the per-bucket scores plus evidence fields (summary, logs,
    stderr, exported path).
    """
    logs = [entry.get("message", "") for entry in result["logs"]]
    final_task = result.get("final_task") or {}
    exports = [Path(path) for path in result["exports"]]
    exported_path = resolve_exported_path(exports, final_task.get("summary", ""))

    # Skill selection (max 30): 15 points per expected skill, granted either
    # for an explicit read_skill log or for the equivalent observed behavior.
    skill_selection = 0
    executed_hotlist_collection = (
        "navigate https://www.zhihu.com/hot" in logs and
        any(message.startswith("getText ") for message in logs)
    )
    read_hotlist_skill = "read_skill zhihu-hotlist" in logs
    read_office_skill = "read_skill office-export-xlsx" in logs
    completed_office_export = "call openxml_office" in logs

    if read_hotlist_skill or executed_hotlist_collection:
        skill_selection += 15
    if read_office_skill or completed_office_export:
        skill_selection += 15
    # Penalize reading the skills in the wrong order: the hotlist skill is
    # expected to be read before the office-export skill.
    if read_hotlist_skill and read_office_skill and \
            logs.index("read_skill zhihu-hotlist") > logs.index("read_skill office-export-xlsx"):
        skill_selection = max(0, skill_selection - 15)

    # Tool discipline (max 25): deduct for generic tools this task should
    # not need; floor at zero.
    tool_discipline = 25
    if any(message == "call shell" for message in logs):
        tool_discipline -= 15
    if any(message == "call glob_search" for message in logs):
        tool_discipline -= 10
    if any(message == "call file_read" for message in logs):
        tool_discipline -= 10
    tool_discipline = max(0, tool_discipline)

    # Data correctness (max 20) and export success (max 20): inspect the
    # produced workbook directly through its OOXML parts.
    hotlist_data_correctness = 0
    xlsx_export_success = 0
    workbook_ok = False
    if exported_path and exported_path.exists():
        with zipfile.ZipFile(exported_path) as archive:
            sheet_xml = archive.read("xl/worksheets/sheet1.xml").decode("utf-8")
            workbook_xml = archive.read("xl/workbook.xml").decode("utf-8")
        # NOTE(review): substring matching assumes titles/heats appear
        # verbatim in sheet1.xml (inline strings rather than a shared-strings
        # table) — confirm against the exporter's output format.
        title_matches = sum(1 for item in items if item.title in sheet_xml)
        heat_matches = sum(1 for item in items if item.heat in sheet_xml)
        # Tiered scoring: all 10 rows > at least 8 > at least 5.
        if title_matches >= 10 and heat_matches >= 10:
            hotlist_data_correctness = 20
        elif title_matches >= 8 and heat_matches >= 8:
            hotlist_data_correctness = 15
        elif title_matches >= 5 and heat_matches >= 5:
            hotlist_data_correctness = 10
        # Export counts as fully successful only when the workbook carries
        # the expected sheet name and all 10 titles are present.
        workbook_ok = "知乎热榜" in workbook_xml and title_matches >= 10
        if workbook_ok:
            xlsx_export_success = 20

    # Final response quality (max 5): the task reported success and produced
    # a non-blank summary.
    final_response_quality = 0
    summary = final_task.get("summary", "")
    if final_task.get("success") and summary.strip():
        final_response_quality = 5

    deductions = []
    if not exported_path:
        deductions.append("export missing output path")

    total_score = (
        skill_selection
        + tool_discipline
        + hotlist_data_correctness
        + xlsx_export_success
        + final_response_quality
    )

    return {
        "total_score": total_score,
        "skill_selection": skill_selection,
        "tool_discipline": tool_discipline,
        "hotlist_data_correctness": hotlist_data_correctness,
        "xlsx_export_success": xlsx_export_success,
        "final_response_quality": final_response_quality,
        "final_success": bool(final_task.get("success")),
        "final_summary": summary,
        "exported_path": str(exported_path) if exported_path else "",
        "deductions": deductions,
        "logs": logs,
        "stderr": result["stderr"],
    }
|
||||
|
||||
|
||||
def resolve_exported_path(exports: list[Path], summary: str) -> Path | None:
|
||||
match = re.search(r"(/[^\s`]+\.xlsx)", summary)
|
||||
if match:
|
||||
candidate = Path(match.group(1))
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
|
||||
filtered = [
|
||||
path
|
||||
for path in exports
|
||||
if path.name != "zhihu_hotlist_template.xlsx"
|
||||
]
|
||||
if filtered:
|
||||
return sorted(filtered)[-1]
|
||||
return None
|
||||
|
||||
|
||||
def write_acceptance_doc(result: dict, items: list[HotItem], score: dict) -> None:
    """Render the acceptance-evidence markdown and write it to ACCEPTANCE_DOC."""
    ACCEPTANCE_DOC.parent.mkdir(parents=True, exist_ok=True)
    lines = [
        "# Zhihu Hotlist Excel Acceptance",
        "",
        f"- Date: {time.strftime('%Y-%m-%d %H:%M:%S %z')}",
        "- Mode: real provider + live Zhihu hotlist API + simulated browser pipe",
        f"- Workspace: `{result['workspace_root']}`",
        f"- Final success: `{score['final_success']}`",
        f"- Total score: `{score['total_score']}/100`",
        "",
        "## Rubric",
        "",
        f"- skill selection: `{score['skill_selection']}/30`",
        f"- tool discipline: `{score['tool_discipline']}/25`",
        f"- hotlist data correctness: `{score['hotlist_data_correctness']}/20`",
        f"- xlsx export success: `{score['xlsx_export_success']}/20`",
        f"- final response quality: `{score['final_response_quality']}/5`",
        "",
        "## Final Output",
        "",
        f"- exported_path: `{score['exported_path']}`",
        f"- final_summary: `{score['final_summary']}`",
        "",
        "## Skill Logs",
        "",
    ]
    lines += [f"- `{message}`" for message in score["logs"]]
    lines += ["", "## Live Hotlist Sample", ""]
    lines += [f"- {item.rank}. {item.title} | {item.heat}" for item in items]
    if score["stderr"]:
        lines += ["", "## Stderr", ""]
        lines += [f"- `{line}`" for line in score["stderr"]]
    ACCEPTANCE_DOC.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; map Ctrl-C to the conventional code 130.
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        sys.exit(130)
|
||||
Reference in New Issue
Block a user