wip: checkpoint 2026-03-29 runtime work
This commit is contained in:
402
tools/live_acceptance/run_zhihu_hotlist_excel_acceptance.py
Normal file
402
tools/live_acceptance/run_zhihu_hotlist_excel_acceptance.py
Normal file
@@ -0,0 +1,402 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Repository root: two directories above tools/live_acceptance/.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Debug binary produced by `cargo build --bin sgclaw` (see ensure_binary).
SGCLAW_BIN = REPO_ROOT / "target" / "debug" / "sgclaw"
# Real provider config; selected fields are copied into each temp workspace.
REAL_CONFIG_PATH = Path("/home/zyl/.config/superrpa/Default/superrpa/sgclaw_config.json")
# Markdown evidence document written at the end of every acceptance run.
ACCEPTANCE_DOC = REPO_ROOT / "docs" / "acceptance" / "2026-03-29-zhihu-hotlist-excel.md"
# Live Zhihu hot-list endpoint (top 10 entries, desktop variant).
ZH_HOTLIST_API = "https://www.zhihu.com/api/v3/feed/topstory/hot-list-web?limit=10&desktop=true"
# Fixed hmac_seed sent in the init handshake.
# NOTE(review): hard-coded seed — presumably acceptable for acceptance runs
# since it is not a production secret; confirm against the protocol spec.
HANDSHAKE_SEED = "00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff"
|
||||
|
||||
|
||||
@dataclass
class HotItem:
    """One entry of the Zhihu hot list as consumed by this script."""

    # 1-based position on the hot list.
    rank: int
    # Headline text of the entry.
    title: str
    # Heat metric text after normalize_heat_text (whitespace collapsed,
    # trailing "热度" suffix removed).
    heat: str
|
||||
|
||||
|
||||
def main() -> int:
    """Run the full live acceptance flow and print the rubric score.

    Returns:
        0 when the total score reaches the 85-point passing bar, 1 otherwise.
    """
    ensure_binary()
    items = fetch_live_hotlist()
    outcome = run_live_acceptance(items)
    rubric = score_acceptance(outcome, items)
    write_acceptance_doc(outcome, items, rubric)
    print(json.dumps(rubric, ensure_ascii=False, indent=2))
    print(f"evidence written to {ACCEPTANCE_DOC}")
    passed = rubric["total_score"] >= 85
    return 0 if passed else 1
|
||||
|
||||
|
||||
def ensure_binary() -> None:
    """Build the sgclaw debug binary unless it is already present."""
    if not SGCLAW_BIN.exists():
        subprocess.run(["cargo", "build", "--bin", "sgclaw"], cwd=REPO_ROOT, check=True)
|
||||
|
||||
|
||||
def fetch_live_hotlist() -> list[HotItem]:
    """Fetch the top-10 entries of the live Zhihu hot list.

    Returns:
        Ranked HotItem records (rank starts at 1).

    Raises:
        RuntimeError: when an entry is missing its title or heat text.
        requests.HTTPError: when the API responds with an error status.
    """
    headers = {"User-Agent": "Mozilla/5.0", "Referer": "https://www.zhihu.com/hot"}
    response = requests.get(ZH_HOTLIST_API, headers=headers, timeout=20)
    response.raise_for_status()
    entries = response.json()["data"][:10]

    items: list[HotItem] = []
    for rank, entry in enumerate(entries, start=1):
        target = entry.get("target", {})
        title = target.get("title_area", {}).get("text", "").strip()
        heat = normalize_heat_text(target.get("metrics_area", {}).get("text", "").strip())
        if not (title and heat):
            raise RuntimeError(f"missing title/heat in live hotlist entry {rank}")
        items.append(HotItem(rank=rank, title=title, heat=heat))
    return items
|
||||
|
||||
|
||||
def normalize_heat_text(text: str) -> str:
    """Collapse all whitespace in *text* and drop a trailing "热度" suffix."""
    collapsed = re.sub(r"\s+", "", text)
    if collapsed.endswith("热度"):
        collapsed = collapsed[: -len("热度")]
    return collapsed
|
||||
|
||||
|
||||
def build_hotlist_text(items: list[HotItem]) -> str:
    """Render hot-list items as the plain text a browser getText would return.

    Each item contributes two lines: "<rank>. <title>" then "热度 <heat>".
    """
    parts: list[str] = []
    for entry in items:
        parts.extend((f"{entry.rank}. {entry.title}", f"热度 {entry.heat}"))
    return "\n".join(parts)
|
||||
|
||||
|
||||
def write_temp_config(workspace_root: Path) -> Path:
    """Write a trimmed copy of the real provider config into the workspace.

    Only apiKey/baseUrl/model/skillsDir are carried over so the acceptance
    run does not depend on any other local settings.

    Returns:
        Path of the config file created inside *workspace_root*.
    """
    source = json.loads(REAL_CONFIG_PATH.read_text(encoding="utf-8"))
    trimmed = {key: source[key] for key in ("apiKey", "baseUrl", "model", "skillsDir")}
    config_path = workspace_root / "sgclaw_config.json"
    config_path.write_text(json.dumps(trimmed, ensure_ascii=False, indent=2), encoding="utf-8")
    return config_path
|
||||
|
||||
|
||||
def run_live_acceptance(items: list[HotItem]) -> dict:
    """Drive one sgclaw run end-to-end over the stdio JSON-line protocol.

    Spawns the sgclaw binary with a trimmed temp config, performs the init
    handshake, submits the hot-list export task, and answers every browser
    command with simulated responses (navigation, clicks, and the live
    hot-list text built from *items*).

    Returns:
        dict with workspace path, init ack, collected log entries, the
        final task message (None on timeout), stderr lines, and the .xlsx
        files created during the run.

    Raises:
        RuntimeError: when the init handshake is not acknowledged.
        queue.Empty: when the init response does not arrive within 10s.
    """
    workspace_root = Path(tempfile.mkdtemp(prefix="sgclaw-live-acceptance-"))
    config_path = write_temp_config(workspace_root)
    # Snapshot pre-existing exports so only files created by this run count.
    existing_exports = set(workspace_root.rglob("*.xlsx"))
    hotlist_text = build_hotlist_text(items)

    child = subprocess.Popen(
        [str(SGCLAW_BIN), "--config-path", str(config_path)],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=REPO_ROOT,
        bufsize=1,  # line-buffered so each protocol line flushes promptly
    )

    stdout_queue: queue.Queue[str] = queue.Queue()
    stderr_lines: list[str] = []
    start_reader(child.stdout, stdout_queue)
    start_reader(child.stderr, None, stderr_lines)

    logs: list[dict] = []
    final_task = None
    init_ack: dict = {}

    # FIX: the whole protocol exchange runs inside try/finally so the child
    # process is always terminated — previously a handshake timeout
    # (queue.Empty) or the RuntimeError below skipped the shutdown code and
    # leaked a running sgclaw process.
    try:
        send_line(
            child,
            {
                "type": "init",
                "version": "1.0",
                "hmac_seed": HANDSHAKE_SEED,
                "capabilities": ["browser_action"],
            },
        )
        init_ack = read_json_line(stdout_queue, timeout=10)
        if init_ack.get("type") != "init_ack":
            raise RuntimeError(f"unexpected init response: {init_ack}")

        send_line(
            child,
            {
                "type": "submit_task",
                "instruction": "读取知乎热榜数据,并导出 excel 文件",
                "conversation_id": "",
                "messages": [],
                "page_url": "https://www.zhihu.com/",
                "page_title": "知乎",
            },
        )

        current_page = "https://www.zhihu.com/"
        deadline = time.time() + 180  # hard cap on the whole task

        while time.time() < deadline:
            try:
                message = read_json_line(stdout_queue, timeout=5)
            except queue.Empty:
                # No output yet: stop if the child died, otherwise keep waiting.
                if child.poll() is not None:
                    break
                continue
            msg_type = message.get("type")
            if msg_type == "log_entry":
                logs.append(message)
                continue
            if msg_type == "command":
                action = message["action"]
                params = message.get("params", {})
                seq = message["seq"]
                if action == "navigate":
                    current_page = params.get("url", current_page)
                    respond_browser(child, seq, {"navigated": True, "url": current_page})
                    continue
                if action == "click":
                    selector = params.get("selector", "")
                    # Clicking anything hot-related lands on the hot-list page.
                    if "hot" in selector:
                        current_page = "https://www.zhihu.com/hot"
                    respond_browser(child, seq, {"clicked": True, "selector": selector})
                    continue
                if action == "getText":
                    # Serve the live hot-list text only while "on" a Zhihu page.
                    text = hotlist_text if "zhihu.com" in current_page else ""
                    respond_browser(child, seq, {"text": text})
                    continue
                if action == "type":
                    respond_browser(child, seq, {"typed": True})
                    continue
                respond_browser(child, seq, {"unsupported_action": action}, success=False)
                continue
            if msg_type == "task_complete":
                final_task = message
                break
    finally:
        # Best-effort shutdown: polite terminate first, hard kill on timeout.
        try:
            child.terminate()
            child.wait(timeout=5)
        except subprocess.TimeoutExpired:
            child.kill()
            child.wait(timeout=5)

    exports = sorted(set(workspace_root.rglob("*.xlsx")) - existing_exports)
    return {
        "workspace_root": str(workspace_root),
        "init_ack": init_ack,
        "logs": logs,
        "final_task": final_task,
        "stderr": stderr_lines,
        "exports": [str(path) for path in exports],
    }
|
||||
|
||||
|
||||
def start_reader(stream, output_queue: queue.Queue[str] | None, collector: list[str] | None = None) -> None:
|
||||
def _reader() -> None:
|
||||
try:
|
||||
for line in stream:
|
||||
if collector is not None:
|
||||
collector.append(line.rstrip("\n"))
|
||||
if output_queue is not None:
|
||||
output_queue.put(line)
|
||||
finally:
|
||||
stream.close()
|
||||
|
||||
thread = threading.Thread(target=_reader, daemon=True)
|
||||
thread.start()
|
||||
|
||||
|
||||
def send_line(child: subprocess.Popen, payload: dict) -> None:
    """Serialize *payload* as one JSON line on the child's stdin and flush."""
    stdin = child.stdin
    assert stdin is not None
    encoded = json.dumps(payload, ensure_ascii=False)
    stdin.write(encoded + "\n")
    stdin.flush()
|
||||
|
||||
|
||||
def respond_browser(child: subprocess.Popen, seq: int, data: dict, success: bool = True) -> None:
    """Answer a browser command with a canned protocol response.

    Wraps *data* in the response envelope (empty AOM snapshot, fixed fake
    timings) and writes it to the child's stdin.
    """
    envelope = {
        "type": "response",
        "seq": seq,
        "success": success,
        "data": data,
        "aom_snapshot": [],
        "timing": {"queue_ms": 1, "exec_ms": 10},
    }
    send_line(child, envelope)
|
||||
|
||||
|
||||
def read_json_line(output_queue: queue.Queue[str], timeout: int) -> dict:
    """Pop the next line from *output_queue* and parse it as JSON.

    Raises:
        queue.Empty: when nothing arrives within *timeout* seconds.
        json.JSONDecodeError: when the line is not valid JSON.
    """
    line = output_queue.get(timeout=timeout)
    return json.loads(line)
|
||||
|
||||
|
||||
def score_acceptance(result: dict, items: list[HotItem]) -> dict:
    """Score one acceptance run against the 100-point rubric.

    Buckets: skill selection (30), tool discipline (25), hotlist data
    correctness (20), xlsx export success (20), final response quality (5).
    Returns the per-bucket scores plus evidence fields (summary, logs,
    stderr, exported path).
    """
    logs = [entry.get("message", "") for entry in result["logs"]]
    final_task = result.get("final_task") or {}
    exports = [Path(path) for path in result["exports"]]
    exported_path = resolve_exported_path(exports, final_task.get("summary", ""))

    # Skill selection (max 30): 15 points per expected skill, granted either
    # for an explicit read_skill log or for the equivalent observed behavior.
    skill_selection = 0
    executed_hotlist_collection = (
        "navigate https://www.zhihu.com/hot" in logs and
        any(message.startswith("getText ") for message in logs)
    )
    read_hotlist_skill = "read_skill zhihu-hotlist" in logs
    read_office_skill = "read_skill office-export-xlsx" in logs
    completed_office_export = "call openxml_office" in logs

    if read_hotlist_skill or executed_hotlist_collection:
        skill_selection += 15
    if read_office_skill or completed_office_export:
        skill_selection += 15
    # Penalize reading the skills in the wrong order: the hotlist skill is
    # expected to be read before the office-export skill.
    if read_hotlist_skill and read_office_skill and \
            logs.index("read_skill zhihu-hotlist") > logs.index("read_skill office-export-xlsx"):
        skill_selection = max(0, skill_selection - 15)

    # Tool discipline (max 25): deduct for generic tools this task should
    # not need; floor at zero.
    tool_discipline = 25
    if any(message == "call shell" for message in logs):
        tool_discipline -= 15
    if any(message == "call glob_search" for message in logs):
        tool_discipline -= 10
    if any(message == "call file_read" for message in logs):
        tool_discipline -= 10
    tool_discipline = max(0, tool_discipline)

    # Data correctness (max 20) and export success (max 20): inspect the
    # produced workbook directly through its OOXML parts.
    hotlist_data_correctness = 0
    xlsx_export_success = 0
    workbook_ok = False
    if exported_path and exported_path.exists():
        with zipfile.ZipFile(exported_path) as archive:
            sheet_xml = archive.read("xl/worksheets/sheet1.xml").decode("utf-8")
            workbook_xml = archive.read("xl/workbook.xml").decode("utf-8")
        # NOTE(review): substring matching assumes titles/heats appear
        # verbatim in sheet1.xml (inline strings rather than a shared-strings
        # table) — confirm against the exporter's output format.
        title_matches = sum(1 for item in items if item.title in sheet_xml)
        heat_matches = sum(1 for item in items if item.heat in sheet_xml)
        # Tiered scoring: all 10 rows > at least 8 > at least 5.
        if title_matches >= 10 and heat_matches >= 10:
            hotlist_data_correctness = 20
        elif title_matches >= 8 and heat_matches >= 8:
            hotlist_data_correctness = 15
        elif title_matches >= 5 and heat_matches >= 5:
            hotlist_data_correctness = 10
        # Export counts as fully successful only when the workbook carries
        # the expected sheet name and all 10 titles are present.
        workbook_ok = "知乎热榜" in workbook_xml and title_matches >= 10
        if workbook_ok:
            xlsx_export_success = 20

    # Final response quality (max 5): the task reported success and produced
    # a non-blank summary.
    final_response_quality = 0
    summary = final_task.get("summary", "")
    if final_task.get("success") and summary.strip():
        final_response_quality = 5

    deductions = []
    if not exported_path:
        deductions.append("export missing output path")

    total_score = (
        skill_selection
        + tool_discipline
        + hotlist_data_correctness
        + xlsx_export_success
        + final_response_quality
    )

    return {
        "total_score": total_score,
        "skill_selection": skill_selection,
        "tool_discipline": tool_discipline,
        "hotlist_data_correctness": hotlist_data_correctness,
        "xlsx_export_success": xlsx_export_success,
        "final_response_quality": final_response_quality,
        "final_success": bool(final_task.get("success")),
        "final_summary": summary,
        "exported_path": str(exported_path) if exported_path else "",
        "deductions": deductions,
        "logs": logs,
        "stderr": result["stderr"],
    }
|
||||
|
||||
|
||||
def resolve_exported_path(exports: list[Path], summary: str) -> Path | None:
|
||||
match = re.search(r"(/[^\s`]+\.xlsx)", summary)
|
||||
if match:
|
||||
candidate = Path(match.group(1))
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
|
||||
filtered = [
|
||||
path
|
||||
for path in exports
|
||||
if path.name != "zhihu_hotlist_template.xlsx"
|
||||
]
|
||||
if filtered:
|
||||
return sorted(filtered)[-1]
|
||||
return None
|
||||
|
||||
|
||||
def write_acceptance_doc(result: dict, items: list[HotItem], score: dict) -> None:
    """Render the acceptance-evidence markdown and write it to ACCEPTANCE_DOC."""
    ACCEPTANCE_DOC.parent.mkdir(parents=True, exist_ok=True)
    lines = [
        "# Zhihu Hotlist Excel Acceptance",
        "",
        f"- Date: {time.strftime('%Y-%m-%d %H:%M:%S %z')}",
        "- Mode: real provider + live Zhihu hotlist API + simulated browser pipe",
        f"- Workspace: `{result['workspace_root']}`",
        f"- Final success: `{score['final_success']}`",
        f"- Total score: `{score['total_score']}/100`",
        "",
        "## Rubric",
        "",
        f"- skill selection: `{score['skill_selection']}/30`",
        f"- tool discipline: `{score['tool_discipline']}/25`",
        f"- hotlist data correctness: `{score['hotlist_data_correctness']}/20`",
        f"- xlsx export success: `{score['xlsx_export_success']}/20`",
        f"- final response quality: `{score['final_response_quality']}/5`",
        "",
        "## Final Output",
        "",
        f"- exported_path: `{score['exported_path']}`",
        f"- final_summary: `{score['final_summary']}`",
        "",
        "## Skill Logs",
        "",
    ]
    lines += [f"- `{message}`" for message in score["logs"]]
    lines += ["", "## Live Hotlist Sample", ""]
    lines += [f"- {item.rank}. {item.title} | {item.heat}" for item in items]
    if score["stderr"]:
        lines += ["", "## Stderr", ""]
        lines += [f"- `{line}`" for line in score["stderr"]]
    ACCEPTANCE_DOC.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; map Ctrl-C to the conventional code 130.
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        sys.exit(130)
|
||||
Reference in New Issue
Block a user