acceptance: stabilize Zhihu hotlist Excel flow

This commit is contained in:
zyl
2026-03-29 23:17:31 +08:00
parent e294fbb9b1
commit ef88487f4a
5 changed files with 287 additions and 59 deletions

View File

@@ -250,16 +250,18 @@ def read_json_line(output_queue: queue.Queue[str], timeout: int) -> dict:
def score_acceptance(result: dict, items: list[HotItem]) -> dict:
logs = [entry.get("message", "") for entry in result["logs"]]
log_entries = result["logs"]
logs = [entry.get("message", "") for entry in log_entries]
final_task = result.get("final_task") or {}
exports = [Path(path) for path in result["exports"]]
exported_path = resolve_exported_path(exports, final_task.get("summary", ""))
skill_selection = 0
executed_hotlist_collection = (
browser_path_exists = (
"navigate https://www.zhihu.com/hot" in logs and
any(message.startswith("getText ") for message in logs)
)
skill_selection = 0
executed_hotlist_collection = browser_path_exists
read_hotlist_skill = "read_skill zhihu-hotlist" in logs
read_office_skill = "read_skill office-export-xlsx" in logs
completed_office_export = "call openxml_office" in logs
@@ -302,12 +304,24 @@ def score_acceptance(result: dict, items: list[HotItem]) -> dict:
final_response_quality = 0
summary = final_task.get("summary", "")
if final_task.get("success") and summary.strip():
repeated_paragraphs = find_repeated_paragraphs(summary)
if final_task.get("success") and summary.strip() and not repeated_paragraphs:
final_response_quality = 5
deductions = []
planner_index = find_planner_log_index(log_entries)
first_tool_index = find_first_tool_execution_index(logs)
if planner_index is None or (first_tool_index is not None and planner_index > first_tool_index):
deductions.append("planner output missing before tool execution")
if repeated_paragraphs:
deductions.append("repeated assistant paragraphs detected")
if not exported_path:
deductions.append("export missing output path")
if browser_path_exists and (not exported_path or hotlist_data_correctness == 0):
deductions.append("hotlist rows were not exported as structured live data")
if logs.count("call openxml_office") > 1 or any(
"unsupported columns:" in message for message in logs):
deductions.append("structured handoff required export retries")
total_score = (
skill_selection
@@ -316,6 +330,7 @@ def score_acceptance(result: dict, items: list[HotItem]) -> dict:
+ xlsx_export_success
+ final_response_quality
)
total_score = max(0, total_score - acceptance_penalty(deductions))
return {
"total_score": total_score,
@@ -333,6 +348,58 @@ def score_acceptance(result: dict, items: list[HotItem]) -> dict:
}
def find_planner_log_index(log_entries: list[dict]) -> int | None:
for index, entry in enumerate(log_entries):
message = str(entry.get("message", "")).strip()
if entry.get("level") == "plan":
return index
if not message:
continue
if message.startswith("plan ") or "先规划再执行" in message:
return index
return None
def find_first_tool_execution_index(logs: list[str]) -> int | None:
tool_prefixes = (
"navigate ",
"click ",
"type ",
"getText ",
"call openxml_office",
"call screen_html_export",
)
for index, message in enumerate(logs):
if message.startswith(tool_prefixes):
return index
return None
def find_repeated_paragraphs(summary: str) -> list[str]:
    """Return the whitespace-normalized paragraphs that occur more than once.

    Paragraphs are separated by blank lines; runs of whitespace collapse to a
    single space before comparison. Each duplicate appears once in the result,
    ordered by the position of its first repetition.
    """
    duplicates: list[str] = []
    observed: set[str] = set()
    for raw in re.split(r"\n\s*\n", summary):
        text = re.sub(r"\s+", " ", raw).strip()
        if not text:
            continue
        if text not in observed:
            observed.add(text)
        elif text not in duplicates:
            # Record a paragraph the first time it shows up as a duplicate.
            duplicates.append(text)
    return duplicates
def acceptance_penalty(deductions: list[str]) -> int:
    """Return the total score penalty for the listed deduction reasons.

    Each known reason maps to a fixed point cost; reasons not in the map
    contribute nothing, so unrecognized deductions are silently free.
    """
    penalty_map = {
        "planner output missing before tool execution": 10,
        "repeated assistant paragraphs detected": 10,
        "export missing output path": 10,
        "hotlist rows were not exported as structured live data": 15,
        "structured handoff required export retries": 10,
    }
    total = 0
    for reason in deductions:
        total += penalty_map.get(reason, 0)
    return total
def resolve_exported_path(exports: list[Path], summary: str) -> Path | None:
match = re.search(r"(/[^\s`]+\.xlsx)", summary)
if match: