103 lines
3.9 KiB
Python
103 lines
3.9 KiB
Python
import unittest
|
|
|
|
from tools.live_acceptance.run_zhihu_hotlist_excel_acceptance import HotItem, score_acceptance
|
|
|
|
|
|
class LiveAcceptanceScoreTest(unittest.TestCase):
    """Exercise ``score_acceptance`` against hand-built run results.

    Every case assembles a result dict (``logs``, ``final_task``, ``stderr``,
    ``exports``) together with a one-item hotlist, scores the pair, and then
    checks individual score fields and/or the reported deduction messages.
    """

    # Log messages and the summary text shared by several cases below.
    _PLAN_LOG = "plan 读取知乎热榜并导出 Excel"
    _NAVIGATE_LOG = "navigate https://www.zhihu.com/hot"
    _EXPORT_CALL_LOG = "call openxml_office"
    _EXPORTED_SUMMARY = "已导出 Excel /tmp/sgclaw/out.xlsx"

    @staticmethod
    def _run_result(log_messages, summary):
        # Build the dict handed to score_acceptance: one {"message": ...}
        # entry per log message, a successful final task carrying `summary`,
        # and empty stderr / exports lists.
        return {
            "logs": [{"message": text} for text in log_messages],
            "final_task": {
                "success": True,
                "summary": summary,
            },
            "stderr": [],
            "exports": [],
        }

    @staticmethod
    def _single_hot_item(title):
        # One-element hotlist; only the title differs between cases.
        return [HotItem(rank=1, title=title, heat="123万")]

    def test_score_acceptance_handles_preloaded_office_skill_without_read_skill_log(self):
        # The logs hold a plan entry and an openxml_office call but no entry
        # for reading the skill beforehand.
        outcome = self._run_result(
            (
                self._PLAN_LOG,
                self._NAVIGATE_LOG,
                "getText body",
                self._EXPORT_CALL_LOG,
            ),
            self._EXPORTED_SUMMARY,
        )
        hot_items = self._single_hot_item("标题")

        score = score_acceptance(outcome, hot_items)

        self.assertEqual(score["skill_selection"], 30)
        self.assertEqual(score["final_response_quality"], 5)
        self.assertNotIn(
            "planner output missing before tool execution",
            score["deductions"],
        )

    def test_score_acceptance_flags_missing_plan_repeated_summary_and_fake_export_path(self):
        # No plan entry in the logs, and the summary repeats the same
        # paragraph twice before naming an export path.
        repeated = "第一段总结。\n\n第一段总结。"
        outcome = self._run_result(
            (
                self._NAVIGATE_LOG,
                "getText main",
                self._EXPORT_CALL_LOG,
            ),
            f"{repeated}\n\n导出路径:/tmp/not-real.xlsx",
        )
        hot_items = self._single_hot_item("标题")

        score = score_acceptance(outcome, hot_items)

        expected_deductions = (
            "planner output missing before tool execution",
            "repeated assistant paragraphs detected",
            "export missing output path",
        )
        for deduction in expected_deductions:
            self.assertIn(deduction, score["deductions"])
        self.assertEqual(score["final_response_quality"], 0)

    def test_score_acceptance_flags_fake_rows_when_export_contains_no_live_hotlist_data(self):
        # A complete-looking log sequence, but the result carries an empty
        # exports list.
        outcome = self._run_result(
            (
                self._PLAN_LOG,
                self._NAVIGATE_LOG,
                "getText main",
                self._EXPORT_CALL_LOG,
            ),
            self._EXPORTED_SUMMARY,
        )
        hot_items = self._single_hot_item("真实标题")

        score = score_acceptance(outcome, hot_items)

        self.assertIn(
            "hotlist rows were not exported as structured live data",
            score["deductions"],
        )
        self.assertEqual(score["hotlist_data_correctness"], 0)
        self.assertEqual(score["xlsx_export_success"], 0)

    def test_score_acceptance_flags_structured_handoff_retry_noise(self):
        # The export tool is called twice with an "unsupported columns"
        # message logged between the two calls.
        outcome = self._run_result(
            (
                self._PLAN_LOG,
                self._NAVIGATE_LOG,
                "getText main",
                self._EXPORT_CALL_LOG,
                "unsupported columns: expected [rank, title, heat]",
                self._EXPORT_CALL_LOG,
            ),
            self._EXPORTED_SUMMARY,
        )
        hot_items = self._single_hot_item("真实标题")

        score = score_acceptance(outcome, hot_items)

        self.assertIn(
            "structured handoff required export retries",
            score["deductions"],
        )
|
|
|
|
|
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|