acceptance: stabilize zhihu hotlist excel flow
This commit is contained in:
@@ -51,3 +51,41 @@ async fn openxml_office_tool_renders_hotlist_xlsx_from_rows() {
|
||||
assert!(xml.contains("问题二"));
|
||||
assert!(!xml.contains("{{TITLE_1}}"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn openxml_office_tool_accepts_reordered_columns_when_rows_are_structured() {
|
||||
let workspace_root = temp_workspace_root();
|
||||
let output_path = workspace_root.join("out/zhihu-hotlist-reordered.xlsx");
|
||||
let tool = OpenXmlOfficeTool::new(workspace_root.clone());
|
||||
|
||||
let result = tool
|
||||
.execute(json!({
|
||||
"sheet_name": "知乎热榜",
|
||||
"columns": ["title", "heat", "rank"],
|
||||
"rows": [
|
||||
["问题一", "344万", 1],
|
||||
["问题二", "266万", 2]
|
||||
],
|
||||
"output_path": output_path
|
||||
}))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(result.success, "{result:?}");
|
||||
assert!(output_path.exists());
|
||||
|
||||
let unzip = ProcessCommand::new("unzip")
|
||||
.args([
|
||||
"-p",
|
||||
output_path.to_str().unwrap(),
|
||||
"xl/worksheets/sheet1.xml",
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(unzip.status.success());
|
||||
|
||||
let xml = String::from_utf8(unzip.stdout).unwrap();
|
||||
assert!(xml.contains("问题一"));
|
||||
assert!(xml.contains("344万"));
|
||||
assert!(xml.contains(">1<"));
|
||||
}
|
||||
|
||||
@@ -7,14 +7,14 @@ class LiveAcceptanceScoreTest(unittest.TestCase):
|
||||
def test_score_acceptance_handles_preloaded_office_skill_without_read_skill_log(self):
|
||||
result = {
|
||||
"logs": [
|
||||
{"message": "navigate https://www.zhihu.com/hot"},
|
||||
{"message": "plan 读取知乎热榜并导出 Excel"},
|
||||
{"message": "navigate https://www.zhihu.com/hot"},
|
||||
{"message": "getText body"},
|
||||
{"message": "call openxml_office"},
|
||||
],
|
||||
"final_task": {
|
||||
"success": True,
|
||||
"summary": "已导出 Excel",
|
||||
"summary": "已导出 Excel /tmp/sgclaw/out.xlsx",
|
||||
},
|
||||
"stderr": [],
|
||||
"exports": [],
|
||||
@@ -25,6 +25,77 @@ class LiveAcceptanceScoreTest(unittest.TestCase):
|
||||
|
||||
self.assertEqual(score["skill_selection"], 30)
|
||||
self.assertEqual(score["final_response_quality"], 5)
|
||||
self.assertNotIn("planner output missing before tool execution", score["deductions"])
|
||||
|
||||
def test_score_acceptance_flags_missing_plan_repeated_summary_and_fake_export_path(self):
|
||||
repeated = "第一段总结。\n\n第一段总结。"
|
||||
result = {
|
||||
"logs": [
|
||||
{"message": "navigate https://www.zhihu.com/hot"},
|
||||
{"message": "getText main"},
|
||||
{"message": "call openxml_office"},
|
||||
],
|
||||
"final_task": {
|
||||
"success": True,
|
||||
"summary": f"{repeated}\n\n导出路径:/tmp/not-real.xlsx",
|
||||
},
|
||||
"stderr": [],
|
||||
"exports": [],
|
||||
}
|
||||
items = [HotItem(rank=1, title="标题", heat="123万")]
|
||||
|
||||
score = score_acceptance(result, items)
|
||||
|
||||
self.assertIn("planner output missing before tool execution", score["deductions"])
|
||||
self.assertIn("repeated assistant paragraphs detected", score["deductions"])
|
||||
self.assertIn("export missing output path", score["deductions"])
|
||||
self.assertEqual(score["final_response_quality"], 0)
|
||||
|
||||
def test_score_acceptance_flags_fake_rows_when_export_contains_no_live_hotlist_data(self):
|
||||
result = {
|
||||
"logs": [
|
||||
{"message": "plan 读取知乎热榜并导出 Excel"},
|
||||
{"message": "navigate https://www.zhihu.com/hot"},
|
||||
{"message": "getText main"},
|
||||
{"message": "call openxml_office"},
|
||||
],
|
||||
"final_task": {
|
||||
"success": True,
|
||||
"summary": "已导出 Excel /tmp/sgclaw/out.xlsx",
|
||||
},
|
||||
"stderr": [],
|
||||
"exports": [],
|
||||
}
|
||||
items = [HotItem(rank=1, title="真实标题", heat="123万")]
|
||||
|
||||
score = score_acceptance(result, items)
|
||||
|
||||
self.assertIn("hotlist rows were not exported as structured live data", score["deductions"])
|
||||
self.assertEqual(score["hotlist_data_correctness"], 0)
|
||||
self.assertEqual(score["xlsx_export_success"], 0)
|
||||
|
||||
def test_score_acceptance_flags_structured_handoff_retry_noise(self):
|
||||
result = {
|
||||
"logs": [
|
||||
{"message": "plan 读取知乎热榜并导出 Excel"},
|
||||
{"message": "navigate https://www.zhihu.com/hot"},
|
||||
{"message": "getText main"},
|
||||
{"message": "call openxml_office"},
|
||||
{"message": "unsupported columns: expected [rank, title, heat]"},
|
||||
{"message": "call openxml_office"},
|
||||
],
|
||||
"final_task": {
|
||||
"success": True,
|
||||
"summary": "已导出 Excel /tmp/sgclaw/out.xlsx",
|
||||
},
|
||||
"stderr": [],
|
||||
"exports": [],
|
||||
}
|
||||
items = [HotItem(rank=1, title="真实标题", heat="123万")]
|
||||
|
||||
score = score_acceptance(result, items)
|
||||
|
||||
self.assertIn("structured handoff required export retries", score["deductions"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user