Files
claw/tools/generate_scene_validation_xlsx.py

307 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import datetime as _dt
import os
import xml.sax.saxutils as saxutils
import zipfile
from pathlib import Path
# Machine-specific absolute Windows paths: the script only runs where they exist.
# NOTE(review): consider making these configurable (CLI/env) — confirm intended usage.
# Base directory under which the workbook is written (presumably the repo root).
ROOT = Path(r"D:\data\ideaSpace\rust\sgClaw\claw-new")
# Directory whose immediate sub-directories are listed as scene names by main().
SCENES_DIR = Path(r"D:\desk\智能体资料\全量业务场景\一平台场景")
# Destination path of the generated workbook.
OUTPUT = ROOT / "docs" / "2026-04-18-102-scenes-validation-overview.xlsx"
# Hand-maintained validation record, keyed by scene name.
# Every entry carries the same six string fields:
#   family     - family label of the scene (e.g. "G2", "G1-E")
#   group      - human-readable group name
#   validated  - value for the "validated?" column; empty for every entry below
#   status     - current validation status text
#   result     - validation outcome; build_summary_rows counts the values
#                "通过", "未通过", "Fail-closed" and "已重分组"
#   conclusion - free-text conclusion
# NOTE(review): every "validated" value is an empty string — possibly a marker
# character was lost somewhere; confirm against the original file.
SCENE_STATUS = {
    "台区线损大数据-月_周累计线损率统计分析": {
        "family": "G2",
        "group": "线损多模式家族",
        "validated": "",
        "status": "已实跑",
        "result": "未通过",
        "conclusion": "首轮已生成但语义未通过,后续已进入 G2 整改主线",
    },
    "白银线损周报": {
        "family": "G2",
        "group": "线损多模式家族",
        "validated": "",
        "status": "已实跑",
        "result": "未通过",
        "conclusion": "已生成,但首轮语义未通过",
    },
    "线损同期差异报表": {
        "family": "G2",
        "group": "线损多模式家族",
        "validated": "",
        "status": "已实跑",
        "result": "未通过",
        "conclusion": "已生成,但首轮语义未通过",
    },
    "高低压新增报装容量月度统计表": {
        "family": "G1-E",
        "group": "业扩/报装轻量补查家族",
        "validated": "",
        "status": "已实跑并通过 P0",
        "result": "通过",
        "conclusion": "当前唯一已打通的 G1-E 真实样本",
    },
    "电能表现场检验完成率指标报表": {
        "family": "G6",
        "group": "宿主桥接多步查询家族",
        "validated": "",
        "status": "已复核",
        "result": "已重分组",
        "conclusion": "从 G1 边界移出,判为宿主桥接多步查询型",
    },
    "计量资产库存统计": {
        "family": "G7",
        "group": "多接口盘点汇总家族",
        "validated": "",
        "status": "已复核",
        "result": "已重分组",
        "conclusion": "从 G1 边界移出,判为多接口盘点汇总型",
    },
    "95598供电服务月报": {
        "family": "G8",
        "group": "抓取落库分析出文档家族",
        "validated": "",
        "status": "已复核",
        "result": "已重分组",
        "conclusion": "从 G1 边界移出,判为抓取落库分析出文档型",
    },
    "用电报装信息统计列表": {
        "family": "G1-E 候选/待重分",
        "group": "业扩/报装轻量补查家族",
        "validated": "",
        "status": "已实跑",
        "result": "Fail-closed",
        "conclusion": "被识别为 single_request_enrichment但证据不闭环",
    },
    "业扩报装质量评价体系": {
        "family": "G1-E 候选/待重分",
        "group": "业扩/报装轻量补查家族",
        "validated": "",
        "status": "已实跑",
        "result": "Fail-closed",
        "conclusion": "被识别为 single_request_enrichment但证据不闭环",
    },
}
def infer_group(scene_name: str) -> tuple[str, str]:
    """Return the ``(group, family)`` labels for a scene.

    Scenes recorded in ``SCENE_STATUS`` use their recorded labels. Any other
    scene is classified by the first keyword rule whose keyword occurs in the
    name; rules are tried in order, so earlier rules win.
    """
    recorded = SCENE_STATUS.get(scene_name)
    if recorded is not None:
        return recorded["group"], recorded["family"]

    # Ordered (keywords, (group, family)) rules; order encodes priority.
    keyword_rules = (
        (("线损",), ("线损家族候选", "候选 G2")),
        (("报装", "业扩", "供电方案"), ("业扩/报装家族候选", "候选 G1-E/G6")),
        (("95598", "12398"), ("95598/工单家族候选", "待分组")),
        (("计量", "表计", "库存"), ("计量/资产家族候选", "待分组")),
        (("日报", "周报", "月报", "统计", "报表", "报告"), ("通用报表候选", "待分组")),
    )
    for keywords, labels in keyword_rules:
        for keyword in keywords:
            if keyword in scene_name:
                return labels

    # No rule matched.
    return "未分组", "待分组"
def build_summary_rows(scene_names: list[str]) -> list[list[str]]:
    """Build the rows of the summary sheet, header row first.

    Args:
        scene_names: Names of all scenes found in the scenes directory.

    Returns:
        Rows of ``[item, count, note]`` strings. The final row carries the
        local generation timestamp in place of a count.
    """
    validated_count = len(SCENE_STATUS)

    # Tally the outcomes in one pass instead of rescanning once per outcome.
    result_counts: dict[str, int] = {}
    for item in SCENE_STATUS.values():
        result_counts[item["result"]] = result_counts.get(item["result"], 0) + 1

    # Count the scenes that really lack a recorded conclusion. Subtracting
    # len(SCENE_STATUS) from the total goes wrong (even negative) as soon as a
    # recorded scene is missing from scene_names, and would disagree with
    # build_all_rows, which decides per scene with this same membership test.
    unvalidated = sum(1 for name in scene_names if name not in SCENE_STATUS)

    passed_count = result_counts.get("通过", 0)
    not_passed = result_counts.get("未通过", 0)
    failed_closed = result_counts.get("Fail-closed", 0)
    regrouped = result_counts.get("已重分组", 0)

    return [
        ["统计项", "数量", "说明"],
        ["总场景数", str(len(scene_names)), "场景目录实际统计值"],
        ["已有明确验证结论", str(validated_count), "已实跑或已基于生成结果形成正式结论"],
        ["尚未进入当前轮实跑验证", str(unvalidated), "暂无单场景正式验证结论"],
        ["已通过真实样本验证", str(passed_count), "当前仅 1 个 G1-E P0 样本通过"],
        ["已实跑但未通过", str(not_passed), "当前主要集中在 G2 首轮样本"],
        ["已实跑且 Fail-closed", str(failed_closed), "识别正确但当前合同不闭环"],
        ["已完成重分组复核", str(regrouped), "已从 G1 边界移出的样本"],
        ["生成时间", _dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "本 Excel 生成时间"],
    ]
def build_verified_rows() -> list[list[str]]:
    """Build the rows of the validated-scenes sheet, header row first.

    One row is emitted per ``SCENE_STATUS`` entry, in dictionary order: the
    scene name followed by its family, group, validated flag, status, result
    and conclusion.
    """
    header = ["场景", "当前归属/目标家族", "分组结果", "是否已验证", "当前状态", "验证结果", "结论"]
    # Column order of the record fields that follow the scene name.
    field_order = ("family", "group", "validated", "status", "result", "conclusion")
    body = [
        [name, *(record[field] for field in field_order)]
        for name, record in SCENE_STATUS.items()
    ]
    return [header, *body]
def build_all_rows(scene_names: list[str]) -> list[list[str]]:
    """Build the rows of the all-scenes sheet, header row first.

    Scenes recorded in ``SCENE_STATUS`` are reported with their recorded
    fields. Every other scene gets the group/family from ``infer_group`` plus
    fixed placeholder text saying it has not been validated yet.
    """
    table = [["场景", "当前分组结果", "当前家族判断", "是否已有明确验证结论", "当前验证状态", "验证结果", "备注"]]
    for name in scene_names:
        record = SCENE_STATUS.get(name)
        if record is None:
            # No recorded conclusion: fall back to keyword-based classification.
            group_label, family_label = infer_group(name)
            table.append(
                [
                    name,
                    group_label,
                    family_label,
                    "",
                    "未进入当前轮实跑验证",
                    "",
                    "当前仅有目录信息或家族候选判断,尚无单场景正式验证结论",
                ]
            )
            continue
        table.append(
            [
                name,
                record["group"],
                record["family"],
                record["validated"],
                record["status"],
                record["result"],
                record["conclusion"],
            ]
        )
    return table
def col_name(index: int) -> str:
    """Convert a 1-based column number into spreadsheet column letters.

    Uses bijective base-26: 1 -> "A", 26 -> "Z", 27 -> "AA". Numbers below 1
    produce an empty string.
    """
    letters: list[str] = []
    remaining = index
    while remaining > 0:
        # There is no zero digit, so shift to 0-based before dividing.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("A") + offset))
    # Digits were produced least-significant first.
    return "".join(reversed(letters))
def sheet_xml(rows: list[list[str]]) -> str:
    """Render *rows* as the XML of one SpreadsheetML worksheet part.

    Every value is written as an inline string cell; ``None`` becomes an empty
    string and anything else is converted with ``str``.

    Args:
        rows: Row-major cell values. May be empty or contain empty rows.

    Returns:
        The complete worksheet XML document as a string.
    """
    xml_rows: list[str] = []
    for r_idx, row in enumerate(rows, start=1):
        cells: list[str] = []
        for c_idx, value in enumerate(row, start=1):
            ref = f"{col_name(c_idx)}{r_idx}"
            raw = "" if value is None else str(value)
            text = saxutils.escape(raw)
            # Without xml:space="preserve" spreadsheet consumers may drop
            # leading/trailing whitespace, so mark such values explicitly.
            space_attr = ' xml:space="preserve"' if raw != raw.strip() else ""
            cells.append(
                f'<c r="{ref}" t="inlineStr"><is><t{space_attr}>{text}</t></is></c>'
            )
        xml_rows.append(f'<row r="{r_idx}">{"".join(cells)}</row>')

    # A sheet without any cell has no bottom-right corner: max() over no rows
    # would raise, and a zero column count would yield a malformed reference.
    # Fall back to the single-cell reference in both cases.
    widest = max((len(r) for r in rows), default=0)
    if widest:
        dimension = f"A1:{col_name(widest)}{len(rows)}"
    else:
        dimension = "A1"

    return (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">'
        f'<dimension ref="{dimension}"/>'
        "<sheetViews><sheetView workbookViewId=\"0\"/></sheetViews>"
        "<sheetFormatPr defaultRowHeight=\"15\"/>"
        "<sheetData>"
        + "".join(xml_rows)
        + "</sheetData></worksheet>"
    )
def write_xlsx(output: Path, sheets: list[tuple[str, list[list[str]]]]) -> None:
    """Write *sheets* to *output* as a minimal .xlsx (OOXML) package.

    The package is assembled by hand with ``zipfile``: content types, package
    and workbook relationships, core/app document properties, the workbook
    part and one worksheet part per sheet (rendered by ``sheet_xml``).

    Args:
        output: Destination file; missing parent directories are created and
            an existing file is overwritten.
        sheets: ``(sheet_name, rows)`` pairs in workbook order.
    """
    output.parent.mkdir(parents=True, exist_ok=True)

    content_types = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">',
        '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>',
        '<Default Extension="xml" ContentType="application/xml"/>',
        '<Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>',
        '<Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/>',
        '<Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/>',
    ]
    content_types.extend(
        f'<Override PartName="/xl/worksheets/sheet{idx}.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>'
        for idx in range(1, len(sheets) + 1)
    )
    content_types.append("</Types>")

    workbook_sheets = []
    workbook_rels = []
    for idx, (name, _rows) in enumerate(sheets, start=1):
        # The name is placed inside a double-quoted attribute. escape() only
        # handles &, < and > by default, so the quote must be escaped as well
        # or a name containing '"' would produce malformed XML.
        safe_name = saxutils.escape(name, {'"': "&quot;"})
        workbook_sheets.append(
            f'<sheet name="{safe_name}" sheetId="{idx}" r:id="rId{idx}"/>'
        )
        workbook_rels.append(
            f'<Relationship Id="rId{idx}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet{idx}.xml"/>'
        )

    workbook_xml = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" '
        'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">'
        "<sheets>"
        + "".join(workbook_sheets)
        + "</sheets></workbook>"
    )
    workbook_rels_xml = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
        + "".join(workbook_rels)
        + "</Relationships>"
    )
    root_rels_xml = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
        '<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>'
        '<Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/>'
        '<Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/>'
        "</Relationships>"
    )

    # timezone.utc works on every supported Python; the datetime.UTC alias
    # only exists from 3.11 on. The "+00:00" suffix is rewritten to "Z".
    now = (
        _dt.datetime.now(_dt.timezone.utc)
        .replace(microsecond=0)
        .isoformat()
        .replace("+00:00", "Z")
    )
    core_xml = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties" '
        'xmlns:dc="http://purl.org/dc/elements/1.1/" '
        'xmlns:dcterms="http://purl.org/dc/terms/" '
        'xmlns:dcmitype="http://purl.org/dc/dcmitype/" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">'
        "<dc:creator>Codex</dc:creator>"
        "<cp:lastModifiedBy>Codex</cp:lastModifiedBy>"
        f'<dcterms:created xsi:type="dcterms:W3CDTF">{now}</dcterms:created>'
        f'<dcterms:modified xsi:type="dcterms:W3CDTF">{now}</dcterms:modified>'
        "</cp:coreProperties>"
    )
    app_xml = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties" '
        'xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes">'
        "<Application>Microsoft Excel</Application>"
        "</Properties>"
    )

    with zipfile.ZipFile(output, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("[Content_Types].xml", "".join(content_types))
        zf.writestr("_rels/.rels", root_rels_xml)
        zf.writestr("docProps/core.xml", core_xml)
        zf.writestr("docProps/app.xml", app_xml)
        zf.writestr("xl/workbook.xml", workbook_xml)
        zf.writestr("xl/_rels/workbook.xml.rels", workbook_rels_xml)
        for idx, (_name, rows) in enumerate(sheets, start=1):
            zf.writestr(f"xl/worksheets/sheet{idx}.xml", sheet_xml(rows))
def main() -> None:
    """Generate the validation-overview workbook and print its path.

    Scene names are the names of the immediate sub-directories of
    ``SCENES_DIR``, sorted. Three sheets are written to ``OUTPUT``: the
    summary, the validated scenes and the full scene table.
    """
    scene_dirs = [entry for entry in SCENES_DIR.iterdir() if entry.is_dir()]
    scene_names = sorted(entry.name for entry in scene_dirs)

    summary_rows = build_summary_rows(scene_names)
    verified_rows = build_verified_rows()
    all_rows = build_all_rows(scene_names)

    workbook = [
        ("汇总", summary_rows),
        ("已验证场景", verified_rows),
        ("102场景总表", all_rows),
    ]
    write_xlsx(OUTPUT, workbook)
    print(os.fspath(OUTPUT))


if __name__ == "__main__":
    main()