diff --git a/docs/2026-04-18-102-scenes-validation-overview.xlsx b/docs/2026-04-18-102-scenes-validation-overview.xlsx new file mode 100644 index 0000000..d924290 Binary files /dev/null and b/docs/2026-04-18-102-scenes-validation-overview.xlsx differ diff --git a/docs/sgClaw技术路线总览.html b/docs/sgClaw技术路线总览.html new file mode 100644 index 0000000..313f00b --- /dev/null +++ b/docs/sgClaw技术路线总览.html @@ -0,0 +1,309 @@ + + + + + + sgClaw 智能浏览器自动化平台 - 技术路线总览 + + + + +
+

sgClaw 智能浏览器自动化平台

+
用自然语言驱动浏览器操作 让业务流程自动执行
+
+
+ +
+
+
1
+
一句话理解 sgClaw
+
+
+
+ sgClaw 是一个"智能浏览器助手"。用户用自然语言说出需求(例如"帮我查本月线损率"),sgClaw 自动在浏览器中完成点击、输入、查询、导出等一系列操作,最终将结果呈现给用户。全程无需人工逐步操作浏览器。 +
+
+
+ +
+
+
2
+
整体业务流程 - 从用户指令到结果呈现
+
+
+
+ 以下是用户使用 sgClaw 的完整流程。用户只需输入一句话,剩下的全部自动完成。 +
+
+graph LR + A["用户输入自然语言指令\n例如: 帮我查本月线损率"] --> B["sgClaw 理解指令意图\n识别是哪个业务场景"] + B --> C{"是否已知场景?"} + C -->|是 已知场景| D["直接执行预设流程\n快速通道 无需AI"] + C -->|否 新场景| E["AI大模型分析理解\n拆解为具体操作步骤"] + D --> F["自动操作浏览器\n点击 输入 查询 导出"] + E --> F + F --> G["将结果呈现给用户\n生成报表 打开Excel"] + + classDef userInput fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef ai fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef fast fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + classDef action fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef result fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + + class A userInput + class B ai + class C ai + class D fast + class E ai + class F action + class G result +
+
+
+ +
+
+
3
+
平台如何与现有业务系统协同工作
+
+
+
+ sgClaw 不需要改造现有业务系统,它像一个坐在电脑前的员工,直接操作浏览器完成工作。 +
+
+graph TB + User["业务人员\n分公司副主任 线损专责 班组长"] + + subgraph Platform["统一业务平台"] + S1["线损大数据系统\n查询线损率 统计分析"] + S2["95598客服系统\n故障报修 工单处理"] + S3["其他业务子系统\n..."] + end + + subgraph sgClaw["sgClaw 智能助手"] + SG1["理解用户自然语言指令"] + SG2["自动操作浏览器完成任务"] + SG3["安全保障 权限管控"] + end + + Result["最终结果\nExcel报表 Word文档 数据展示"] + + User -->|"说出需求"| SG1 + SG1 --> SG2 + SG1 --> SG3 + SG2 -->|"自动点击查询"| S1 + SG2 -->|"自动填写表单"| S2 + SG2 -->|"自动导出报表"| S3 + S1 -->|"数据返回"| SG2 + S2 -->|"数据返回"| SG2 + S3 -->|"数据返回"| SG2 + SG2 -->|"生成报表文件"| Result + Result -->|"展示给用户"| User + + classDef people fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef plat fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef sg fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef out fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + + class User people + class Platform,S1,S2,S3 plat + class sgClaw,SG1,SG2,SG3 sg + class Result out +
+
+
+ +
+
+
4
+
安全管控体系
+
+
+
+ sgClaw 建立了三道安全防线,确保即使在AI驱动下,所有操作也在可控范围内。 +
+
+graph TB + A["第一道防线\n身份确认: 确保通信双方可信"] --> B["第二道防线\n规则校验: 只能访问允许的系统和页面"] + B --> C["第三道防线\n二次复核: 操作前再次确认合法性"] + C --> D["最终结果\n所有操作可追溯 可审计"] + + classDef l1 fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef l2 fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef l3 fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef ok fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + + class A l1 + class B l2 + class C l3 + class D ok +
+
+
+ +
+
+
5
+
两种运行模式
+
+
+
+ sgClaw 支持两种运行方式,适应不同场景需求。 +
+
+graph LR + subgraph Mode1["模式一: 嵌入式 浏览器子进程模式"] + M1A["浏览器启动sgClaw"] + M1B["一问一答式通信"] + M1C["适合单次任务执行"] + M1A --> M1B --> M1C + end + + subgraph Mode2["模式二: 独立服务模式"] + M2A["sgClaw作为持久化服务运行"] + M2B["前端网页随时连接使用"] + M2C["适合频繁交互使用"] + M2A --> M2B --> M2C + end + + classDef m1 fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef m2 fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + + class Mode1,M1A,M1B,M1C m1 + class Mode2,M2A,M2B,M2C m2 +
+
+
+ +
+
+
6
+
技术演进路线
+
+
+
+
+
第一阶段
+
+

基础能力构建

+

完成浏览器基础操作能力(点击、输入、导航、读取页面内容),建立安全管控体系,实现与现有业务平台的对接。

+
+
+
+
第二阶段
+
+

AI智能驱动

+

接入AI大模型,支持自然语言理解,用户用日常语言描述需求,AI自动拆解为操作步骤并执行。

+
+
+
+
第三阶段
+
+

业务场景沉淀

+

将高频使用的场景沉淀为标准化技能包(如线损查询、故障统计、周报生成等),实现快速执行,减少对AI的依赖。

+
+
+
+
第四阶段
+
+

平台化服务

+

从单次任务执行升级为持久化服务,支持多用户并发使用,建立完整的技能市场和任务编排体系。

+
+
+
+
+
+ +
+
+
7
+
核心价值
+
+
+
+
+

效率提升

+

原来需要人工逐步操作浏览器完成的任务,现在只需一句话,自动完成查询、导出、报表生成全流程。

+
+
+

零改造接入

+

不需要改造现有业务系统,sgClaw像员工一样直接操作浏览器,对现有系统零侵入。

+
+
+

安全可控

+

三道安全防线确保所有操作在允许范围内,域名白名单、动作管控、二次复核,全程可追溯。

+
+
+

灵活扩展

+

新业务场景通过编写技能包快速接入,已有场景走快速通道无需AI,兼顾效率和灵活性。

+
+
+

技术自主

+

核心代码自主可控,基于Rust语言构建,性能优异,不依赖外部SaaS服务,数据安全有保障。

+
+
+

持续演进

+

从单任务执行到持久化服务,从人工指令到AI驱动,技术路线清晰,逐步构建平台化能力。

+
+
+
+
+ +
+
+
8
+
典型使用场景举例
+
+
+
+ 以下是业务人员日常使用 sgClaw 的真实场景。 +
+
+graph TB + U1["线损专责\n每月查询线损率统计数据"] -->|"输入: 帮我查本月线损率"| SG1["sgClaw自动完成\n打开线损系统 选择月份 查询数据 导出Excel"] + U2["供电所班组长\n每周生成线损分析周报"] -->|"输入: 生成上周线损周报"| SG2["sgClaw自动完成\n查询周数据 汇总分析 生成Word报告"] + U3["客服专责\n处理95598故障工单统计"] -->|"输入: 统计本周故障工单"| SG3["sgClaw自动完成\n登录客服系统 筛选工单 生成统计表"] + + classDef user fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef sg fill:#4a2c17,stroke:#e65100,color:#e6edf3 + + class U1,U2,U3 user + class SG1,SG2,SG3 sg +
+
+
+ +
+ + + + \ No newline at end of file diff --git a/docs/sgClaw系统架构全景图.html b/docs/sgClaw系统架构全景图.html new file mode 100644 index 0000000..dda5e31 --- /dev/null +++ b/docs/sgClaw系统架构全景图.html @@ -0,0 +1,413 @@ + + + + + + sgClaw 系统架构全景图 + + + + +
+

sgClaw 系统架构全景图

+
浏览器宿主 x Rust 安全控制层 x ZeroClaw 能力核心 - 双部署模式 三层安全防线 Skill 体系
+
+
+ +
+
+
1
+
系统边界总览 - 四大区域与数据流向
+
+
+
+graph TB + BH["浏览器宿主\n受保护的安全边界\n启动和托管sgClaw子进程"] + SP["sgClaw进程\nRust安全控制层\nZeroClaw为能力核心"] + ZC["ZeroClaw核心\nvendored crate\n任务分解 工具循环 LLM路由"] + ES["外部服务\nLLM API和业务浏览器页面"] + BH <-- "STDIO JSON Line 进程间通信协议" --> SP + SP <-- "Rust API调用 vendored库" --> ZC + ZC <-- "HTTP API 或内部调用" --> ES + SP <-- "Browser Backend Pipe或WS" --> ES + classDef hostClass fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef sgclawClass fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef zcClass fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef extClass fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + class BH hostClass + class SP sgclawClass + class ZC zcClass + class ES extClass +
+
+
+ +
+
+
2
+
双部署模式 - Pipe Mode STDIO一问一答
+
+
+
+sequenceDiagram + participant Host as 浏览器宿主 + participant Pipe as StdioTransport + participant MAC as MAC Policy + participant Agent as Agent/TaskRunner + participant ZC as ZeroClaw Runtime + participant Tool as BrowserPipeTool + participant Exec as 宿主命令执行器 + Note over Host,Exec: Pipe Mode 一问一答式STDIO通信 + Host->>Pipe: Init 握手:携带版本号 HMAC种子 能力列表 + Pipe->>Pipe: derive_session_key 派生会话密钥 + Pipe-->>Host: InitAck 确认:返回agent_id和支持动作 + Host->>Agent: SubmitTask 提交任务 + Agent->>Agent: 检测确定性提交模式 + alt 确定性提交 + Agent->>Agent: 生成执行计划 + Agent->>Tool: 直接执行Skill + else LLM驱动 + Agent->>ZC: 构造ZeroClaw Agent + ZC->>Tool: tool loop调用 + end + Tool->>MAC: 校验域名和动作 + MAC-->>Tool: 允许或拒绝 + Tool->>Pipe: 写入Command JSON + Pipe-->>Host: 浏览器接收命令 + Host->>Exec: 执行浏览器命令 + Exec-->>Host: 返回执行结果 + Host->>Pipe: Response回包 + Pipe-->>Tool: 结果回传 + Tool-->>ZC: ToolResult + ZC-->>Agent: 继续或完成 + Agent-->>Host: TaskComplete +
+
+
+ +
+
+
3
+
双部署模式 - Service Mode TCP加WebSocket加Helper Page桥接
+
+
+
+sequenceDiagram + participant Console as 前端控制台 + participant WS as WebSocket Server + participant Agent as Agent/TaskRunner + participant CB as BrowserCallbackBackend + participant HTTP as Callback HTTP Server + participant Helper as Helper Page + participant Target as 目标业务页面 + Note over Console,Target: Service Mode 持久化服务+Helper Page桥接 + Console->>WS: WebSocket Connect + WS->>CB: 创建会话 + Console->>WS: SubmitTask + WS->>Agent: 分发任务 + Agent->>CB: invoke执行 + CB->>HTTP: POST Command到队列 + HTTP-->>Helper: long-poll返回Command + Helper->>Target: sgBrowserExcuteJsCodeByDomain执行JS + Target-->>Helper: callBackJsToCpp回调 + Helper->>HTTP: POST事件回传 + HTTP-->>CB: Callback事件 + CB-->>Agent: CommandOutput + Agent-->>WS: TaskComplete + WS-->>Console: 推送结果
+
+
+ +
+
+
4
+
sgClaw 内部模块关系图
+
+
+
+graph LR + E1["main.rs Pipe模式入口"] + E2["service模式入口"] + P1["StdioTransport STDIO读写"] + P2["消息枚举定义"] + P3["Handshake握手协议"] + P4["BrowserPipeTool发送等待响应"] + P5["HMAC签名防篡改"] + M1["MacPolicy加载解析"] + M2["Domain白名单标准化比对"] + M3["Action黑白名单双重过滤"] + A1["消息分发handle_browser_message"] + A2["TaskRunner任务解析"] + A3["Deterministic Submit指令检测"] + C1["RuntimeEngine构建Agent"] + C2["ToolPolicy工具权限"] + C3["BrowserScriptSkillTool执行器"] + C4["DeterministicSubmit线损快速通道"] + C5["BrowserToolAdapter工具适配"] + B1["BrowserBackend统一接口"] + B2["PipeBrowserBackend实现"] + B3["WsBrowserBackend实现"] + B4["BrowserCallbackBackend实现"] + SV1["WebSocket Server监听"] + SV2["Session Manager单客户端单任务"] + SV3["Callback HTTP Server监听"] + CF1["SgClawSettings加载"] + CF2["Provider Config"] + CF3["Backend Selection选择"] + E1 --> P1 --> P2 --> P3 --> P4 --> P5 --> M1 + M1 --> M2 + M1 --> M3 --> A1 --> A2 --> A3 + A3 --> C1 --> C2 --> C5 --> B1 + A3 --> C4 --> B1 + CF1 --> C1 + B1 --> B2 + B1 --> B3 + B1 --> B4 + E2 --> SV1 --> SV2 --> B4 + SV1 --> SV3 + CF1 --> CF2 + CF1 --> CF3 --> A1 +
+
+
+ +
+
+
5
+
安全模型 - 三层防线
+
+
+
+graph TB + L1A["浏览器发送Init携带hmac_seed"] + L1B["sgClaw回InitAck分配agent_id"] + L1C["派生Session Key SHA256"] + L1D["未完成握手拒绝运行"] + L1A --> L1B --> L1C --> L1D + L2A["加载rules.json解析规则"] + L2B["Domain白名单校验去掉协议路径端口"] + L2C["Action黑白名单双重过滤"] + L2D["本地仪表盘特殊处理"] + L2A --> L2B + L2A --> L2C + L2A --> L2D + L3A["序列号关联校验"] + L3B["HMAC-SHA256签名验证"] + L3C["域名与页面上下文匹配"] + L3D["非法参数拒绝执行"] + L3A --> L3B --> L3C --> L3D + L1D ==> L2A + L2B ==> L3A + L2C ==> L3A + L2D ==> L3A + classDef l1Class fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef l2Class fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef l3Class fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + class L1A,L1B,L1C,L1D l1Class + class L2A,L2B,L2C,L2D l2Class + class L3A,L3B,L3C,L3D l3Class +
+
+
+ +
+
+
6
+
Skill体系与执行路径
+
+
+
+graph TB + SD1["SKILL.toml元数据"] + SD2["tools数组kind定义"] + SD3["prompts数组触发条件"] + SD4["scripts目录JS脚本"] + SL1["ZeroClaw Skill Loader扫描"] + SL2["BrowserScriptSkillTool创建执行器"] + SL3["命名规范skill.tool"] + EP1["路径A LLM驱动"] + EP2["路径B Deterministic Submit"] + EP3["路径C Direct Skill Runtime"] + BE1["Eval包装脚本注入args"] + BE2["Action Eval执行"] + BE3["返回ToolResult结构化JSON"] + SD1 --> SD2 --> SD4 + SD2 --> SD3 + SD1 --> SL1 --> SL2 --> SL3 + SL3 --> EP1 + SL3 --> EP2 + SL3 --> EP3 + EP1 --> BE1 --> BE2 --> BE3 + EP2 --> BE1 + EP3 --> BE1 +
+
+
+ +
+
+
7
+
Helper Page机制 - Service Mode核心桥接
+
+
+
+graph TB + WS["WebSocket Server监听42321"] + HTTP["Callback HTTP Server监听17888"] + CB["BrowserCallbackBackend交互"] + Helper["Helper Page Tab辅助页"] + Target1["业务页面1线损系统"] + Target2["业务页面2平台页面"] + HP1["WebSocket连接特权API"] + HP2["轮询Command长轮询"] + HP3["推送Events POST回调"] + HP4["回调函数注册"] + WS --> CB --> HTTP --> HP2 + HP1 --> Target1 + HP1 --> Target2 + HP2 --> Target1 + HP2 --> Target2 + Target1 --> HP4 --> HP3 --> HTTP + HTTP --> CB --> WS + classDef svcClass fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef tabClass fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef hpClass fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + class WS,HTTP,CB svcClass + class Helper,Target1,Target2 tabClass + class HP1,HP2,HP3,HP4 hpClass +
+
+
+ +
+
+
8
+
线损确定性提交流程 - 用户输入到Excel导出
+
+
+
+sequenceDiagram + participant User as 用户 + participant Host as 浏览器宿主 + participant Agent as Agent/TaskRunner + participant DS as DeterministicSubmit + participant Skill as collect_lineloss + participant Backend as BrowserBackend + participant Browser as 线损浏览器页面 + participant Rust as Rust xlsx导出 + User->>Host: 输入指令:帮我查本月线损率 + Host->>Agent: SubmitTask + Agent->>DS: decide_deterministic_submit + Note over DS: 指令以句号结尾且包含线损关键词 + DS-->>Agent: Execute执行计划 + Agent->>Skill: execute_browser_script + Skill->>Backend: Action Eval + Backend->>Browser: sgBrowserExcuteJsCodeByDomain + Browser->>Browser: validatePageContext + Browser->>Browser: buildRequest + Browser->>Browser: ajax查询API + Browser-->>Backend: 返回JSON + Backend-->>Skill: ToolResult + Skill-->>Agent: artifact + Agent->>Rust: export_lineloss_xlsx + Rust->>Rust: 生成xlsx文件 + Rust-->>Agent: 导出完成 + Agent-->>Host: TaskComplete + Host-->>User: 展示结果打开Excel +
+
+
+ +
+
+
9
+
平台浏览器与sgClaw交互边界
+
+
+
+graph TB + PlatformBrowser["平台浏览器Chromium"] + sgClawProcess["sgClaw进程Rust"] + PP1["场景页Vue实例window.mac"] + PP2["mutableSystemList子系统账号池"] + PP3["getLogint登录编排方法"] + TP1["线损系统20.76.57.61"] + TP2["其他子系统"] + BC1["sgBrowserExcuteJsCodeByDomain按域名执行JS"] + BC2["sgHideBrowerserOpenPage打开隐藏页面"] + BC3["sgBrowserCallAfterLoaded加载后执行JS"] + BC4["callBackJsToCpp JS到C++回调"] + T1["Transport层STDIO传输"] + T2["MAC Policy加HMAC安全校验"] + T3["Agent/TaskRunner任务分发器"] + T4["Compat层ZeroClaw兼容"] + T5["Browser Backend浏览器后端"] + PP1 --> PP2 + PP1 --> PP3 + PP3 -.-> TP1 + T1 --> PlatformBrowser + PlatformBrowser --> T1 + T3 --> T4 --> T5 + T5 --> BC1 + T5 --> BC2 + T5 --> BC3 + BC4 -.-> T5 + PlatformBrowser -.-> sgClawProcess + classDef browserSide fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef sgclawSide fill:#4a2c17,stroke:#e65100,color:#e6edf3 + class PlatformBrowser,PP1,PP2,PP3,TP1,TP2,BC1,BC2,BC3,BC4 browserSide + class sgClawProcess,T1,T2,T3,T4,T5 sgclawSide +
+
+
+ +
+
+
10
+
模块文件映射表
+
+
+ + + + + + + + + + + + +
模块主要源文件职责说明
pipe传输层src/pipe/mod.rs transport.rs handshake.rs browser_tool.rsSTDIO读写 握手流程 消息编码解码 HMAC签名
security安全层src/security/mod.rs mac_policy.rs hmac.rsMAC Policy加载 域名白名单 动作黑白名单 HMAC签名
agent消息路由src/agent/mod.rs task_runner.rs消息分发 任务解析 Deterministic Submit检测
browser后端抽象src/browser/mod.rs callback_backend.rs callback_host.rs ws_protocol.rsBrowserBackend接口 Pipe/WS/Callback实现
compat兼容层src/compat/mod.rs runtime.rs deterministic_submit.rs browser_script_skill_tool.rsZeroClaw运行时构建 线损快速通道 Skill执行
service服务模式src/service/mod.rs session.rsWS服务器 单客户端单任务模型
config配置src/config/mod.rs settings.rsSettings加载 Provider配置 Backend选择
runtime引擎src/runtime/mod.rs engine.rs tool_policy.rsAgent实例构建 ToolPolicy权限控制
+
+
+ +
+ + + + \ No newline at end of file diff --git a/docs/sgClaw系统架构全景图.md b/docs/sgClaw系统架构全景图.md new file mode 100644 index 0000000..7176bfc --- /dev/null +++ b/docs/sgClaw系统架构全景图.md @@ -0,0 +1,494 @@ +# sgClaw 系统架构全景图 + +**文档版本**: 1.0
+**适用项目**: sgClaw
+**编制日期**: 2026-04-15 + +--- + +## 1. 系统边界总览 + +```mermaid +graph TB + subgraph BrowserHost["浏览器宿主 (SuperRPA / Chromium)"] + direction TB + H1["Launch Config
启动配置"] + H2["Chromium 子进程管理
启动/监控 sgClaw"] + H3["Browser Command 执行器
click/type/navigate/eval/..."] + H4["HMAC 复检 + 域名校验
宿主侧安全边界"] + H5["Frontend Bundle
展示面 (Vue 2 页面)"] + + H1 --> H2 + H2 --> H3 + H3 --> H4 + H4 -.展示.-> H5 + end + + subgraph sgClawProcess["sgClaw 进程 (Rust)"] + direction TB + S1["Transport 层
STDIO / WebSocket"] + S2["Security 层
MAC Policy + HMAC 签名"] + S3["Agent 层
消息路由 + 任务分发"] + S4["Compat 层
ZeroClaw 运行时 + Skill 工具链"] + S5["Browser Backend 抽象
Pipe / WS / Callback / Bridge"] + S6["Config 层
Runtime Config + 环境变量"] + + S1 --> S2 + S2 --> S3 + S3 --> S4 + S4 --> S5 + S6 -.配置注入.-> S4 + end + + subgraph ZeroClawCore["ZeroClaw 核心 (vendored)"] + direction TB + Z1["Planner / Executor
任务分解与执行"] + Z2["Tool Loop
工具调用循环"] + Z3["Skills / Memory
技能加载与记忆"] + Z4["Provider Dispatch
LLM 路由"] + Z5["Prompt Builder
System Prompt 组装"] + + Z1 --> Z2 + Z2 --> Z3 + Z3 --> Z4 + Z5 --> Z1 + end + + subgraph ExternalServices["外部服务"] + direction TB + E1["LLM Provider
DeepSeek / OpenAI / Claude"] + E2["平台浏览器页面
业务页面 + 隐藏域"] + end + + BrowserHost <-->|"STDIO JSON Line
AgentMessage / BrowserMessage"| sgClawProcess + sgClawProcess <-->|"Rust API 调用|vendored"| ZeroClawCore + ZeroClawCore <-->|"HTTP API|内部调用"| ExternalServices + sgClawProcess <-->|"Pipe Mode: STDIO
Service Mode: WS|Browser Backend| ExternalServices +``` + +--- + +## 2. 双部署模式架构 + +### 2.1 Pipe Mode (STDIO) — 传统嵌入模式 + +```mermaid +sequenceDiagram + participant Host as 浏览器宿主 (Chromium) + participant Pipe as StdioTransport + participant MAC as MAC Policy + participant Agent as Agent / TaskRunner + participant ZC as ZeroClaw Runtime + participant Backend as PipeBrowserBackend + participant Tool as BrowserPipeTool + participant HostExec as 宿主 Command 执行器 + + Note over Host,HostExec: Pipe Mode: 一问一答式 STDIO + + Host->>Pipe: Init {version, hmac_seed, capabilities} + Pipe->>Pipe: derive_session_key(hmac_seed) + Pipe-->>Host: InitAck {version, agent_id, supported_actions} + + Host->>Agent: SubmitTask {instruction, page_url, page_title} + Agent->>Agent: resolve_submit_instruction() + alt deterministic_submit (如 线损。。。) + Agent->>Agent: 生成 DeterministicExecutionPlan + Agent->>Tool: execute_browser_script_skill_raw_output + else 通用 LLM 驱动 + Agent->>ZC: 构造 ZeroClaw Agent + ZC->>Tool: tool loop: browser_action + end + + Tool->>MAC: validate(domain, action) + MAC-->>Tool: allow / deny + + Tool->>Backend: invoke(action, params) + Backend->>Pipe: AgentMessage::Command {seq, action, params, hmac} + Pipe-->>Host: stdout: Command JSON + + Host->>HostExec: 执行浏览器命令 + HostExec-->>Host: 执行结果 + Host->>Pipe: BrowserMessage::Response {seq, success, data} + Pipe-->>Backend: Response 回包 + Backend-->>Tool: CommandOutput + Tool-->>ZC: ToolResult + ZC-->>Agent: tool loop 继续或完成 + Agent-->>Host: TaskComplete {success, summary} +``` + +### 2.2 Service Mode (TCP + WebSocket) — 独立服务模式 + +```mermaid +sequenceDiagram + participant Console as 前端控制台 (浏览器) + participant WSS as WebSocket Server
(127.0.0.1:42321) + participant Agent as Agent / TaskRunner + participant Callback as BrowserCallbackBackend + participant HTTP as Callback HTTP Server
(127.0.0.1:17888) + participant Helper as Helper Page
(浏览器内嵌辅助页) + participant Target as 目标业务页面 + + Note over Console,Target: Service Mode: 持久化服务 + Helper Page 桥接 + + Console->>WSS: WebSocket Connect + WSS->>Callback: 创建会话 + + Console->>WSS: ClientMessage::SubmitTask + WSS->>Agent: 分发任务 + Agent->>Callback: BrowserBackend::invoke() + + callback Backend 内部流程: + Callback->>Helper: 通过 HTTP Server 推送 Command + Helper->>Target: sgBrowserExcuteJsCodeByDomain
在目标域执行 JS + + Target-->>Helper: callBackJsToCpp / XHR POST + Helper->>HTTP: POST /sgclaw/callback/events + HTTP-->>Callback: Callback 事件回传 + + Callback-->>Agent: CommandOutput + Agent-->>WSS: ServiceMessage::TaskComplete + WSS-->>Console: WebSocket 推送结果 +``` + +--- + +## 3. sgClaw 内部模块关系 + +```mermaid +graph LR + subgraph EntryPoints["入口点"] + E1["src/main.rs
sgclaw::run()"] + E2["src/service/mod.rs
service::run()"] + end + + subgraph PipeLayer["pipe 层 — 传输与协议"] + P1["StdioTransport
STDIO 读写"] + P2["BrowserMessage / AgentMessage
消息枚举定义"] + P3["Handshake
握手协议"] + P4["BrowserPipeTool
发送 Command / 等待 Response"] + P5["HMAC 签名
sign_command"] + end + + subgraph SecurityLayer["security 层 — 安全策略"] + M1["MacPolicy
从 rules.json 加载规则"] + M2["Domain Allowlist
域名白名单校验"] + M3["Action Allowlist/Blocklist
动作黑白名单"] + end + + subgraph AgentLayer["agent 层 — 消息路由与任务分发"] + A1["handle_browser_message_with_context
消息分发"] + A2["TaskRunner
任务解析与执行"] + A3["resolve_submit_instruction
Deterministic Submit 检测"] + end + + subgraph CompatLayer["compat 层 — ZeroClaw 兼容"] + C1["RuntimeEngine
构建 Agent 实例"] + C2["ToolPolicy
工具权限控制"] + C3["BrowserScriptSkillTool
Skill browser_script 执行"] + C4["DeterministicSubmit
线损确定性提交"] + C5["BrowserToolAdapter
ZeroClaw 工具适配"] + C6["ConfigAdapter
配置转换"] + end + + subgraph BrowserLayer["browser 层 — 浏览器后端"] + B1["BrowserBackend trait
统一接口"] + B2["PipeBrowserBackend
Pipe Mode 实现"] + B3["WsBrowserBackend
WebSocket 直接连接"] + B4["BrowserCallbackBackend
Helper Page 桥接"] + B5["BridgeBrowserBackend
网桥模式"] + end + + subgraph ServiceLayer["service 层 — 服务模式"] + SV1["WebSocket Server
TCP 监听"] + SV2["Session Manager
单客户端单任务"] + SV3["Callback HTTP Server
辅助页通信"] + end + + subgraph ConfigLayer["config 层 — 运行时配置"] + CF1["SgClawSettings
从 JSON / 环境变量加载"] + CF2["Provider Config
API Key / Model"] + CF3["Backend Selection
Pipe vs Service"] + end + + E1 --> P1 + E2 --> SV1 + + P1 --> P2 + P2 --> P3 + P3 --> P4 + P4 --> P5 + + P5 --> M1 + M1 --> M2 + M1 --> M3 + + M3 --> A1 + A1 --> A2 + A2 --> A3 + + A3 --> C1 + A3 --> C4 + C1 --> C2 + C1 --> C3 + C2 --> C5 + C6 --> C1 + + C3 --> B1 + C4 --> B1 + C5 --> B1 + + B1 --> B2 + B1 --> B3 + B1 --> B4 + B1 --> B5 + + SV1 --> SV2 + SV1 --> SV3 + SV2 --> B4 + + CF1 --> CF2 + CF1 --> CF3 + CF3 --> A1 +``` + +--- + +## 4. 安全模型三层防线 + +```mermaid +graph TB + subgraph Layer1["第一层: 握手与会话完整性"] + L1A["Browser 发送 Init
携带 hmac_seed"] + L1B["sgClaw 回 InitAck
分配 agent_id"] + L1C["派生 Session Key
SHA256(hmac_seed + salt)"] + L1D["未完成握手
拒绝进入运行态"] + + L1A --> L1B --> L1C --> L1D + end + + subgraph Layer2["第二层: Rust 侧 MAC Policy"] + L2A["加载 rules.json
version, domains, actions"] + L2B["Domain 白名单校验
strip scheme/path/port"] + L2C["Action 黑白名单
allowed + blocked 双重过滤"] + L2D["本地仪表盘特殊处理
__sgclaw_local_dashboard__"] + + L2A --> L2B + L2A --> L2C + L2A --> L2D + end + + subgraph Layer3["第三层: 宿主侧命令执行约束"] + L3A["序列号关联校验"] + L3B["HMAC-SHA256 签名验证"] + L3C["域名与页面上下文匹配"] + L3D["非法参数拒绝执行"] + + L3A --> L3B --> L3C --> L3D + end + + Layer1 ==>|"Session Key"| Layer2 + Layer2 ==>|"Command + HMAC"| Layer3 +``` + +--- + +## 5. Skill 体系与执行路径 + +```mermaid +graph TB + subgraph SkillDefinition["Skill 定义 (SKILL.toml)"] + SD1["skill metadata
name, version, description"] + SD2["tools 数组
kind: browser_script / http_request / ..."] + SD3["prompts 数组
触发条件描述"] + SD4["scripts/ 目录
JS 脚本文件"] + end + + subgraph SkillLoading["Skill 加载"] + SL1["ZeroClaw Skill Loader
从 skillsDir 扫描"] + SL2["BrowserScriptSkillTool
为每个 tool 创建执行器"] + SL3["命名: {skill_name}.{tool_name}"] + end + + subgraph ExecutionPaths["执行路径"] + EP1["路径 A: LLM 驱动
Agent tool loop → browser_action"] + EP2["路径 B: Deterministic Submit
指令匹配 → 直接执行 (无 LLM)"] + EP3["路径 C: Direct Skill Runtime
配置指定 skill → 直接执行"] + end + + subgraph BrowserExecution["浏览器侧执行"] + BE1["Eval 包装
(function() { const args = {...}; ... })()"] + BE2["Action::Eval
通过 BrowserBackend 执行"] + BE3["返回 ToolResult
结构化结果"] + end + + SD1 --> SD2 --> SD4 + SD2 --> SD3 + + SD1 --> SL1 --> SL2 --> SL3 + + SL3 --> EP1 + SL3 --> EP2 + SL3 --> EP3 + + EP1 --> BE1 + EP2 --> BE1 + EP3 --> BE1 + + BE1 --> BE2 --> BE3 +``` + +--- + +## 6. Helper Page 机制 (Service Mode) + +```mermaid +graph TB + subgraph sgClawService["sgClaw Service 进程"] + WS["WebSocket Server
:42321"] + HTTP["HTTP Server
:17888"] + CB["BrowserCallbackBackend"] + end + + subgraph BrowserTabs["浏览器 Tab 页"] + Helper["Helper Page Tab
/sgclaw/browser-helper.html"] + Target1["业务页面 1
20.76.57.61:18080/..."] + Target2["业务页面 2
25.215.213.128:18080/..."] + end + + subgraph HelperPage["Helper Page 内部"] + HP1["WebSocket 连接
ws://127.0.0.1:12345"] + HP2["轮询 Command
GET /sgclaw/callback/commands/next"] + HP3["推送 Events
POST /sgclaw/callback/events"] + HP4["回调函数注册
sgclawOnClickProbe / sgclawOnEval / ..."] + end + + WS -->|"WebSocket"| CB + CB -->|"推送 Command"| HTTP + HTTP -->|long-poll| HP2 + + HP1 -->|"浏览器 WebSocket API"| Target1 + HP1 -->|"浏览器 WebSocket API"| Target2 + + HP2 -->|"执行 JS 命令
sgBrowserExcuteJsCodeByDomain"| Target1 + HP2 -->|"执行 JS 命令
sgBrowserExcuteJsCodeByDomain"| Target2 + + Target1 -->|"callBackJsToCpp"| HP4 + HP3 -->|"XHR POST"| HTTP + HP4 --> HP3 + + HTTP -->|"Callback 事件"| CB + CB -->|"ToolResult"| WS +``` + +--- + +## 7. 线损确定性提交流程 (Deterministic Submit) + +```mermaid +sequenceDiagram + participant User as 用户 + participant Host as 浏览器宿主 + participant Agent as Agent / TaskRunner + participant DS as DeterministicSubmit + participant Skill as BrowserScriptSkillTool
(collect_lineloss) + participant Backend as BrowserBackend + participant Browser as 浏览器页面
(线损域) + participant Rust as Rust 侧
xlsx 导出 + + User->>Host: 输入: "帮我查本月线损率。。。" + Host->>Agent: SubmitTask {instruction} + + Agent->>DS: decide_deterministic_submit() + Note over DS: 指令以 "。。。" 结尾
且包含 "线损" 关键词 + DS-->>Agent: Execute(DeterministicExecutionPlan) + + Agent->>Skill: execute_browser_script_skill_raw_output() + Skill->>Backend: Action::Eval {script: collect_lineloss.js} + Backend->>Browser: sgBrowserExcuteJsCodeByDomain
(20.76.57.61, js_code) + + Browser->>Browser: validatePageContext(args) + Browser->>Browser: buildMonthRequest / buildWeekRequest + Browser->>Browser: $.ajax 查询线损 API + Browser-->>Backend: 返回 report-artifact JSON + Backend-->>Skill: ToolResult + Skill-->>Agent: artifact {status, rows, column_defs} + + Agent->>Rust: export_lineloss_xlsx(artifact) + Rust->>Rust: 生成 .xlsx 文件 + Rust-->>Agent: 导出完成 + Agent-->>Host: TaskComplete {success: true} + Host-->>User: 展示结果 + 打开 Excel +``` + +--- + +## 8. 平台浏览器与 sgClaw 的交互边界 + +```mermaid +graph TB + subgraph PlatformBrowser["平台浏览器 (Chromium)"] + direction TB + subgraph PlatformPages["平台场景页面"] + PP1["场景页 Vue 实例
window.mac"] + PP2["mutableSystemList
子系统账号池"] + PP3["getLogint / loginStatusTing
子系统登录编排"] + end + + subgraph TargetPages["目标业务页面"] + TP1["线损系统
20.76.57.61:18080"] + TP2["其他子系统"] + end + + subgraph BrowserCapabilities["浏览器特权能力"] + BC1["sgBrowserExcuteJsCodeByDomain
按域名执行 JS"] + BC2["sgHideBrowerserOpenPage
打开隐藏页面"] + BC3["sgBrowserCallAfterLoaded
页面加载后执行 JS"] + BC4["callBackJsToCpp
JS → C++ 回调"] + end + + PP1 --> PP2 + PP1 --> PP3 + end + + subgraph sgClawProcess["sgClaw 进程"] + direction TB + subgClawTransport["Transport 层"] + subgClawSecurity["MAC Policy + HMAC"] + subgClawAgent["Agent / TaskRunner"] + subgClawCompat["Compat 层"] + subgClawBackend["Browser Backend"] + end + + subgClawTransport <-->|"STDIO JSON Line
AgentMessage / BrowserMessage"| PlatformBrowser + subgClawAgent --> subgClawCompat + subgClawCompat --> subgClawBackend + subgClawBackend -->|"BrowserAction
sgBrowserExcuteJsCodeByDomain"| BC1 + subgClawBackend -->|"BrowserAction
sgHideBrowerserOpenPage"| BC2 + subgClawBackend -->|"BrowserAction
sgBrowserCallAfterLoaded|BC3 + + BC4 -.回调.-> subgClawBackend + + PlatformBrowser -.安全边界.-> sgClawProcess + + classDef browserSide fill:#e3f2fd,stroke:#1565c0,color:#000 + classDef sgclawSide fill:#fff3e0,stroke:#e65100,color:#000 + classDef interaction fill:#f3e5f5,stroke:#7b1fa2,color:#000 + + class PlatformBrowser,PlatformPages,TargetPages,BrowserCapabilities browserSide + class sgClawProcess,subgClawTransport,subgClawSecurity,subgClawAgent,subgClawCompat,subgClawBackend sgclawSide +``` + +--- + +## 9. 模块文件映射 + +| 模块 | 主要文件 | 职责 | +|---|---|---| +| **pipe 传输层** | `src/pipe/mod.rs`, `src/pipe/transport.rs`, `src/pipe/handshake.rs`, `src/pipe/browser_tool.rs` | STDIO 读写、握手、消息编码解码、HMAC 签名、Command 发送与 Response 等待 | +| **security 安全层** | `src/security/mod.rs`, `src/security/mac_policy.rs`, `src/security/hmac.rs` | MAC Policy 加载与校验、Session Key 派生、命令签名 | +| **agent 消息路由** | `src/agent/mod.rs`, `src/agent/task_runner.rs` | 接收 BrowserMessage 并分发、任务解析、Deterministic Submit 检测 | +| **browser 后端抽象** | `src/browser/mod.rs`, `src/browser/callback_backend.rs`, `src/browser/callback_host.rs`, `src/browser/ws_protocol.rs` | BrowserBackend trait 定义、Pipe/WS/Callback/Bridge 四种实现 | +| **compat 兼容层** | `src/compat/mod.rs`, `src/compat/runtime.rs`, `src/compat/deterministic_submit.rs`, `src/compat/browser_script_skill_tool.rs` | ZeroClaw 运行时构建、线损确定性提交、Skill browser_script 执行 | +| **service 服务模式** | `src/service/mod.rs`, `src/service/session.rs` | WebSocket 服务器、客户端会话管理、单任务并发模型 | +| **config 运行时配置** | `src/config/mod.rs`, `src/config/settings.rs` | SgClawSettings 加载、Provider 配置、Backend 选择 | +| **runtime 运行时引擎** | `src/runtime/mod.rs`, `src/runtime/engine.rs`, `src/runtime/tool_policy.rs` | RuntimeEngine 构建 Agent、ToolPolicy 工具权限控制 | diff --git a/docs/sgClaw组件职责与流转全景图.html b/docs/sgClaw组件职责与流转全景图.html new file mode 100644 index 0000000..58b9362 --- /dev/null +++ b/docs/sgClaw组件职责与流转全景图.html @@ -0,0 +1,645 @@ + + + + + + sgClaw 智能浏览器自动化平台 - 组件职责与流转全景图 + + + + +
+

sgClaw 智能浏览器自动化平台

+
核心组件职责与流转全景图 - 每个组件是什么 做什么 什么时候调用
+
+
+ + +
+
+
1
+
全景概览 - 从用户指令到浏览器执行的完整链路
+
+
+
+ 当用户说出"帮我查本月线损率"时,sgClaw 内部多个组件协同工作。以下是完整的执行链路,展示每个组件在哪个环节被调用、承担什么职责。 +
+
+graph TB + U["用户\n输入自然语言指令"] -->|"1. SubmitTask"| GW["通信网关\nSTDIO Pipe / Service WS\n接收请求 建立会话"] + GW -->|"2. 加载配置"| CFG["SgClawSettings\n加载 sgclaw_config.json\nLLM Provider RuntimeProfile SkillsDir"] + CFG -->|"3. 四级路由决策"| RT["Agent Runtime\ntask_runner 任务调度"] + + RT -->|"3a. 匹配场景"| DS["确定性执行\ndeterministic_submit\nscene_platform 匹配场景清单\n直接执行预设脚本 无需LLM"] + RT -->|"3b. 主编排"| PO["主编排路径\nzeroclaw_process_message_primary\n完整Agent工具循环 LLM自主规划"] + RT -->|"3c. 直连技能"| DSK["直连技能路径\ndirect_skill_primary\n配置指定skill.tool直接执行"] + RT -->|"3d. 标准LLM"| ZC["标准LLM路径\ncompat_llm_primary\nzeroclaw agent turn 默认回退"] + + DS -->|"4. 执行操作"| BB["浏览器后端\nBrowserBackend trait\nPipeBrowser / WsBrowser"] + PO -->|"4. 调用工具"| BB + DSK -->|"4. 执行操作"| BB + ZC -->|"4. 调用工具"| BB + + BB -->|"5. 安全校验"| SC["MAC Policy\n检查 rules.json\n域名白名单 动作白名单 HMAC"] + SC -->|"6. 执行命令"| EXT["SuperRPA Chromium\n执行实际DOM操作\n导航 点击 输入 读取"] + EXT -->|"7. 返回结果"| BB + BB -->|"8. 结果回传"| RT + RT -->|"9. 后处理"| PH["Report Artifact\nopenxml_office 生成Excel\nscreen_html_export 生成大屏"] + PH -->|"10. TaskComplete"| GW + GW -->|"11. 结果"| U + + classDef userNode fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef coreNode fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef routeNode fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef extNode fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + classDef cfgNode fill:#484f58,stroke:#8b949e,color:#e6edf3 + + class U userNode + class GW,RT,BB,PH coreNode + class DS,PO,DSK,ZC routeNode + class SC,EXT extNode + class CFG cfgNode +
+
+
+ + +
+
+
2
+
核心组件详解 - 职责 调用时机 输入输出
+
+
+
+ 以下是每个核心组件的详细说明。点击卡片可查看什么时候调用输入什么输出什么。 +
+
+
+

内部通信网关

+

负责接收用户请求、建立会话、返回最终结果。支持两种模式:STDIO Pipe(默认,与浏览器宿主通过 stdin/stdout JSON Line 通信)和 Service WS(WebSocket 服务模式,接受外部客户端连接)。

+
+
何时调用:用户发起请求时第一时间响应
+
输入:SubmitTask 消息(指令 conversationId pageUrl pageTitle)
+
输出:TaskComplete LogEntry StatusChanged 消息
+
+
+ +
+

内部Agent Runtime 任务调度

+

run_submit_task() 是任务执行入口。依次执行四级路由决策:① deterministic_submit 确定性场景匹配 ② primary_orchestration 主编排 ③ direct_submit_skill 直连技能 ④ compat_llm_primary 标准LLM回退。

+
+
何时调用:SubmitTask 消息到达后
+
输入:指令 AgentRuntimeContext BrowserPipeTool
+
输出:AgentMessage::TaskComplete
+
+
+ +
+

内部场景平台 Scene Platform

+

扫描 skills/ 目录下的场景清单(scene.toml),解析 deterministic 段落的关键词规则。当用户指令匹配时,构建 DeterministicExecutionPlan(含 target_url org_code period_mode 等执行参数),直接执行预设脚本。

+
+
何时调用:四级路由决策第一步
+
输入:用户指令 pageUrl pageTitle skills目录
+
输出:DeterministicExecutionPlan 或 NotDeterministic
+
+
+ +
+

内部SgClawSettings 配置管理

+

从 JSON 配置文件或环境变量加载运行时配置:多 Provider 管理(apiKey baseUrl model)、Runtime Profile、SkillsDir、BrowserBackend 类型、OfficeBackend、Service WS 监听地址等。

+
+
何时调用:每次任务提交时加载
+
输入:sgclaw_config.json 或环境变量
+
输出:SgClawSettings 结构体
+
+
+ +
+

内部Runtime Engine 运行时引擎

+

根据 Runtime Profile(BrowserAttached/BrowserHeavy/GeneralAssistant)构建 Tool Policy 白名单,加载技能包,注入 Memory,构建 Agent 实例。同时负责指令增强(附加浏览器合约提示、检测特定任务类型)。

+
+
何时调用:主编排路径和标准LLM路径构建Agent时
+
核心方法:build_agent() build_instruction()
+
Profile:BrowserAttached / BrowserHeavy / GeneralAssistant
+
+
+ +
+

外部ZeroClaw Core 智能体核心

+

位于 third_party/zeroclaw/ 的 vendored Agent 核心库。提供 Agent 构建、Provider 管理、工具循环、Memory 接口、技能加载、Prompt 组装等核心能力。sgClaw 在其基础上叠加安全信封层。

+
+
何时调用:主编排和标准LLM路径中
+
位置:third_party/zeroclaw/
+
核心能力:Agent Provider ToolLoop Memory Skills
+
+
+ +
+

内部Browser Backend 浏览器后端

+

统一的浏览器操作接口(BrowserBackend trait)。两种实现:PipeBrowserBackend(通过 STDIO 与宿主通信)和 WsBrowserBackend(通过 WebSocket 直连 DevTools)。支持 SuperRpa/AgentBrowser/RustNative/ComputerUse 多种后端类型。

+
+
何时调用:需要操作浏览器时
+
支持操作:navigate click type getText eval select scrollTo 等15种
+
+
+ +
+

内部MAC Policy 安全策略

+

从 resources/rules.json 加载安全规则。三层安全模型:①握手时 HMAC seed 交换和会话密钥派生 ②Rust 侧域名+动作白名单校验 ③宿主侧 HMAC 二次验证。拒绝不在白名单的域名和被禁用的动作。

+
+
何时调用:每次浏览器操作执行前
+
检查项:域名白名单 动作类型 HMAC验证
+
+
+ +
+

外部SuperRPA Chromium 浏览器宿主

+

实际执行 DOM 操作的外部系统。接收 sgClaw 的 Command(含 HMAC),验证后执行 navigate/click/type/getText 等操作,返回 Response(含操作结果 + HMAC)。STDIO 模式下与 sgClaw 进程通过管道通信。

+
+
何时调用:BrowserBackend 发送命令时
+
通信协议:STDIO JSON Line 或 WebSocket
+
+
+
+
+
+ + +
+
+
3
+
LLM 大模型工作全流程 - 从语义识别到任务规划
+
+
+
+ 当用户指令无法匹配已知技能时,LLM 大模型开始工作。以下是大模型从理解用户意图到生成可执行计划的完整过程。 +
+
+
+
1
+
+

语义识别 - 理解用户说了什么

+

LLM 接收用户自然语言指令,识别用户的真实意图。例如"帮我查本月线损率" → 识别为"查询线损率数据"。

+
+
+
+
2
+
+

场景匹配 - 判断是否为已知场景

+

结合 Memory(记忆模块)中存储的历史任务记录,判断该指令是否与已有技能匹配。如果匹配,转交快速通道执行。

+
+
+
+
3
+
+

任务拆解 - 将大目标分解为小步骤

+

如果是新场景,LLM 将用户目标拆解为具体的、可操作的步骤序列。例如:打开系统 → 选择月份 → 点击查询 → 读取数据 → 导出Excel。

+
+
+
+
4
+
+

工具选择 - 决定用什么能力完成任务

+

LLM 根据步骤需求,从可用工具库中选择合适的工具。例如:需要打开网页选择"导航工具",需要点击按钮选择"点击工具",需要读取数据选择"读取工具"。

+
+
+
+
5
+
+

参数填充 - 确定每个工具的具体参数

+

LLM 为每个工具填充具体参数。例如点击工具需要知道"点击哪个按钮",导航工具需要知道"打开哪个URL"。这些参数从用户指令和上下文中提取。

+
+
+
+
6
+
+

执行计划生成 - 输出可执行的JSON/结构化指令

+

LLM 将拆解的步骤、选择的工具、填充的参数整合为结构化的执行计划,交由工具执行引擎依次执行。

+
+
+
+
7
+
+

循环迭代 - 根据执行结果动态调整

+

如果某一步执行失败或结果不符合预期,LLM 会收到反馈,重新规划后续步骤。例如页面打不开则尝试备用URL,元素找不到则换选择器。

+
+
+
+
+
+ + +
+
+
4
+
Memory 技能管理 与 Runtime Engine - 运行时核心引擎
+
+
+
+ sgClaw 的运行时核心由三大引擎协同工作:Memory(记忆模块)负责持久化存储对话历史与任务状态,技能管理系统负责加载和注入技能包到 Agent,Runtime Engine负责根据 Runtime Profile 构建完整的 Agent 运行环境(工具策略 + 技能加载 + 指令增强)。 +
+
+graph TB + subgraph Memory["Memory 记忆模块 zeroclaw::memory"] + M1["SQLite 存储 brain.db\n对话历史 任务状态 执行结果"] + M2["Memory Trait 接口\ncreateMemoryWithStorage\n支持多种后端 SQLite/文件"] + M1 -.->|"读写"| M2 + end + + subgraph SkillMgmt["技能管理 Skills Management"] + S1["技能加载器\nloadSkillsFromDirectory\n按目录扫描技能包"] + S2["技能过滤器\n按浏览器可用性过滤\nbrowser_script 工具裁剪"] + S3["ReadSkill Tool\n运行时按需读取技能详情\n支持 open_skills 配置"] + S4["技能目录解析\nskills/ 默认目录\n自定义 skillsDir"] + S1 --> S2 + S4 --> S1 + S1 --> S3 + end + + subgraph RuntimeEngine["Runtime Engine 运行时引擎"] + R1["Runtime Profile\nBrowserAttached / BrowserHeavy / GeneralAssistant"] + R2["Tool Policy 工具策略\n按 Profile 维护工具白名单\nallowed_tools 列表"] + R3["Agent Builder\n组装 Provider + Tools + Memory + Skills\n构建完整 Agent 实例"] + R4["指令增强器\n附加浏览器合约提示\n检测知乎热榜/Excel导出/大屏任务"] + R1 -->|"决定"| R2 + R2 -->|"约束"| R3 + R3 -->|"使用"| R4 + end + + Memory -->|"注入"| RuntimeEngine + SkillMgmt -->|"注入"| RuntimeEngine + + classDef memFill fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef skillFill fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef runtimeFill fill:#4a2c17,stroke:#e65100,color:#e6edf3 + + class Memory,M1,M2 memFill + class SkillMgmt,S1,S2,S3,S4 skillFill + class RuntimeEngine,R1,R2,R3,R4 runtimeFill +
+
+
+

内部Memory 记忆模块

+

职责:基于 SQLite(brain.db)持久化存储对话历史、任务状态和执行结果。通过 zeroclaw::memory::Memory trait 提供统一接口,支持多种存储后端。

+
+
何时调用:Agent 构建时创建 每次 LLM 调用前后读写
+
调用者:Runtime Engine(build_agent 方法)
+
存储路径:workspace/memory/brain.db
+
+
+
+

内部技能管理系统

+

职责:从 skills/ 目录(或自定义 skillsDir)扫描加载技能包,按浏览器是否可用过滤 browser_script 工具,通过 ReadSkill Tool 让 Agent 按需读取技能详情。支持 open_skills 独立技能目录配置。

+
+
何时调用:每次 Agent 构建时加载技能列表
+
调用者:Runtime Engine(load_skills_for_surface)
+
技能来源:workspace/skills/ 或 skillsDir 配置
+
+
+
+

内部Runtime Engine

+

职责:运行时核心编排器。根据 Runtime Profile 决定工具白名单,加载技能,注入 Memory,构建 Agent 实例。同时负责指令增强(附加浏览器合约提示、检测特定任务类型如知乎热榜/Excel导出/大屏展示)。

+
+
何时调用:每次任务提交时 构建 Agent 前
+
核心方法:build_agent() build_instruction()
+
Profile:BrowserAttached / BrowserHeavy / GeneralAssistant
+
+
+
+
+
+ + +
+
+
5
+
任务路由 - 四种执行路径决策树
+
+
+
+ 任务提交到 sgClaw 后,Agent Runtime 按优先级依次判断走哪条执行路径。这不是简单的"快速/AI"二选一,而是四级决策树。 +
+
+graph TB + A["SubmitTask 用户指令进入"] --> B["1. deterministic_submit\n场景平台匹配"] + B -->|"匹配已知确定场景"| C["确定性执行路径\ndeterministic_submit\n直接执行预设场景脚本"] + B -->|"未匹配 非确定性"| D["2. Primary Orchestration\nzeroclaw process_message"] + + D -->|"browser_surface_enabled\n且 should_use_primary"| E["主编排路径\nzeroclaw_process_message_primary\n完整 Agent 工具循环"] + D -->|"不满足条件"| F["3. direct_submit_skill\n配置了直连技能"] + + F -->|"directSubmitSkill已配置"| G["直连技能路径\ndirect_skill_primary\n绕过Agent直接执行"] + F -->|"未配置"| H["4. compat_llm_primary\n标准LLM路径\nzeroclaw agent turn"] + + C --> I["TaskComplete 返回结果"] + E --> I + G --> I + H --> I + + classDef routeFill fill:#e65100,stroke:#ff6d00,color:#fff + classDef path1Fill fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + classDef path2Fill fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef path3Fill fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef path4Fill fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef endFill fill:#484f58,stroke:#8b949e,color:#e6edf3 + + class B,D,F routeFill + class C path1Fill + class E path2Fill + class G path3Fill + class H path4Fill + class I endFill +
+
+
+
1
+
+

确定性场景匹配 - deterministic_submit

+

通过 scene_platform 模块扫描 skills/ 目录下的场景清单(scene.toml),匹配指令关键词、URL、页面标题。匹配成功则构建 DeterministicExecutionPlan,直接执行场景预设的浏览器脚本,无需 LLM 参与。典型场景:线损查询、报表导出等固定流程。

+
+
+
+
2
+
+

主编排路径 - zeroclaw_process_message_primary

+

当 Runtime Profile 启用浏览器工具(browser_surface_enabled)且 orchestration::should_use_primary 判定走主编排时,调用 zeroclaw 的 process_message 完整 Agent 循环。LLM 可以调用所有允许的工具(浏览器操作、技能工具等),支持多轮工具调用和动态规划。

+
+
+
+
3
+
+

直连技能路径 - direct_skill_primary

+

当配置中设置了 directSubmitSkill(格式:skillName.toolName),绕过正常 Agent 循环,直接执行指定的技能工具。适用于需要固定流程但又不适合确定性场景的中间态。

+
+
+
+
4
+
+

标准 LLM 路径 - compat_llm_primary

+

以上三条路都不通时的默认回退。创建标准 zeroclaw Agent turn,LLM 根据指令自主决定使用哪些工具。这是最灵活但也最慢的路径。

+
+
+
+
+
+ + +
+
+
6
+
浏览器执行全过程 - 从sgClaw到SuperRPA浏览器的命令传输
+
+
+
+ sgClaw 有两种浏览器后端模式:STDIO Pipe 模式(sgClaw 进程通过 stdin/stdout 与浏览器宿主通信)和 WebSocket 模式(直接连接浏览器 DevTools WebSocket)。安全校验在两种模式下都由 MAC Policy 层负责。 +
+
+graph TB + subgraph PipeMode["STDIO Pipe 模式(嵌入SuperRPA)"] + TE1["ZeroClawBrowserTool\n实现 zeroclaw::tools::Tool trait\n暴露 browser_action / superrpa_browser"] + SC1["MAC Policy 安全策略\n检查 rules.json 域名白名单\n动作白名单 HMAC验证"] + BC1["BrowserPipeTool\n分配 seq 计算 HMAC\n发送Command 等待Response"] + TP1["StdioTransport\nJSON Line 协议\nstdin/stdout 1MB限制"] + HOST1["浏览器宿主进程\nSuperRPA Chromium\n验证HMAC 执行DOM操作"] + + TE1 -->|"tool call"| SC1 + SC1 -->|"校验通过"| BC1 + BC1 -->|"Command + HMAC"| TP1 + TP1 -->|"JSON Line"| HOST1 + HOST1 -->|"Response + HMAC"| TP1 + TP1 -->|"匹配 seq 返回"| BC1 + BC1 -->|"结果"| TE1 + end + + subgraph WsMode["WebSocket 模式(独立运行)"] + TE2["ZeroClawBrowserTool\n相同的 Tool 接口"] + SC2["MAC Policy 相同的安全检查"] + BC2["WsBrowserBackend\nWebSocket 连接\nDevTools Protocol"] + WS1["WebSocket 协议层\ntungstenite 库"] + HOST2["浏览器 DevTools\nChrome DevTools Protocol"] + + TE2 -->|"tool call"| SC2 + SC2 -->|"校验通过"| BC2 + BC2 -->|"CDP Command"| WS1 + WS1 -->|"ws://host:port"| HOST2 + HOST2 -->|"CDP Response"| WS1 + WS1 -->|"结果"| BC2 + BC2 -->|"结果"| TE2 + end + + classDef teFill fill:#4a2c17,stroke:#e65100,color:#e6edf3 + classDef scFill fill:#2d1f3d,stroke:#9c27b0,color:#e6edf3 + classDef bcFill fill:#1a3a5c,stroke:#4a9eff,color:#e6edf3 + classDef tpFill fill:#484f58,stroke:#8b949e,color:#e6edf3 + classDef hostFill fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + + class TE1,TE2 teFill + class SC1,SC2 scFill + class BC1,BC2 bcFill + class TP1,WS1 tpFill + class HOST1,HOST2 hostFill +
+
+
+

内部ZeroClawBrowserTool

+

职责:实现 zeroclaw::tools::Tool trait,将 BrowserBackend 适配为 LLM 可调用的工具。暴露两个工具名:browser_action(传统别名)和 superrpa_browser(SuperRPA 专用,优先使用)。

+
+
何时调用:LLM 决定操作浏览器时
+
文件位置:compat/browser_tool_adapter.rs
+
+
+
+

内部MAC Policy 安全策略

+

职责:从 resources/rules.json 加载安全规则。三层安全检查:①握手时 HMAC seed 交换 ②Rust 侧域名+动作白名单校验 ③宿主侧 HMAC 二次验证。拒绝不在白名单的域名和被禁用的动作。

+
+
何时调用:每次浏览器工具调用前
+
规则文件:resources/rules.json
+
+
+
+

内部BrowserBackend 浏览器后端

+

职责:统一的浏览器操作接口(BrowserBackend trait)。两种实现:PipeBrowserBackend(通过 StdioTransport 与宿主通信)和 WsBrowserBackend(通过 WebSocket 直连 DevTools)。由 BrowserBackend 配置决定使用哪种。

+
+
后端类型:SuperRpa / AgentBrowser / RustNative / ComputerUse / Auto
+
文件位置:browser/pipe_backend.rs browser/ws_backend.rs
+
+
+
+

内部BrowserPipeTool

+

职责:STDIO Pipe 模式下的特权浏览器工具。为每个命令分配单调递增 seq,使用派生会话密钥计算 HMAC,发送 Command 消息后阻塞等待匹配的 Response,支持超时。

+
+
何时调用:Pipe 模式下每次浏览器操作
+
文件位置:pipe/browser_tool.rs
+
+
+
+
+
+ + +
+
+
7
+
外部系统关系图 - sgClaw与谁交互
+
+
+
+ sgClaw 不是孤立运行的,它与多个外部系统协同工作。以下是sgClaw与外部系统的交互关系。 +
+
+graph TB + subgraph External["外部系统 - sgClaw不控制这些系统"] + E1["LLM 提供商\nDeepSeek OpenAI Claude\nHTTP API 调用"] + E2["SuperRPA Chromium\n浏览器宿主进程\nSTDIO 或 WebSocket"] + E3["业务系统\n线损系统 客服系统\n通过浏览器访问"] + E4["客户端\nsg_claw_client CLI\nService WebSocket 连接"] + end + + subgraph sgClawInternal["sgClaw 内部"] + S1["通信网关\nSTDIO Pipe / Service WS"] + S2["Agent Runtime\ntask_runner 任务调度"] + S3["Runtime Engine\n构建Agent 工具策略"] + S4["ZeroClaw Core\nthird_party/zeroclaw\nAgent循环 工具循环"] + S5["MAC Policy\n安全策略 rules.json"] + S6["Browser Backend\nPipeBrowser / WsBrowser"] + end + + E4 -->|"SubmitTask"| S1 + S1 -->|"TaskComplete / LogEntry"| E4 + + S2 -->|"构建 Agent"| S3 + S3 -->|"build_agent"| S4 + + S4 -->|"发送Prompt 接收响应"| E1 + S4 -->|"调用工具"| S5 + S5 -->|"校验通过"| S6 + S6 -->|"浏览器命令"| E2 + E2 -->|"DOM操作"| E3 + E3 -->|"页面数据"| E2 + E2 -->|"命令结果"| S6 + S6 -->|"结果"| S4 + S4 -->|"事件桥接 log_entry"| S1 + + classDef extFill fill:#1f3d2d,stroke:#4caf50,color:#e6edf3 + classDef intFill fill:#4a2c17,stroke:#e65100,color:#e6edf3 + + class External,E1,E2,E3,E4 extFill + class sgClawInternal,S1,S2,S3,S4,S5,S6 intFill +
+
+
+ + +
+
+
8
+
完整生命周期 - 一个任务从出生到结束
+
+
+
+ 以一个真实场景为例:"帮我查本月线损率并导出Excel",展示sgClaw从接收指令到返回结果的完整生命周期。 +
+
+
+
1
+
+

通信网关接收指令

+

浏览器宿主进程通过 STDIO(JSON Line 协议)发送 SubmitTask 消息。sgClaw 创建会话,解析指令、page_url、page_title、conversation_id。

+
+
+
+
2
+
+

加载配置SgClawSettings

+

从 sgclaw_config.json 或环境变量加载配置:LLM provider(apiKey/baseUrl/model)、runtimeProfile、skillsDir、directSubmitSkill 等。

+
+
+
+
3
+
+

确定性场景匹配deterministic_submit

+

扫描 skills/ 目录下的场景清单(scene.toml),发现指令包含"线损率"、"本月"关键词,匹配到"线损查询"场景。构建 DeterministicExecutionPlan(含 target_url、org_code、period_mode 等参数)。

+
+
+
+
4
+
+

MAC Policy安全校验

+

检查目标域名是否在 rules.json 白名单中 → 通过。检查操作类型(navigate、click、getText)是否在动作白名单中 → 通过。

+
+
+
+
5
+
+

BrowserPipeTool执行浏览器命令

+

为每个命令分配单调递增 seq,使用派生会话密钥计算 HMAC。通过 StdioTransport 发送 Command 消息给浏览器宿主。执行:导航到线损系统 → 选择月份 → 点击查询 → 读取表格数据。

+
+
+
+
6
+
+

SuperRPA Chromium执行DOM操作

+

浏览器宿主接收 Command,验证 HMAC,执行实际 DOM 操作(导航、选择下拉框、点击按钮、读取表格内容),返回 Response(含操作结果 + HMAC)。

+
+
+
+
7
+
+

Report Artifact后处理

+

将浏览器返回的表格数据解析为结构化格式。根据场景的 postprocess 配置,使用 openxml_office 工具生成 .xlsx 文件。生成结果包含本地文件路径。

+
+
+
+
8
+
+

通信网关返回结果

+

通过 StdioTransport 发送 TaskComplete 消息给浏览器宿主,包含 success=true 和执行摘要(含生成的 .xlsx 文件路径)。浏览器宿主提示用户下载完成。

+
+
+
+
+
+ +
+ + + + \ No newline at end of file diff --git a/docs/sgClaw组件职责与流转全景图.pdf b/docs/sgClaw组件职责与流转全景图.pdf new file mode 100644 index 0000000..a87f907 Binary files /dev/null and b/docs/sgClaw组件职责与流转全景图.pdf differ diff --git a/docs/superpowers/plans/2026-04-14-request-url-resolution-plan.md b/docs/superpowers/plans/2026-04-14-request-url-resolution-plan.md new file mode 100644 index 0000000..c7445dd --- /dev/null +++ b/docs/superpowers/plans/2026-04-14-request-url-resolution-plan.md @@ -0,0 +1,418 @@ +# Request URL Resolution Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Replace the temporary line-loss request URL hardcode in `src/service/server.rs` with a unified bootstrap-target resolver that prefers current page context, then deterministic submit plans, then skill metadata, and finally `about:blank`. + +**Architecture:** Add a small service-owned resolver that returns a narrow `SubmitBootstrapTarget` result and centralizes precedence rules. Reuse `DeterministicExecutionPlan.target_url` as the authoritative source for deterministic line-loss scenes, then add minimal skill metadata fallback for configured direct browser-script skills, while keeping callback-host behavior unchanged. 
+ +**Tech Stack:** Rust, serde/serde_json, tungstenite, zeroclaw skill loader, staged `SKILL.toml` manifests, cargo test + +--- + +### Task 1: Add resolver-focused red tests for precedence + +**Files:** +- Modify: `src/service/server.rs:422-467` +- Test: `src/service/server.rs` (crate-local resolver tests) +- Test: `tests/service_ws_session_test.rs` + +- [ ] **Step 1: Write the failing page-context precedence test** + +In a crate-local unit test inside `src/service/server.rs`, add a focused resolver test that exercises the request-url resolver with: +- non-empty `page_url = "https://already-open.example.com/page"` +- an instruction that would otherwise match deterministic line-loss logic +- configured direct skill metadata present + +Assert the resolved bootstrap target uses the explicit non-empty `page_url` and reports `PageContext` source. + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `cargo test page_context_bootstrap_target_wins_over_deterministic_and_skill_fallback --lib -- --nocapture` +Expected: FAIL because no unified resolver/source enum exists yet. + +- [ ] **Step 3: Write the failing deterministic-precedence test** + +In `src/service/server.rs` crate-local tests, add a focused test for a deterministic line-loss instruction with no `page_url`. + +Use the same instruction shape already accepted by `decide_deterministic_submit(...)`, and assert: +- resolver source is `DeterministicPlan` +- resolved `request_url` equals `DeterministicExecutionPlan.target_url` +- no raw `instruction.contains("线损")` fallback is needed + +- [ ] **Step 4: Run the test to verify it fails** + +Run: `cargo test deterministic_bootstrap_target_uses_plan_target_url --lib -- --nocapture` +Expected: FAIL because service still uses `derive_request_url_from_instruction(...)`. 
+ +- [ ] **Step 5: Write the failing skill-fallback test** + +In `src/service/server.rs` crate-local tests, add a focused test for: +- no `page_url` +- instruction not deterministic +- configured direct-submit skill metadata provides `bootstrap_url` + +Assert resolver source is `SkillConfig` and `request_url` matches metadata. + +- [ ] **Step 6: Run the test to verify it fails** + +Run: `cargo test skill_metadata_bootstrap_url_is_used_when_no_page_context_or_plan_exists --lib -- --nocapture` +Expected: FAIL because skill metadata is not read today. + +- [ ] **Step 7: Write the failing malformed-metadata fallback test** + +In `src/service/server.rs` crate-local tests, add a focused test for malformed `bootstrap_url` metadata, with no page context and no deterministic plan. + +Assert the resolver: +- ignores malformed metadata +- returns `Fallback` +- resolves to `about:blank` + +- [ ] **Step 8: Run the test to verify it fails** + +Run: `cargo test malformed_skill_bootstrap_url_falls_back_to_about_blank --lib -- --nocapture` +Expected: FAIL because malformed metadata is not handled by a resolver yet. + +--- + +### Task 2: Introduce the bootstrap-target resolver in service code + +**Files:** +- Modify: `src/service/server.rs:280-467` +- Modify: `src/service/mod.rs:17-22` +- Test: `src/service/server.rs` (crate-local resolver tests) + +- [ ] **Step 1: Add the narrow resolver types in service code** + +In `src/service/server.rs`, add: + +```rust +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct SubmitBootstrapTarget { +    pub request_url: String, +    pub expected_domain: Option<String>, +    pub source: BootstrapTargetSource, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum BootstrapTargetSource { +    PageContext, +    DeterministicPlan, +    SkillConfig, +    Fallback, +} +``` + +Keep them scoped to service code. Do not create a generic cross-runtime planning object. 
+ +- [ ] **Step 2: Add a minimal resolver entry point** + +Implement a service-owned function in `src/service/server.rs`, conceptually: + +```rust +pub(crate) fn resolve_submit_bootstrap_target( + request: &crate::agent::SubmitTaskRequest, + workspace_root: &Path, + settings: &SgClawSettings, +) -> SubmitBootstrapTarget +``` + +Initial behavior for this step: +- return `PageContext` only when `request.page_url` exists and is non-empty after trimming +- add a crate-local regression that empty/whitespace `page_url` does not short-circuit later precedence tiers +- otherwise fall through to existing behavior temporarily so the new tests can compile incrementally + +- [ ] **Step 3: Update service startup to call the resolver** + +At the callback-host startup call site in `serve_client(...)`, replace: + +```rust +let bootstrap_url = initial_request_url_for_submit_task(&request); +``` + +with resolver usage: + +```rust +let bootstrap_target = resolve_submit_bootstrap_target(&request, context.workspace_root(), &settings); +let bootstrap_url = bootstrap_target.request_url; +``` + +Use the current settings-loading seam already used elsewhere in service code. Keep callback-host startup behavior otherwise unchanged. + +- [ ] **Step 4: Keep resolver visibility crate-local** + +Do not make the resolver types broadly public for integration tests. Keep the resolver and `BootstrapTargetSource` crate-local, and keep source-level assertions in `src/service/server.rs` unit tests. + +Only re-export/remove existing `initial_request_url_for_submit_task(...)` seams through `src/service/mod.rs` if production callers still require that wiring. + +- [ ] **Step 5: Run the first precedence test to verify it passes** + +Run: `cargo test page_context_bootstrap_target_wins_over_deterministic_and_skill_fallback --lib -- --nocapture` +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/service/server.rs src/service/mod.rs +git commit -m "refactor(service): add submit bootstrap target resolver scaffold" +``` + +--- + +### Task 3: Make deterministic submit the authoritative source for line-loss bootstrap URLs + +**Files:** +- Modify: `src/service/server.rs:422-467` +- Modify: `src/compat/deterministic_submit.rs:13-101` +- Test: `src/service/server.rs` (crate-local resolver tests) +- Test: `tests/service_ws_session_test.rs` + +- [ ] **Step 1: Write a small service-side seam for deterministic resolution** + +In `src/service/server.rs`, update the resolver so that when `page_url` is absent it calls: + +```rust +crate::compat::deterministic_submit::decide_deterministic_submit( + &request.instruction, + request.page_url.as_deref(), + request.page_title.as_deref(), +) +``` + +Only `DeterministicSubmitDecision::Execute(plan)` should produce a deterministic bootstrap target. + +Treat `NotDeterministic` and `Prompt { .. }` as “no deterministic bootstrap target” for service startup. + +- [ ] **Step 2: Use `plan.target_url` directly** + +Map `DeterministicSubmitDecision::Execute(plan)` to: +- `request_url = plan.target_url.clone()` +- `expected_domain = Some(plan.expected_domain.clone())` +- `source = BootstrapTargetSource::DeterministicPlan` + +Do not reconstruct the URL in `server.rs`. + +- [ ] **Step 3: Remove the temporary line-loss hardcode** + +Delete this branch from `derive_request_url_from_instruction(...)` or remove the function entirely if it is no longer needed: + +```rust +if instruction.contains("线损") || instruction.contains("lineloss") { + return Some("http://20.76.57.61:18080".to_string()); +} +``` + +Keep any still-needed legacy Zhihu fallback only if the resolver still requires it after deterministic integration. 
+ +- [ ] **Step 4: Add/adjust a deterministic regression test** + +In `src/service/server.rs` crate-local tests, add a focused assertion that line-loss bootstrap URL now comes from `DeterministicExecutionPlan.target_url`, not raw text matching. + +A good assertion shape is: +- call resolver with deterministic line-loss instruction +- assert `request_url == "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor"` +- assert `source == DeterministicPlan` + +- [ ] **Step 5: Run deterministic tests to verify they pass** + +Run: `cargo test deterministic_bootstrap_target_uses_plan_target_url --lib -- --nocapture` +Expected: PASS. + +- [ ] **Step 6: Run service websocket coverage for the same precedence** + +Run: `cargo test callback_host --test service_ws_session_test -- --nocapture` +Expected: PASS with no line-loss hardcode dependency. + +- [ ] **Step 7: Commit** + +```bash +git add src/service/server.rs src/compat/deterministic_submit.rs tests/service_ws_session_test.rs +git commit -m "refactor(service): derive line-loss bootstrap URL from deterministic plan" +``` + +--- + +### Task 4: Add skill-metadata fallback for configured direct-submit skills + +**Files:** +- Modify: `src/compat/direct_skill_runtime.rs:114-153` +- Modify: `src/service/server.rs:422-467` +- Optionally modify: `src/config/settings.rs` only if a tiny metadata pointer is required +- Modify: `D:/data/ideaSpace/rust/sgClaw/claw/claw/skills/skill_staging/skills/fault-details-report/SKILL.toml` +- Optionally modify: `D:/data/ideaSpace/rust/sgClaw/claw/claw/skills/skill_staging/skills/95598-weekly-monitor-report/SKILL.toml` +- Test: `src/service/server.rs` (crate-local resolver tests) +- Test: `tests/service_ws_session_test.rs` + +- [ ] **Step 1: Define the minimal skill metadata shape** + +Extend staged `SKILL.toml` parsing expectations to support a narrow metadata seam for browser-script direct skills. 
+ +The plan target fields are: +- `bootstrap_url` +- `expected_domain` + +Keep the metadata minimal. Do not add a broad dispatch registry or scene-policy schema. + +Recommended TOML shape in the skill manifest: + +```toml +[tools.metadata] +bootstrap_url = "https://example.com/path" +expected_domain = "example.com" +``` + +If the actual skill loader only supports per-tool custom fields in another location, use that established seam instead. Do not invent a parallel config file. + +- [ ] **Step 2: Add a helper that reads fallback metadata for the configured direct skill** + +In `src/compat/direct_skill_runtime.rs`, add a helper like: + +```rust +pub(crate) fn resolve_direct_submit_bootstrap_metadata( +    configured_tool: &str, +    workspace_root: &Path, +    settings: &SgClawSettings, +) -> Result<Option<DirectSubmitBootstrapMetadata>, PipeError> +``` + +Recommended shape: + +```rust +pub(crate) struct DirectSubmitBootstrapMetadata { +    pub bootstrap_url: String, +    pub expected_domain: Option<String>, +} +``` + +Reuse the existing `resolve_browser_script_skill(...)` lookup path so the service resolver does not duplicate staged-skill discovery logic. + +- [ ] **Step 3: Validate metadata conservatively** + +When reading fallback metadata: +- accept only non-empty `bootstrap_url` +- require it to parse as a valid absolute URL +- normalize or preserve `expected_domain` only if non-empty +- on malformed metadata, return `Ok(None)` for resolver purposes instead of failing service startup + +This keeps malformed fallback data from breaking submits and matches the approved spec. 
+ +- [ ] **Step 4: Wire skill metadata into the service resolver** + +Update `resolve_submit_bootstrap_target(...)` to: +- check skill metadata only after page context and deterministic parsing fail +- use `SkillConfig` as the source when metadata resolves +- fall through to `about:blank` when metadata is missing or malformed + +- [ ] **Step 5: Add a staged-skill fixture update** + +Update at least one configured direct skill fixture, likely `fault-details-report`, to include valid fallback metadata. + +Use concrete values appropriate for that skill’s target page; do not reuse the line-loss URL. + +- [ ] **Step 6: Run the skill-fallback test to verify it passes** + +Run: `cargo test skill_metadata_bootstrap_url_is_used_when_no_page_context_or_plan_exists --lib -- --nocapture` +Expected: PASS. + +- [ ] **Step 7: Run the malformed-metadata test to verify it passes** + +Run: `cargo test malformed_skill_bootstrap_url_falls_back_to_about_blank --lib -- --nocapture` +Expected: PASS. + +- [ ] **Step 8: Commit** + +```bash +git add src/compat/direct_skill_runtime.rs src/service/server.rs D:/data/ideaSpace/rust/sgClaw/claw/claw/skills/skill_staging/skills/fault-details-report/SKILL.toml tests/service_ws_session_test.rs +git commit -m "feat(service): add direct skill bootstrap URL fallback metadata" +``` + +--- + +### Task 5: Remove obsolete request-url glue and lock the final precedence contract + +**Files:** +- Modify: `src/service/server.rs:422-467` +- Modify: `src/service/mod.rs:20-22` +- Test: `src/service/server.rs` (crate-local resolver tests) +- Test: `tests/service_ws_session_test.rs` + +- [ ] **Step 1: Delete obsolete helper logic** + +If `derive_request_url_from_instruction(...)` is no longer needed after resolver landing, delete it completely. + +If a tiny legacy Zhihu-only seam still remains, keep it private behind the resolver and remove the old public shape from `service::browser_ws_client` if no longer needed. 
+ +- [ ] **Step 2: Lock the precedence contract with one final matrix test** + +In `src/service/server.rs` crate-local tests, add one table-driven or clearly segmented test that verifies all four final outcomes: +- non-empty page context wins +- deterministic plan wins when page context is absent or empty +- skill metadata wins when page context and deterministic plan are absent +- fallback becomes `about:blank` when nothing resolves + +- [ ] **Step 3: Run the focused resolver suite** + +Run: `cargo test bootstrap_target --lib -- --nocapture` +Expected: PASS. + +- [ ] **Step 4: Run service websocket regression coverage** + +Run: `cargo test callback_host --test service_ws_session_test -- --nocapture` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/service/server.rs src/service/mod.rs tests/service_ws_session_test.rs +git commit -m "refactor(service): finalize bootstrap target precedence" +``` + +--- + +### Task 6: Full verification and implementation handoff check + +**Files:** None (verification only) + +- [ ] **Step 1: Run focused deterministic and direct-skill tests** + +Run: `cargo test deterministic_submit -- --nocapture` +Expected: PASS. + +Run: `cargo test direct_submit -- --nocapture` +Expected: PASS. + +- [ ] **Step 2: Run service submit regression coverage** + +Run: `cargo test --test service_task_flow_test -- --nocapture` +Expected: PASS. + +Run: `cargo test --test service_ws_session_test -- --nocapture` +Expected: PASS. + +- [ ] **Step 3: Run targeted config/settings coverage if touched** + +Run: `cargo test service_protocol_update_config_test -- --nocapture` +Expected: PASS. + +- [ ] **Step 4: Build the project** + +Run: `cargo build --bin sg_claw` +Expected: PASS. + +- [ ] **Step 5: Manual behavior checklist** + +Verify manually: +1. Existing page-attached submits still bootstrap against the current page URL. +2. 
Deterministic line-loss submit without page context boots helper against the line-loss target page from `DeterministicExecutionPlan.target_url`. +3. Non-deterministic configured direct skill without page context uses skill metadata bootstrap URL if present. +4. Missing or malformed skill metadata does not crash startup and falls back to `about:blank`. +5. No service code remains that hardcodes line-loss request URL by checking raw instruction text. + +- [ ] **Step 6: Final commit (only if verification revealed required follow-up fixes)** + +```bash +git add -A +git commit -m "test: lock request URL resolution precedence" +``` + +Only create this commit if verification required an additional code or test fix. diff --git a/docs/superpowers/plans/2026-04-15-generated-scene-skill-platform-plan.md b/docs/superpowers/plans/2026-04-15-generated-scene-skill-platform-plan.md new file mode 100644 index 0000000..575aa6c --- /dev/null +++ b/docs/superpowers/plans/2026-04-15-generated-scene-skill-platform-plan.md @@ -0,0 +1,1180 @@ +# Generated Scene Skill Platform Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a manifest-driven generated-scene platform that discovers staged report/collection `browser_script` scenes, routes deterministic `。。。` requests through generic registry/resolver logic, migrates `tq-lineloss-report` off one-off Rust branches, and ships a first in-repo generator that outputs registration-ready scene packages with minimal or zero per-scene Rust changes. + +**Architecture:** Keep the existing submit branch shape in `src/agent/task_runner.rs`, but replace the line-loss-specific deterministic branch with a thin adapter over a generic scene registry, deterministic dispatcher, generic report-artifact interpreter, and generic XLSX postprocess path. 
Keep the generator separate from runtime internals by making `scene.toml` plus the lessons-learned TOML the only stable generator/runtime contract; generator code lives in its own module and binary, while runtime code stays under the existing `compat` submit/bootstrap seams. + +**Tech Stack:** Rust 2021, `serde`, `serde_json`, `toml`, existing `browser_script` runtime and callback-host/browser-backend seams, `node:test` for staged JS, Cargo integration tests, filesystem-based package generation. + +--- + +## Execution Context + +- Branch from the repo's current ws baseline branch, which is `feature/claw-ws` in this checkout today. Do **not** implement on that branch directly; create a new feature branch from its HEAD. +- Do **not** create a worktree unless the user explicitly asks. Branch isolation is required; worktree isolation is not. +- Keep `skillsDir` as the existing single resolved path. The new scene registry must scan inside that one resolved skills root instead of adding array-style scene roots or a second config field. +- For this branch's automated tests and real smokes, use a repo-local `skillsDir` override that points at `examples/generated_scene_platform`. That still preserves the single-root contract because the runtime scans one resolved root whose `skills/` child contains the committed sample package. +- Put the new runtime registration manifest at `/scene.toml`. Keep existing `skill_staging/scenes/*/scene.json` files for legacy staging/UI metadata and do **not** move runtime dispatch policy back into `scene.json`. +- Keep every required deliverable for this plan inside the current `claw-new` repo so the branch can be built, tested, and committed independently. The first committed sample package should live under `examples/generated_scene_platform/skills/`; publishing the same package into any external skills/staging repo is a separate follow-up, not part of this branch. 
+- V1 scope is locked to `category = "report_collection"`, `kind = "browser_script"`, `artifact.type = "report-artifact"`. Unsupported scene types must fail fast instead of partially working. +- Deterministic invocation remains exact-suffix-only: only raw instructions ending with the exact `。。。` suffix enter the scene dispatcher. +- Never use hidden page defaults for required canonical parameters. Missing org, missing month/week mode, or missing period must prompt and stop. +- Do **not** add a generic login/session subsystem in this plan. +- Preserve current non-platform flows: Zhihu/LLM, configured `directSubmitSkill`, and ordinary browser-attached orchestration must remain behaviorally unchanged unless an explicit regression test says otherwise. + +## File Map + +### Core runtime and contract files + +- Create: `src/scene_contract/mod.rs` + - shared serializable manifest contract used by both runtime and generator +- Create: `src/scene_contract/manifest.rs` + - `scene.toml` schema types, schema-version validation helpers, artifact/postprocess enums +- Create: `src/compat/scene_platform/mod.rs` + - exports the registry, dispatch, and resolver units +- Create: `src/compat/scene_platform/registry.rs` + - scans the single resolved `skillsDir`, loads `/scene.toml`, validates duplicates and runtime compatibility +- Create: `src/compat/scene_platform/dispatch.rs` + - deterministic candidate scoring, ambiguity fail-closed behavior, canonical param resolution, executable scene plan creation +- Create: `src/compat/scene_platform/resolvers.rs` + - reusable resolver types for `dictionary_entity`, `month_week_period`, `fixed_enum`, and `literal_passthrough` +- Create: `src/compat/report_artifact.rs` + - generic report-artifact parsing, status mapping, summary building, and export-readiness helpers +- Create: `src/compat/report_xlsx_export.rs` + - generic XLSX exporter for any `report-artifact` with `column_defs`/`columns` + `rows` +- Modify: `src/lib.rs` + - export new 
shared/runtime/generator modules and any CLI helpers needed by tests +- Modify: `src/compat/mod.rs` + - export the new scene-platform and report-artifact modules +- Modify: `src/compat/deterministic_submit.rs` + - keep the public API shape, but make it registry/manifest-driven instead of line-loss-hardcoded +- Modify: `src/compat/direct_skill_runtime.rs` + - reuse the generic report-artifact interpreter so direct-submit and scene-submit summarize/status-map the same way +- Modify: `src/agent/task_runner.rs` + - keep branch order, but call the new registry-backed deterministic planner before ordinary orchestration/LLM +- Modify: `src/service/server.rs` + - keep bootstrap precedence shape, but let deterministic plans source `target_url` / `expected_domain` from scene manifests instead of hardcoded constants + +### Generator files + +- Create: `src/generated_scene/mod.rs` + - generator entrypoints shared by tests and CLI +- Create: `src/generated_scene/analyzer.rs` + - source directory inspection for v1 report/collection `browser_script` scenes +- Create: `src/generated_scene/generator.rs` + - template rendering and package writing into an output staging root +- Create: `src/generated_scene/lessons.rs` + - loads and validates `tq-lineloss-lessons-learned.toml` as generation constraints +- Create: `src/bin/sg_scene_generate.rs` + - CLI entry for `sgClaw`'s in-repo scene generator capability + +### In-repo sample package and reference assets + +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/scene.toml` + - first committed manifest-driven sample scene package used by runtime and generator tests in this repo +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/references/org-dictionary.json` + - external dictionary data for the `dictionary_entity` resolver fixture +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/SKILL.toml` + - committed sample browser-script tool contract aligned with the manifest-driven 
runtime +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/SKILL.md` + - committed sample documentation for canonical args, artifact contract, and runtime expectations +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.js` + - committed sample collection script with generic-platform artifact fields +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.test.js` + - committed JS contract tests for canonical args and artifact shape +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/references/data-quality.md` + - committed sample data-quality notes aligned with manifest-driven output rules +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/references/collection-flow.md` + - committed sample bootstrap/collection-flow notes +- Create: `tests/fixtures/scene_source/tq_lineloss/index.html` + - hermetic in-repo source fixture for required analyzer/generator smoke coverage +- Create: `tests/fixtures/scene_source/tq_lineloss/js/collect.js` + - hermetic in-repo source fixture JS for analyzer/generator smoke coverage + +### Repo-local runtime discovery path for validation + +- Use `examples/generated_scene_platform` as the repo-local `skillsDir` override root during tests and manual smokes. +- The runtime still scans one resolved root only; it just resolves that root to `examples/generated_scene_platform`, whose `skills/` child contains the committed sample package. +- Add or reuse a tiny repo-local config fixture such as `tmp/generated_scene_platform_sgclaw_config.json` or an equivalent test helper so the validation steps all point at the same reproducible `skillsDir`. +- Do not require external staging repos to make the manifest-driven runtime discoverable during this branch. 
+ +### External publish target kept out of scope for this branch + +- Do not modify external paths like `D:/data/ideaSpace/rust/sgClaw/claw/claw/skills/...` in this plan. +- If the user later wants the generated sample published into that external staging repo, do it as a separate follow-up after this branch is green. + +### Platform-reference files + +### Tests and fixtures + +- Create: `tests/scene_registry_test.rs` + - manifest loading, duplicate detection, schema validation, tool compatibility checks +- Create: `tests/report_artifact_postprocess_test.rs` + - generic report-artifact parsing and XLSX postprocess coverage +- Create: `tests/generated_scene_lessons_test.rs` + - lessons-TOML shape and required-rule coverage +- Create: `tests/scene_generator_test.rs` + - analyzer + generator integration coverage using hermetic fixtures +- Create: `tests/fixtures/generated_scene/report_collection/index.html` + - supported v1 report-scene fixture +- Create: `tests/fixtures/generated_scene/report_collection/js/report.js` + - supported fixture source hints for analyzer tests +- Create: `tests/fixtures/generated_scene/non_report/index.html` + - unsupported fixture proving fail-fast behavior +- Modify: `tests/deterministic_submit_test.rs` + - migrate from hardcoded line-loss expectations to registry-driven deterministic behavior +- Modify: `tests/agent_runtime_test.rs` + - keep direct-submit behavior intact while sharing generic report-artifact summaries +- Modify: `tests/service_task_flow_test.rs` + - task-runner/bootstrap regressions for manifest-driven deterministic scenes +- Modify: `tests/service_ws_session_test.rs` + - callback-host bootstrap target regression for manifest-driven deterministic submit when the browser-ws path is active + +### Legacy files to delete only after green verification proves they are unused + +- Delete: `src/compat/tq_lineloss/org_units.rs` +- Delete: `src/compat/tq_lineloss/org_resolver.rs` +- Delete: 
`src/compat/tq_lineloss/period_resolver.rs`
+- Delete or reduce to a compatibility shim only if still needed: `src/compat/lineloss_xlsx_export.rs`
+
+---
+
+### Task 1: Create the implementation branch and lock the layout boundaries
+
+**Files:**
+- Verify only
+
+- [ ] **Step 1: Switch to the ws baseline branch and create a new platform branch**
+
+Run:
+
+```bash
+git switch feature/claw-ws
+git switch -c feature/generated-scene-skill-platform
+```
+
+Expected: `git status -sb` shows a clean new branch rooted at the current ws baseline, not `feature/claw-ws` itself.
+
+- [ ] **Step 2: Verify the current single-root skills layout before coding**
+
+Run:
+
+```bash
+cargo test --test compat_config_test ws_cleanup_resolves_single_configured_skills_dir -- --nocapture
+```
+
+Expected: PASS, proving the repo still uses one resolved `skillsDir` path and the platform work must build on that instead of introducing array-style roots.
+
+- [ ] **Step 3: Write down the two non-negotiable layout decisions in the first registry test scaffold**
+
+The very first red test file (`tests/scene_registry_test.rs`) must assume:
+
+```rust
+// runtime manifest location:
+let manifest_path = skill_root.join("scene.toml");
+
+// legacy scene.json stays outside runtime dispatch ownership:
+assert!(manifest_path.exists());
+assert!(!skill_root.join("scene.json").exists());
+```
+
+This prevents the implementation from drifting back toward `scene.json` routing or multi-root config. 
+ +--- + +### Task 2: Add the shared `scene.toml` contract and registry loader + +**Files:** +- Create: `src/scene_contract/mod.rs` +- Create: `src/scene_contract/manifest.rs` +- Create: `src/compat/scene_platform/mod.rs` +- Create: `src/compat/scene_platform/registry.rs` +- Modify: `src/lib.rs` +- Modify: `src/compat/mod.rs` +- Create: `tests/scene_registry_test.rs` + +- [ ] **Step 1: Write the failing registry tests first** + +Add `tests/scene_registry_test.rs` with focused red cases like: + +```rust +#[test] +fn registry_loads_scene_manifest_from_skill_root() { + let skill_root = temp_skill_with_scene_manifest(r#" +[scene] +id = "tq-lineloss-report" +skill = "tq-lineloss-report" +tool = "collect_lineloss" +kind = "browser_script" +version = "0.1.0" +category = "report_collection" + +[manifest] +schema_version = "1" + +[bootstrap] +expected_domain = "20.76.57.61" +target_url = "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor" +requires_target_page = true + +[artifact] +type = "report-artifact" +success_status = ["ok", "partial", "empty"] +failure_status = ["blocked", "error"] +"#); + + let registry = load_scene_registry(skill_root.parent().unwrap()).unwrap(); + assert_eq!(registry.len(), 1); + assert_eq!(registry[0].manifest.scene.id, "tq-lineloss-report"); +} + +#[test] +fn registry_rejects_duplicate_scene_ids_with_both_paths_in_error() { /* two skills, same scene.id */ } + +#[test] +fn registry_rejects_unknown_manifest_schema_version() { /* schema_version = "999" */ } + +#[test] +fn registry_rejects_non_browser_script_scene_tool_in_v1() { /* kind = "shell" should fail */ } + +#[test] +fn registry_ignores_skills_without_scene_toml() { /* ordinary skills still load elsewhere */ } +``` + +- [ ] **Step 2: Run the registry test file and verify it fails** + +Run: + +```bash +cargo test --test scene_registry_test -- --nocapture +``` + +Expected: FAIL because `scene.toml` types and registry loading do not exist yet. 
+
+- [ ] **Step 3: Implement the serializable manifest contract and the single-root registry loader**
+
+Implement the minimal contract and loader needed to satisfy the tests:
+
+```rust
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct SceneManifest {
+    pub scene: SceneSection,
+    pub manifest: ManifestSection,
+    pub bootstrap: BootstrapSection,
+    pub deterministic: DeterministicSection,
+    pub params: Vec<ParamSpec>,
+    pub artifact: ArtifactSection,
+    pub postprocess: Option<PostprocessSection>,
+}
+
+#[derive(Debug, Clone)]
+pub struct SceneRegistryEntry {
+    pub manifest: SceneManifest,
+    pub skill_root: PathBuf,
+}
+
+pub fn load_scene_registry(skills_dir: &Path) -> Result<Vec<SceneRegistryEntry>, SceneRegistryError> {
+    // iterate immediate skill dirs under the already-resolved single skillsDir
+    // look for `<skill>/scene.toml` only
+    // parse and validate schema version
+    // verify scene.id uniqueness across the loaded root
+    // verify manifest.scene.skill matches the containing skill package
+    // verify referenced tool exists in SKILL.toml and is browser_script in v1
+}
+```
+
+Rules to lock now:
+- `schema_version = "1"` is the only accepted version in v1
+- duplicate `scene.id` is a hard error and must report both manifest paths
+- manifest loading must not add a second config key or a hardcoded `skill_staging/scenes` scan
+- `scene.toml` is runtime-owned; `scene.json` stays legacy-only
+
+- [ ] **Step 4: Re-run the registry tests and verify they pass**
+
+Run:
+
+```bash
+cargo test --test scene_registry_test -- --nocapture
+```
+
+Expected: PASS.
+
+- [ ] **Step 5: Commit the contract and registry slice**
+
+Run:
+
+```bash
+git add src/lib.rs src/scene_contract/mod.rs src/scene_contract/manifest.rs src/compat/mod.rs src/compat/scene_platform/mod.rs src/compat/scene_platform/registry.rs tests/scene_registry_test.rs
+git commit -m "feat: add scene manifest registry"
+```
+
+Expected: one commit that introduces the stable runtime/generator contract and registry loader. 
+ +--- + +### Task 3: Generalize deterministic dispatch and reusable parameter resolvers + +**Files:** +- Create: `src/compat/scene_platform/dispatch.rs` +- Create: `src/compat/scene_platform/resolvers.rs` +- Modify: `src/compat/deterministic_submit.rs` +- Modify: `tests/deterministic_submit_test.rs` + +- [ ] **Step 1: Replace the line-loss-only deterministic tests with registry-backed red tests** + +Extend `tests/deterministic_submit_test.rs` with registry-backed red cases built from temp fixture manifests under a temporary skills root. Do **not** depend on the committed sample package from Task 6 yet; Task 3 must stay hermetic and independently runnable. Add failing cases such as: + +```rust +#[test] +fn deterministic_submit_uses_registry_backed_scene_plan() { + let decision = decide_deterministic_submit( + "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。", + None, + None, + ); + + match decision { + DeterministicSubmitDecision::Execute(plan) => { + assert_eq!(plan.scene_id, "tq-lineloss-report"); + assert_eq!(plan.tool_name, "tq-lineloss-report.collect_lineloss"); + assert_eq!(plan.expected_domain, "20.76.57.61"); + assert_eq!(plan.target_url, "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor"); + } + other => panic!("expected execute plan, got {other:?}"), + } +} + +#[test] +fn deterministic_submit_fails_closed_on_scene_ambiguity() { /* two plausible scene.toml entries -> Prompt */ } + +#[test] +fn deterministic_submit_prompts_for_missing_period_instead_of_defaulting() { + let decision = decide_deterministic_submit("兰州公司 台区线损大数据 月累计线损率统计分析。。。", None, None); + assert!(matches!(decision, DeterministicSubmitDecision::Prompt { .. 
})); +} + +#[test] +fn deterministic_submit_uses_page_context_to_break_ties_before_keyword_only_match() { /* page_url/title beats keyword overlap */ } + +#[test] +fn zhihu_without_suffix_remains_not_deterministic() { + assert!(matches!( + decide_deterministic_submit("打开知乎热榜", Some("https://www.zhihu.com/hot"), Some("知乎热榜")), + DeterministicSubmitDecision::NotDeterministic + )); +} +``` + +Also invert the current default-period expectations. `兰州公司 月累计。。。` and `兰州公司 周累计。。。` must now prompt instead of executing. + +- [ ] **Step 2: Run the targeted deterministic tests and verify they fail** + +Run: + +```bash +cargo test --test deterministic_submit_test -- --nocapture +``` + +Expected: FAIL because the current implementation is still hardcoded to line-loss constants and still defaults missing month/week periods. + +- [ ] **Step 3: Implement reusable resolver types and a registry-backed dispatcher** + +Implement the generic deterministic planner in the new scene-platform modules, then make `src/compat/deterministic_submit.rs` a thin adapter over it. 
+
+Required implementation shape:
+
+```rust
+pub enum ResolverKind {
+    DictionaryEntity,
+    MonthWeekPeriod,
+    FixedEnum,
+    LiteralPassthrough,
+}
+
+pub struct SceneExecutionPlan {
+    pub scene_id: String,
+    pub instruction: String,
+    pub tool_name: String,
+    pub expected_domain: String,
+    pub target_url: String,
+    pub args: Map<String, Value>,
+    pub success_statuses: Vec<String>,
+    pub failure_statuses: Vec<String>,
+    pub postprocess: Option<PostprocessSection>,
+}
+
+pub fn plan_deterministic_scene(
+    raw_instruction: &str,
+    page_url: Option<&str>,
+    page_title: Option<&str>,
+    skills_dir: &Path,
+) -> Result<DeterministicSubmitDecision, SceneDispatchError> {
+    // exact suffix gate
+    // load registry from the single skillsDir
+    // score candidate scenes using include/exclude keywords + page context + required-param resolution
+    // if multiple remain plausible -> fail closed with explicit ambiguity prompt
+    // resolve params using generic resolver kinds
+    // build executable SceneExecutionPlan with manifest bootstrap + tool + canonical args
+}
+```
+
+Resolver rules to lock now:
+- `dictionary_entity` reads external dictionary data such as `references/org-dictionary.json`; no hardcoded org list in Rust after migration
+- `month_week_period` returns explicit prompts for missing mode, missing period, contradictory month/week intent, or week-without-year
+- `fixed_enum` and `literal_passthrough` exist now so the manifest contract is extensible, even if line-loss is the only v1 user
+- if a new scene needs a new resolver **type**, add a reusable resolver, not a scene-specific `if scene_id == ...` branch
+
+- [ ] **Step 4: Re-run the deterministic tests and verify they pass**
+
+Run:
+
+```bash
+cargo test --test deterministic_submit_test -- --nocapture
+```
+
+Expected: PASS, including the new no-default-period behavior and ambiguity fail-closed coverage. 
+ +- [ ] **Step 5: Commit the registry-driven deterministic slice** + +Run: + +```bash +git add src/compat/deterministic_submit.rs src/compat/scene_platform/dispatch.rs src/compat/scene_platform/resolvers.rs tests/deterministic_submit_test.rs +git commit -m "feat: add registry-driven deterministic scene dispatch" +``` + +Expected: one commit that removes one-off line-loss decision ownership from the deterministic planner. + +--- + +### Task 4: Add a generic report-artifact interpreter and XLSX postprocess path + +**Files:** +- Create: `src/compat/report_artifact.rs` +- Create: `src/compat/report_xlsx_export.rs` +- Modify: `src/compat/direct_skill_runtime.rs` +- Modify: `src/compat/deterministic_submit.rs` +- Create: `tests/report_artifact_postprocess_test.rs` +- Modify: `tests/agent_runtime_test.rs` + +- [ ] **Step 1: Write the red tests for generic report-artifact handling** + +Add `tests/report_artifact_postprocess_test.rs` and the minimum `tests/agent_runtime_test.rs` extensions needed to prove the platform no longer depends on line-loss-specific Rust export logic: + +```rust +#[test] +fn report_artifact_postprocess_exports_xlsx_for_ok_or_partial_scene() { + let artifact = serde_json::json!({ + "type": "report-artifact", + "report_name": "tq-lineloss-report", + "status": "partial", + "columns": ["ORG_NAME", "LINE_LOSS_RATE"], + "column_defs": [["ORG_NAME", "供电单位"], ["LINE_LOSS_RATE", "综合线损率(%)"]], + "rows": [{"ORG_NAME": "国网兰州供电公司", "LINE_LOSS_RATE": "1.23"}], + "counts": {"rows": 1}, + "partial_reasons": ["report_log_failed"] + }); + + let outcome = interpret_report_artifact_and_postprocess(&artifact, report_postprocess_xlsx(), temp_workspace()).unwrap(); + assert!(outcome.success); + assert!(outcome.summary.contains("status=partial")); + assert!(outcome.summary.contains("detail_rows=1")); + assert!(outcome.summary.contains("export_path=")); +} + +#[test] +fn report_artifact_postprocess_skips_export_for_blocked_or_error_scene() { /* no xlsx path */ } + +#[test] 
+fn direct_submit_and_scene_submit_share_the_same_report_summary_contract() { /* direct_skill_runtime + deterministic path both use same summary builder */ }
+```
+
+- [ ] **Step 2: Run the focused report-artifact tests and verify they fail**
+
+Run:
+
+```bash
+cargo test --test report_artifact_postprocess_test -- --nocapture
+cargo test --test agent_runtime_test submit_task_treats_partial_report_artifact_as_success_with_warning_summary -- --nocapture
+```
+
+Expected: FAIL because the generic interpreter/exporter does not exist yet and deterministic line-loss export is still special-cased.
+
+- [ ] **Step 3: Implement the shared parser, summary builder, and generic XLSX exporter**
+
+Implement a reusable path that both deterministic scenes and configured direct-submit skills can call:
+
+```rust
+pub struct ParsedReportArtifact {
+    pub report_name: String,
+    pub status: String,
+    pub columns: Vec<String>,
+    pub column_defs: Vec<(String, String)>,
+    pub rows: Vec<Map<String, Value>>,
+    pub counts: ReportCounts,
+    pub partial_reasons: Vec<String>,
+}
+
+pub fn interpret_report_artifact_and_postprocess(
+    artifact_json: &Value,
+    postprocess: Option<&PostprocessSection>,
+    workspace_root: &Path,
+) -> Result<ReportArtifactOutcome, ReportArtifactError> {
+    // parse report-artifact generically
+    // map ok/partial/empty => success=true
+    // map blocked/error => success=false
+    // if postprocess.exporter == Some("xlsx_report") and status is exportable, write xlsx under workspace_root/out
+    // if postprocess.auto_open == Some("excel"), reuse existing open-export helper
+}
+```
+
+Rules:
+- export logic must read `column_defs` when present, else fall back to `columns`
+- do not keep line-loss-only column-name assumptions in Rust
+- keep direct-submit behavior unchanged for non-artifact string outputs
+- keep `blocked` / `error` as failures even if rows happen to be present late in the artifact
+
+- [ ] **Step 4: Re-run the focused tests and verify they pass**
+
+Run:
+
+```bash
+cargo test --test report_artifact_postprocess_test -- --nocapture 
+cargo test --test agent_runtime_test submit_task_treats_partial_report_artifact_as_success_with_warning_summary -- --nocapture +cargo test --test agent_runtime_test submit_task_treats_blocked_report_artifact_as_failure -- --nocapture +``` + +Expected: PASS. + +- [ ] **Step 5: Commit the generic artifact/postprocess slice** + +Run: + +```bash +git add src/compat/report_artifact.rs src/compat/report_xlsx_export.rs src/compat/direct_skill_runtime.rs src/compat/deterministic_submit.rs tests/report_artifact_postprocess_test.rs tests/agent_runtime_test.rs +git commit -m "refactor: share generic report artifact postprocess" +``` + +Expected: one commit that removes the need for per-scene Rust export logic. + +--- + +### Task 5: Wire manifest-driven scenes into submit and bootstrap without regressing other flows + +**Files:** +- Modify: `src/agent/task_runner.rs` +- Modify: `src/service/server.rs` +- Modify: `tests/service_task_flow_test.rs` +- Modify: `tests/service_ws_session_test.rs` +- Modify: `tests/agent_runtime_test.rs` + +- [ ] **Step 1: Add the failing submit/bootstrap regression tests** + +Add focused tests that lock branch order and bootstrap behavior: + +```rust +#[test] +fn submit_task_routes_suffix_instruction_through_manifest_scene_before_llm() { + // no provider call should happen when deterministic scene planning succeeds or prompts +} + +#[test] +fn resolve_submit_bootstrap_target_prefers_manifest_scene_target_for_deterministic_scene() { + let request = SubmitTaskRequest { + instruction: "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。".to_string(), + conversation_id: None, + messages: vec![], + page_url: None, + page_title: None, + }; + let target = resolve_submit_bootstrap_target(&request, workspace_root, &settings); + assert_eq!(target.request_url, "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor"); + assert_eq!(target.expected_domain.as_deref(), Some("20.76.57.61")); +} + +#[test] +fn zhihu_without_suffix_keeps_existing_non_scene_path() { /* 
ordinary path unchanged */ } +``` + +For the browser-ws/callback-host path, add one regression in `tests/service_ws_session_test.rs` proving the first bootstrap/open target comes from `scene.toml` when a deterministic scene plan exists. + +- [ ] **Step 2: Run the focused integration tests and verify they fail** + +Run: + +```bash +cargo test --test service_task_flow_test -- --nocapture +cargo test --test service_ws_session_test callback_host -- --nocapture +``` + +Expected: FAIL because the submit/bootstrap path still depends on the old deterministic line-loss branch shape. + +- [ ] **Step 3: Implement the minimal wiring changes only where the branch already exists** + +Implementation targets: +- keep the current submit branch order in `src/agent/task_runner.rs` +- keep `resolve_submit_bootstrap_target(...)` precedence in `src/service/server.rs` +- replace the old hardcoded deterministic plan source with the new manifest-backed planner +- keep configured `directSubmitSkill` and ordinary LLM/browser orchestration behavior untouched + +The resulting branch order must still be: + +```rust +// 1. registry-backed deterministic scene (exact suffix only) +// 2. ordinary primary orchestration path +// 3. configured directSubmitSkill +// 4. compat LLM/runtime path +``` + +- [ ] **Step 4: Re-run the focused integration tests and verify they pass** + +Run: + +```bash +cargo test --test service_task_flow_test -- --nocapture +cargo test --test service_ws_session_test callback_host -- --nocapture +cargo test --test agent_runtime_test -- --nocapture +``` + +Expected: PASS, with no regression to the ordinary direct-submit or Zhihu paths. 
+ +- [ ] **Step 5: Commit the submit/bootstrap integration slice** + +Run: + +```bash +git add src/agent/task_runner.rs src/service/server.rs tests/service_task_flow_test.rs tests/service_ws_session_test.rs tests/agent_runtime_test.rs +git commit -m "refactor: wire manifest scenes into submit bootstrap" +``` + +Expected: one commit that changes wiring only at the existing seams. + +--- + +### Task 6: Add the first manifest-driven `tq-lineloss-report` sample package inside this repo + +**Files:** +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/scene.toml` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/references/org-dictionary.json` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/SKILL.toml` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/SKILL.md` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.js` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.test.js` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/references/data-quality.md` +- Create: `examples/generated_scene_platform/skills/tq-lineloss-report/references/collection-flow.md` +- Modify: `tests/deterministic_submit_test.rs` +- Modify: `tests/scene_registry_test.rs` + +- [ ] **Step 1: Add the failing line-loss manifest and runtime-contract checks** + +Create the `scene.toml` shape in the in-repo sample package first and lock the migration expectations: + +```toml +[scene] +id = "tq-lineloss-report" +skill = "tq-lineloss-report" +tool = "collect_lineloss" +kind = "browser_script" +version = "0.1.0" +category = "report_collection" + +[manifest] +schema_version = "1" + +[bootstrap] +expected_domain = "20.76.57.61" +target_url = "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor" +page_title_keywords = ["线损"] +requires_target_page = true + +[deterministic] +suffix = "。。。" 
+include_keywords = ["线损", "月累计", "周累计", "统计分析"] +exclude_keywords = ["知乎"] + +[[params]] +name = "org" +resolver = "dictionary_entity" +required = true +prompt_missing = "已命中台区线损报表技能,但缺少供电单位。" +prompt_ambiguous = "已命中台区线损报表技能,但供电单位存在歧义,请补充更完整名称。" + +[params.resolver_config] +dictionary_ref = "references/org-dictionary.json" +output_label_field = "org_label" +output_code_field = "org_code" + +[[params]] +name = "period" +resolver = "month_week_period" +required = true +prompt_missing = "已命中台区线损报表技能,但缺少统计周期。" +prompt_ambiguous = "已命中台区线损报表技能,但统计周期存在歧义,请补充更明确表达。" + +[artifact] +type = "report-artifact" +success_status = ["ok", "partial", "empty"] +failure_status = ["blocked", "error"] + +[postprocess] +exporter = "xlsx_report" +auto_open = "excel" +``` + +Also add a red JS assertion in the committed sample package proving the script returns `column_defs` and never re-parses raw natural-language org/period text: + +```javascript +test('buildBrowserEntrypointResult keeps canonical args and generic export fields only', async () => { + const artifact = await buildBrowserEntrypointResult({ + expected_domain: '20.76.57.61', + org_label: '国网兰州供电公司', + org_code: '62401', + period_mode: 'month', + period_mode_code: '1', + period_value: '2026-03', + period_payload: { fdate: '2026-03' }, + instruction: '兰州公司 月累计 2026-03' + }, fakeDeps); + + assert.equal(artifact.org.code, '62401'); + assert.ok(Array.isArray(artifact.column_defs)); + assert.equal(JSON.stringify(artifact).includes('兰州公司 月累计 2026-03'), false); +}); +``` + +- [ ] **Step 2: Run the targeted line-loss tests and verify they fail** + +Run: + +```bash +cargo test --test deterministic_submit_test -- --nocapture +node "examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.test.js" +``` + +Expected: FAIL because the runtime is not yet manifest-driven and the committed sample package does not yet expose the final manifest/dictionary/export contract. 
+ +- [ ] **Step 3: Implement the sample-scene migration without adding per-scene Rust branches** + +Required actions: +- add `scene.toml` under the in-repo sample skill root and use the same layout the generator will emit +- make tests and service-smoke config resolve `skillsDir` to `examples/generated_scene_platform` so the registry can discover the committed sample package without any external repo copy step +- export the current org unit data into `references/org-dictionary.json` and make the resolver read that file instead of a Rust hardcoded list +- update `collect_lineloss.js` so the returned `report-artifact` includes generic-platform fields needed by `report_xlsx_export.rs` +- keep collection logic in JS; do **not** move line-loss business semantics back into Rust +- write `SKILL.toml` / `SKILL.md` / references docs into the sample package to describe canonical args and the manifest-driven contract +- keep any external staging-repo publish step out of scope for this branch; this task only commits the in-repo sample package + +- [ ] **Step 4: Re-run the line-loss tests and verify they pass** + +Run: + +```bash +cargo test --test deterministic_submit_test -- --nocapture +node "examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.test.js" +``` + +Expected: PASS, including the new missing-period prompt behavior and the new manifest-driven sample-package shape. + +- [ ] **Step 5: Commit the line-loss sample migration** + +Run: + +```bash +git add examples/generated_scene_platform/skills/tq-lineloss-report tests/deterministic_submit_test.rs tests/scene_registry_test.rs +git commit -m "feat: add manifest-driven lineloss sample package" +``` + +Expected: one commit that adds the first committed manifest-driven sample package and updates runtime expectations around it. 
+ +--- + +### Task 7: Write the required `tq-lineloss` lessons-learned artifacts and load them as generator rules + +**Files:** +- Create: `docs/superpowers/references/tq-lineloss-lessons-learned.md` +- Create: `docs/superpowers/references/tq-lineloss-lessons-learned.toml` +- Create: `tests/generated_scene_lessons_test.rs` +- Create: `src/generated_scene/mod.rs` +- Create: `src/generated_scene/lessons.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write the failing lessons-rules test before the docs** + +Add `tests/generated_scene_lessons_test.rs` that requires all mandatory structured rule sections to exist. In the same red step, wire the empty `src/generated_scene/mod.rs` and `src/lib.rs` exports needed so this test fails on missing implementation/data, not on missing module visibility: + +```rust +#[test] +fn lineloss_lessons_toml_declares_required_generator_rules() { + let lessons = load_generation_lessons("docs/superpowers/references/tq-lineloss-lessons-learned.toml").unwrap(); + + assert!(lessons.routing.require_exact_suffix); + assert!(lessons.routing.unsupported_scene_fail_closed); + assert!(lessons.canonical_params.require_explicit_period); + assert!(lessons.bootstrap.require_expected_domain); + assert!(lessons.bootstrap.require_target_url); + assert!(lessons.artifact.require_report_artifact); + assert!(lessons.validation.require_pipe_and_ws_checks); + assert!(lessons.validation.require_manual_service_console_smoke); +} +``` + +- [ ] **Step 2: Run the lessons test and verify it fails** + +Run: + +```bash +cargo test --test generated_scene_lessons_test -- --nocapture +``` + +Expected: FAIL because the lessons loader and TOML file do not exist yet. + +- [ ] **Step 3: Implement the loader and write both lessons artifacts** + +Implement the loader and complete the minimal module wiring (`src/generated_scene/mod.rs`, `src/lib.rs`) in this task so `cargo test --test generated_scene_lessons_test` is buildable before Task 8. 
Use a TOML shape explicit enough for generator enforcement, for example: + +```toml +[routing] +require_exact_suffix = true +unsupported_scene_fail_closed = true +ambiguity_fail_closed = true + +[canonical_params] +require_dictionary_entity_for_org = true +require_explicit_period = true +forbid_hidden_page_defaults = true + +[bootstrap] +require_expected_domain = true +require_target_url = true +prefer_page_context_when_present = true + +[artifact] +require_report_artifact = true +require_column_defs_for_export = true +rust_side_xlsx_export_when_postprocess_xlsx = true + +[validation] +require_pipe_and_ws_checks = true +require_manual_service_console_smoke = true +require_callback_host_timeout_notes = true +``` + +The Markdown companion must explain the why behind those rules: deterministic routing pitfalls, canonical parameter pitfalls, bootstrap target pitfalls, pipe/ws differences, callback-host timeout lessons, and Rust-side export constraints. + +- [ ] **Step 4: Re-run the lessons tests and verify they pass** + +Run: + +```bash +cargo test --test generated_scene_lessons_test -- --nocapture +``` + +Expected: PASS. + +- [ ] **Step 5: Commit the lessons artifacts and loader** + +Run: + +```bash +git add docs/superpowers/references/tq-lineloss-lessons-learned.md docs/superpowers/references/tq-lineloss-lessons-learned.toml src/generated_scene/mod.rs src/generated_scene/lessons.rs src/lib.rs tests/generated_scene_lessons_test.rs +git commit -m "docs: add lineloss generation lessons" +``` + +Expected: one commit that makes the line-loss lessons machine-consumable and reviewable. 
+ +--- + +### Task 8: Build the v1 source analyzer, package generator, and CLI entry + +**Files:** +- Create: `src/generated_scene/analyzer.rs` +- Create: `src/generated_scene/generator.rs` +- Create: `src/bin/sg_scene_generate.rs` +- Modify: `src/generated_scene/mod.rs` +- Modify: `src/lib.rs` +- Create: `tests/scene_generator_test.rs` +- Create: `tests/fixtures/generated_scene/report_collection/index.html` +- Create: `tests/fixtures/generated_scene/report_collection/js/report.js` +- Create: `tests/fixtures/generated_scene/non_report/index.html` +- Create: `tests/fixtures/scene_source/tq_lineloss/index.html` +- Create: `tests/fixtures/scene_source/tq_lineloss/js/collect.js` + +- [ ] **Step 1: Add the failing analyzer/generator tests with hermetic fixtures** + +Create fixture-backed tests like: + +```rust +#[test] +fn analyzer_classifies_supported_report_collection_source() { + let analysis = analyze_scene_source(Path::new("tests/fixtures/generated_scene/report_collection")).unwrap(); + assert_eq!(analysis.scene_kind, SceneKind::ReportCollection); + assert_eq!(analysis.tool_kind, ToolKind::BrowserScript); + assert!(analysis.bootstrap.target_url.is_some()); + assert!(analysis.collection_entry_script.is_some()); +} + +#[test] +fn generator_writes_registration_ready_package_with_scene_toml() { + let output_root = tempdir(); + generate_scene_package(GenerateSceneRequest { + source_dir: PathBuf::from("tests/fixtures/generated_scene/report_collection"), + scene_id: "sample-report-scene".to_string(), + scene_name: "示例报表场景".to_string(), + output_root: output_root.path().to_path_buf(), + lessons_path: PathBuf::from("docs/superpowers/references/tq-lineloss-lessons-learned.toml"), + }).unwrap(); + + assert!(output_root.path().join("skills/sample-report-scene/SKILL.toml").exists()); + assert!(output_root.path().join("skills/sample-report-scene/scene.toml").exists()); + 
assert!(output_root.path().join("skills/sample-report-scene/scripts/collect_sample_report_scene.js").exists());
+    assert!(output_root.path().join("skills/sample-report-scene/scripts/collect_sample_report_scene.test.js").exists());
+}
+
+#[test]
+fn generator_rejects_non_report_source_with_explicit_reason() {
+    let err = analyze_scene_source(Path::new("tests/fixtures/generated_scene/non_report")).unwrap_err();
+    assert!(err.to_string().contains("report/collection browser_script only"));
+}
+```
+
+- [ ] **Step 2: Run the generator tests and verify they fail**
+
+Run:
+
+```bash
+cargo test --test scene_generator_test -- --nocapture
+```
+
+Expected: FAIL because the analyzer, generator, fixtures, and CLI do not exist yet.
+
+- [ ] **Step 3: Implement the analyzer, generator, CLI, and the source fixtures used by final smoke**
+
+Implementation rules:
+- create the generator test fixtures under `tests/fixtures/generated_scene/*`
+- create the hermetic source-smoke fixtures under `tests/fixtures/scene_source/tq_lineloss/*` so Task 9 can run without any external scenario directory
+- analyzer must refuse unsupported/non-report scenes explicitly instead of generating broken packages
+- generator must emit `scene.toml` inside the generated skill root
+- generator must use `tq-lineloss-lessons-learned.toml` as a required input so the same hardening rules apply to future scenes
+- generator/runtime coupling must stay at the file-contract level only
+- CLI should use an explicit parser, no new heavy dependency
+
+Suggested CLI shape:
+
+```bash
+cargo run --bin sg_scene_generate -- \
+  --source-dir <source-dir> \
+  --scene-id <scene-id> \
+  --scene-name <scene-name> \
+  --output-root <output-root> \
+  --lessons docs/superpowers/references/tq-lineloss-lessons-learned.toml
+```
+
+Expected outputs under `<output-root>`:
+- `skills/<scene-id>/SKILL.toml`
+- `skills/<scene-id>/SKILL.md`
+- `skills/<scene-id>/scene.toml`
+- `skills/<scene-id>/references/*.md`
+- `skills/<scene-id>/scripts/*.js`
+- `skills/<scene-id>/scripts/*.test.js`
+
+- [ ] **Step 4: Re-run the generator tests and verify they 
pass** + +Run: + +```bash +cargo test --test scene_generator_test -- --nocapture +``` + +Expected: PASS. + +- [ ] **Step 5: Commit the generator slice** + +Run: + +```bash +git add src/lib.rs src/generated_scene/mod.rs src/generated_scene/analyzer.rs src/generated_scene/generator.rs src/bin/sg_scene_generate.rs tests/scene_generator_test.rs tests/fixtures/generated_scene tests/fixtures/scene_source/tq_lineloss + git commit -m "feat: add generated scene package generator" +``` + +Expected: one commit that adds the in-repo v1 generator capability. + +--- + +### Task 9: Run the final verification sweep, smoke the real runtime, and remove unused one-off scene code + +**Files:** +- Delete if unused after green verification: `src/compat/tq_lineloss/org_units.rs` +- Delete if unused after green verification: `src/compat/tq_lineloss/org_resolver.rs` +- Delete if unused after green verification: `src/compat/tq_lineloss/period_resolver.rs` +- Delete or reduce to shim only if unused after green verification: `src/compat/lineloss_xlsx_export.rs` +- Modify: `src/compat/mod.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Remove only the legacy one-off files that are provably unused** + +Before deleting anything, prove the new path covers the old responsibilities: + +```bash +cargo test --test deterministic_submit_test -- --nocapture +cargo test --test scene_registry_test -- --nocapture +cargo test --test report_artifact_postprocess_test -- --nocapture +``` + +Then delete the old line-loss-only resolver/export files only if `cargo test` and `Grep` show they are no longer referenced. 
+ +- [ ] **Step 2: Run the full automated verification sweep** + +Run: + +```bash +node "examples/generated_scene_platform/skills/tq-lineloss-report/scripts/collect_lineloss.test.js" +cargo test --test scene_registry_test -- --nocapture +cargo test --test deterministic_submit_test -- --nocapture +cargo test --test report_artifact_postprocess_test -- --nocapture +cargo test --test generated_scene_lessons_test -- --nocapture +cargo test --test scene_generator_test -- --nocapture +cargo test --test agent_runtime_test -- --nocapture +cargo test --test service_task_flow_test -- --nocapture +cargo test --test service_ws_session_test callback_host -- --nocapture +cargo test --test compat_runtime_test -- --nocapture +cargo test --test compat_config_test -- --nocapture +cargo build --bin sgclaw --bin sg_claw --bin sg_scene_generate +``` + +Expected: PASS. + +- [ ] **Step 3: Run the required hermetic generator smoke and keep the real external source smoke optional** + +Run the required in-repo smoke first: + +```bash +tmp_out="$(mktemp -d)" +cargo run --bin sg_scene_generate -- \ + --source-dir tests/fixtures/scene_source/tq_lineloss \ + --scene-id tq-lineloss-report \ + --scene-name "台区线损月周累计线损率统计分析" \ + --output-root "$tmp_out" \ + --lessons docs/superpowers/references/tq-lineloss-lessons-learned.toml +``` + +Expected: generator emits a complete package into `$tmp_out` using only in-repo fixtures. 
+ +Optional manual follow-up after the required smoke is green: +- if the external scenario directory is available on the implementer's machine, re-run the same command against the real source tree for additional confidence +- if it is unavailable, do **not** block the branch on that machine-specific path + +- [ ] **Step 4: Run the real service-console smoke checks with `sg_claw.exe` semantics in mind** + +Manual verification checklist: +- write or reuse a repo-local `sgclaw_config.json` whose `skillsDir` points to `examples/generated_scene_platform` +- rebuild and run `sg_claw`/`sg_claw.exe` with that config so the runtime-scanned skills root is reproducible +- on the real line-loss page, submit `兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。` +- confirm the request bootstraps the manifest `target_url`, uses the manifest `expected_domain`, and returns the line-loss report artifact through the generic scene runtime +- submit `兰州公司 台区线损大数据 月累计线损率统计分析。。。` and confirm the runtime prompts for missing period instead of defaulting +- submit `打开知乎热榜` and confirm the ordinary Zhihu path still behaves as before +- submit `打开知乎热榜。。。` and confirm the deterministic runtime fails closed with the unsupported-scene prompt instead of falling into the Zhihu path + +- [ ] **Step 5: Commit the cleanup + verified platform state** + +Run: + +```bash +git add src/compat/mod.rs src/lib.rs src/compat src/generated_scene src/scene_contract docs/superpowers/references tests examples/generated_scene_platform + git commit -m "feat: add generated scene skill platform" +``` + +Expected: one final commit after the full automated and manual verification passes. 
+ +--- + +## Verification Checklist + +### Registry and manifest contract + +```bash +cargo test --test scene_registry_test -- --nocapture +``` + +Expected: +- `scene.toml` loads from the skill root +- only `schema_version = "1"` passes +- duplicate `scene.id` fails with both manifest paths in the error +- non-`browser_script` or non-`report_collection` v1 scenes are rejected cleanly +- the registry still scans exactly one resolved `skillsDir` + +### Deterministic routing contract + +```bash +cargo test --test deterministic_submit_test -- --nocapture +``` + +Expected: +- exact `。。。` suffix only +- no-suffix behavior unchanged +- unsupported suffix-scene requests fail closed +- multi-match ambiguity fails closed +- missing org/mode/period prompt instead of defaulting +- page context may improve scoring but cannot cause silent guessing on unresolved ambiguity + +### Generic report-artifact handling + +```bash +cargo test --test report_artifact_postprocess_test -- --nocapture +cargo test --test agent_runtime_test -- --nocapture +``` + +Expected: +- `ok` / `partial` / `empty` map to success +- `blocked` / `error` map to failure +- generic XLSX export works from artifact fields, not line-loss-only Rust code +- configured `directSubmitSkill` keeps working on the shared artifact interpreter + +### Service submit/bootstrap path + +```bash +cargo test --test service_task_flow_test -- --nocapture +cargo test --test service_ws_session_test callback_host -- --nocapture +``` + +Expected: +- deterministic manifest scenes route before LLM +- bootstrap target resolution uses manifest `target_url` / `expected_domain` +- callback-host/browser-ws paths still receive the correct request URL +- non-deterministic Zhihu and direct-submit flows remain intact + +### Generator and lessons + +```bash +cargo test --test generated_scene_lessons_test -- --nocapture +cargo test --test scene_generator_test -- --nocapture +cargo build --bin sg_scene_generate +``` + +Expected: +- lessons TOML 
contains all required routing/param/bootstrap/artifact/validation rules +- analyzer only accepts v1 report/collection browser-script fixtures +- generator writes a complete package with `scene.toml` and JS test scaffold +- generator/runtime share only the explicit file contract, not hidden Rust internals + +### Real runtime smoke + +Manual checklist: +- `sg_claw.exe` / service console can still run the line-loss deterministic path +- missing-period deterministic line-loss requests prompt instead of defaulting +- plain Zhihu requests still avoid the scene platform +- suffixed unsupported requests fail closed +- line-loss export still opens through the generic postprocess path when configured + +--- + +## Notes For The Engineer + +- The paired approved spec is `docs/superpowers/specs/2026-04-15-generated-scene-skill-platform-design.md`. +- The current repo branch name for the ws baseline is `feature/claw-ws`, even though the design prose says `ws`. +- Do **not** reintroduce the old scene-registry experiment that was explicitly cleaned off the ws branch. This plan deliberately keeps the new runtime under `compat` and a shared serializable contract instead of reviving the deleted scene-only branch structure blindly. +- Keep `scene.toml` inside each skill package root. The separate `skill_staging/scenes/*/scene.json` tree remains legacy metadata only in this plan. +- Keep the generator extractable by holding the boundary at `scene.toml`, generated package layout, and lessons TOML rules. Avoid runtime code that reaches into generator-only internals. +- If a real scenario directory does not fit the v1 report/collection/browser-script envelope, the analyzer/generator must refuse it explicitly instead of emitting a half-valid package. +- Do **not** add a generic login/session platform here. Capture that need in docs if discovered, but keep it out of this implementation slice. 
diff --git a/docs/superpowers/plans/2026-04-17-generated-scene-rectification-plan.md b/docs/superpowers/plans/2026-04-17-generated-scene-rectification-plan.md new file mode 100644 index 0000000..35fcb3b --- /dev/null +++ b/docs/superpowers/plans/2026-04-17-generated-scene-rectification-plan.md @@ -0,0 +1,441 @@ +# Generated Scene Rectification Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Rectify the generated-scene pipeline so it stops emitting false-positive runnable skills for complex internal scenes, specifically by fixing `sceneId` degeneration, bootstrap pollution, incomplete workflow reconstruction, and readiness fail-open behavior. + +**Architecture:** Keep the current `Scene IR` pipeline, but add four hard control chains around it: naming validation, bootstrap evidence stratification, workflow evidence reconstruction, and readiness gating. Generation must fail closed whenever these chains are incomplete. 
+ +**Tech Stack:** Rust, Node.js, HTML/CSS/JavaScript, serde_json, OpenAI-compatible LLM API + +--- + +## Scope Check + +This plan implements the design in: + +- `docs/superpowers/specs/2026-04-17-generated-scene-rectification-design.md` + +This plan builds on the existing generated-scene foundation already described in: + +- `docs/superpowers/specs/2026-04-17-scene-skill-compiler-design.md` +- `docs/superpowers/specs/2026-04-17-llm-driven-skill-generation-design.md` +- `docs/superpowers/specs/2026-04-17-enhanced-llm-extraction-schema-design.md` + +This plan does not attempt to solve: + +- login or authentication recovery +- Chromium host integration or browser embedding changes +- full runtime resolver expansion beyond what this rectification needs +- arbitrary historical scene compatibility outside the reference regression cases + +--- + +## File Map + +### Frontend scene generator + +| File | Action | Purpose | +|------|--------|---------| +| `frontend/scene-generator/generator-runner.js` | Modify | Implement naming fallback control, URL evidence stratification, workflow evidence cleanup, and pre-generation gate inputs | +| `frontend/scene-generator/llm-client.js` | Modify | Tighten sceneId semantic constraints and reject low-entropy LLM naming output | +| `frontend/scene-generator/server.js` | Modify | Aggregate readiness gates, block unsafe generation, and return rectification diagnostics | +| `frontend/scene-generator/sg_scene_generator.html` | Modify | Show invalid `sceneId`, bootstrap role breakdown, workflow evidence completeness, and generation block reasons | + +### Rust generated-scene pipeline + +| File | Action | Purpose | +|------|--------|---------| +| `src/generated_scene/analyzer.rs` | Modify | Add endpoint denoising, evidence role typing, and stricter archetype preconditions | +| `src/generated_scene/ir.rs` | Modify | Extend IR to carry candidate roles, gate states, and workflow evidence completeness | +| `src/generated_scene/generator.rs` | 
Modify | Prevent compiler routing when gates fail and surface fail-closed diagnostics | + +### Tests and fixtures + +| File | Action | Purpose | +|------|--------|---------| +| `tests/scene_generator_test.rs` | Modify | Cover naming, bootstrap, workflow, and readiness regression cases | +| `tests/scene_generator_html_test.rs` | Modify | Cover HTML/UI risk and blocking output | +| `tests/fixtures/generated_scene/paginated_enrichment/*` | Modify | Preserve marketing-like reference coverage | +| `tests/fixtures/generated_scene/multi_mode/*` | Modify | Preserve tq-like multi-mode coverage | +| Additional fixture files as needed | Create | Add low-entropy naming and localhost-pollution regression inputs | + +--- + +## Scope Guardrails + +- Do not broaden this work into a generic scene-generator redesign. +- Do not remove the existing `Scene IR` structure; extend and constrain it. +- Do not let `localhost` or helper/export endpoints participate in bootstrap selection. +- Do not silently coerce invalid `sceneId` values into accepted ids. +- Do not route into `paginated_enrichment` unless its minimum workflow evidence is complete. +- Do not emit a default runnable skill when any rectification gate fails. + +--- + +### Task 1: Rectify Naming Chain + +**Files:** +- Modify: `frontend/scene-generator/generator-runner.js` +- Modify: `frontend/scene-generator/llm-client.js` +- Modify: `frontend/scene-generator/server.js` +- Modify: `src/generated_scene/ir.rs` + +**Goal:** Stop Chinese-source scenes from degrading into low-information ids such as `2-0`, and turn `sceneId` into a validated business identifier instead of a raw slug fallback. + +- [ ] **Step 1: Classify sceneId candidate sources** + +Define explicit candidate tiers for `sceneId`: + +1. LLM semantic business id +2. deterministic keyword-derived id +3. controlled alias/transliteration fallback +4. invalid fallback candidate + +Expected result: the pipeline can explain where the chosen id came from. 
+ +- [ ] **Step 2: Add low-entropy sceneId validation** + +Implement shared validation rules that reject ids which are: + +- numeric-only or numeric-dominant +- too short to be business-readable +- generic placeholders such as `scene` or `report` +- semantically detached from the extracted `sceneName` + +Expected result: ids like `2-0`, `1-0`, `scene`, `report` are blocked. + +- [ ] **Step 3: Fail closed on invalid sceneId** + +Update generation flow so invalid `sceneId` produces: + +- `invalid_scene_id` gate failure +- readiness downgrade +- analysis/report output only unless explicitly overridden later by a separate approved flow + +Expected result: invalid ids never create a formal generated skill directory by default. + +- [ ] **Step 4: Surface naming diagnostics in server/UI** + +Return and display: + +- chosen `sceneId` +- candidate source +- validation result +- invalidation reason if blocked + +- [ ] **Step 5: Add regression tests** + +Cover at least: + +- Chinese source name that previously degraded to `2-0` +- valid semantic id chosen over slug fallback +- invalid low-entropy id blocked from generation + +- [ ] **Step 6: Commit** + +```bash +git add frontend/scene-generator/generator-runner.js frontend/scene-generator/llm-client.js frontend/scene-generator/server.js src/generated_scene/ir.rs tests/scene_generator_test.rs +git commit -m "fix(generator): block degenerate generated scene ids" +``` + +--- + +### Task 2: Rectify Bootstrap Chain + +**Files:** +- Modify: `frontend/scene-generator/generator-runner.js` +- Modify: `frontend/scene-generator/server.js` +- Modify: `src/generated_scene/analyzer.rs` +- Modify: `src/generated_scene/ir.rs` + +**Goal:** Separate business bootstrap candidates from localhost/export/helper URLs so internal-network entry domains resolve correctly. 
+ +- [ ] **Step 1: Add URL evidence role stratification** + +Classify URL candidates into: + +- `business_entry` +- `business_api` +- `gateway_api` +- `export_service` +- `local_helper` +- `static_asset` +- `template_noise` + +Expected result: every URL candidate is typed before bootstrap selection. + +- [ ] **Step 2: Add deterministic localhost and noise rejection** + +Ensure that: + +- `localhost` +- `127.0.0.1` +- `SurfaceServices` +- `ReportServices` +- `.js` / `.css` assets +- template placeholders and format strings + +are routed away from bootstrap candidates. + +Expected result: helper/export/static/template strings can remain as evidence but can never win bootstrap. + +- [ ] **Step 3: Redefine bootstrap resolution order** + +Bootstrap selection may only consume: + +1. `business_entry` +2. `business_api` +3. `gateway_api` + +When only helper/noise roles exist, set bootstrap to unresolved and downgrade readiness. + +- [ ] **Step 4: Preserve export/helper evidence separately** + +Retain localhost/export endpoints as downstream evidence for workflow/reporting, but isolate them from `expectedDomain` and `targetUrl`. 
+ +- [ ] **Step 5: Add regression tests** + +Cover at least: + +- marketing-like source choosing `yx.gs.sgcc.com.cn` over `localhost` +- mixed business + gateway scene preserving business target page +- scene with only localhost/noise ending in unresolved bootstrap + +- [ ] **Step 6: Commit** + +```bash +git add frontend/scene-generator/generator-runner.js frontend/scene-generator/server.js src/generated_scene/analyzer.rs src/generated_scene/ir.rs tests/scene_generator_test.rs +git commit -m "fix(generator): stratify bootstrap evidence and exclude localhost" +``` + +--- + +### Task 3: Rectify Workflow Chain + +**Files:** +- Modify: `frontend/scene-generator/generator-runner.js` +- Modify: `frontend/scene-generator/server.js` +- Modify: `src/generated_scene/analyzer.rs` +- Modify: `src/generated_scene/ir.rs` +- Modify: `src/generated_scene/generator.rs` + +**Goal:** Reconstruct workflow from request-chain evidence instead of generic field names, so `paginated_enrichment` is only emitted when its true workflow exists. + +- [ ] **Step 1: Split workflow evidence into typed layers** + +Represent workflow evidence as: + +- request evidence +- pagination evidence +- secondary request evidence +- post-process evidence + +Expected result: archetype decisions operate on structured workflow signals instead of a flat endpoint list. + +- [ ] **Step 2: Denoise endpoint and method evidence** + +Normalize and filter out: + +- `${apiUrl}` +- template placeholders +- exception strings +- log text fragments +- localhost export endpoints + +Expected result: workflow reconstruction only consumes business-relevant requests. + +- [ ] **Step 3: Tighten archetype routing rules** + +Require `paginated_enrichment` to have at minimum: + +1. one main list request +2. one pagination variable set +3. one secondary request or explicit per-item enrichment function +4. 
one post-process action among `filter`, `transform`, `export` + +If only part of this exists, preserve it as candidate evidence but do not route into the compiler. + +- [ ] **Step 4: Narrow multi_mode detection** + +Allow `multi_mode_request` only when mode switching materially changes at least one of: + +- request body +- endpoint shape +- response path +- column definition + +Expected result: generic `type/tab/mode/status` fields alone no longer misclassify marketing-like scenes. + +- [ ] **Step 5: Block compiler routing on incomplete workflow** + +Update generator-side routing so incomplete evidence cannot produce a formal `paginated_enrichment` skill package. + +- [ ] **Step 6: Add regression tests** + +Cover at least: + +- marketing-like scene must expose `paginate` + `secondary_request` + post-process evidence +- generic mode fields without real mode divergence must not force `multi_mode_request` +- noisy endpoint lists must still reconstruct the correct business request chain + +- [ ] **Step 7: Commit** + +```bash +git add frontend/scene-generator/generator-runner.js frontend/scene-generator/server.js src/generated_scene/analyzer.rs src/generated_scene/ir.rs src/generated_scene/generator.rs tests/scene_generator_test.rs +git commit -m "fix(generator): require complete workflow evidence before archetype routing" +``` + +--- + +### Task 4: Rectify Readiness Chain + +**Files:** +- Modify: `frontend/scene-generator/server.js` +- Modify: `frontend/scene-generator/sg_scene_generator.html` +- Modify: `src/generated_scene/ir.rs` +- Modify: `src/generated_scene/generator.rs` +- Modify: `tests/scene_generator_html_test.rs` + +**Goal:** Turn readiness into a hard gate that distinguishes analysis output from runnable skill output. 
+ +- [ ] **Step 1: Add explicit rectification gates** + +Track at minimum: + +- `scene_id_valid` +- `bootstrap_resolved` +- `workflow_complete_for_archetype` +- `runtime_contract_compatible` + +Expected result: readiness is derived from named gates rather than a loose score only. + +- [ ] **Step 2: Enforce fail-closed readiness rules** + +Require: + +- all core gates pass for readiness `A` or `B` +- any core gate failure forces readiness `C` +- generation endpoint blocks runnable output on gate failure + +- [ ] **Step 3: Separate analysis result from generation result** + +When gates fail, allow: + +- analysis preview +- evidence report +- block reasons + +But do not default to: + +- full skill emission +- compiler success messaging + +- [ ] **Step 4: Expose readiness breakdown in UI** + +Display: + +- gate names +- pass/fail state +- missing workflow pieces +- bootstrap resolution reason +- invalid sceneId reason + +- [ ] **Step 5: Add regression tests** + +Cover at least: + +- invalid `sceneId` forcing readiness `C` +- unresolved bootstrap forcing readiness `C` +- incomplete paginated workflow forcing readiness `C` +- fully valid reference fixture remaining eligible for generation + +- [ ] **Step 6: Commit** + +```bash +git add frontend/scene-generator/server.js frontend/scene-generator/sg_scene_generator.html src/generated_scene/ir.rs src/generated_scene/generator.rs tests/scene_generator_html_test.rs tests/scene_generator_test.rs +git commit -m "fix(generator): enforce readiness fail-closed gating" +``` + +--- + +### Task 5: Reference Regression Verification + +**Files:** +- Modify: `tests/scene_generator_test.rs` +- Modify: `tests/scene_generator_html_test.rs` +- Modify/Create: relevant fixtures under `tests/fixtures/generated_scene/` + +**Goal:** Lock the rectification against the two reference scene families and ensure future changes do not reintroduce the same false positives. 
+ +- [ ] **Step 1: Regress marketing-like fixture** + +Verify the marketing reference path now satisfies: + +- non-degenerate `sceneId` +- bootstrap rooted in `yx.gs.sgcc.com.cn` family +- workflow includes `paginate` +- workflow includes `secondary_request` +- readiness does not pass if any of the above are missing + +- [ ] **Step 2: Regress tq-like fixture** + +Verify the tq reference path still satisfies: + +- stable semantic `sceneId` +- valid non-localhost bootstrap +- genuine `multi_mode_request` detection +- no downgrade caused by the stricter marketing rectification rules + +- [ ] **Step 3: Run verification commands** + +Run: + +```bash +cargo check +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_html_test -- --nocapture +node --check frontend/scene-generator/llm-client.js +node --check frontend/scene-generator/generator-runner.js +node --check frontend/scene-generator/server.js +``` + +Expected result: rectification passes both Rust and Node validation plus regression coverage. + +- [ ] **Step 4: Record outcomes in generated reports if needed** + +If the implementation emits readiness or analysis JSON reports, ensure the test fixtures assert the key blocked/passed states directly. + +- [ ] **Step 5: Commit** + +```bash +git add tests/scene_generator_test.rs tests/scene_generator_html_test.rs tests/fixtures/generated_scene frontend/scene-generator/llm-client.js frontend/scene-generator/generator-runner.js frontend/scene-generator/server.js src/generated_scene/analyzer.rs src/generated_scene/ir.rs src/generated_scene/generator.rs +git commit -m "test(generator): lock generated scene rectification regressions" +``` + +--- + +## Acceptance Criteria + +This plan is complete when all of the following are true: + +1. Chinese-source scene names no longer degrade into low-entropy ids like `2-0`. +2. `localhost`, `127.0.0.1`, export services, and helper URLs no longer compete for bootstrap resolution. +3. 
`paginated_enrichment` routing only occurs when pagination, secondary request, and post-process evidence are all present. +4. Incomplete evidence paths fail closed with explicit readiness gate failures instead of generating false-positive runnable skills. +5. The marketing-like and tq-like reference scenes both remain covered by automated regression tests. + +## Rollback Strategy + +If this rectification causes unacceptable regressions: + +1. Revert the latest rectification task commit only, not unrelated generated-scene work. +2. Keep the previous `Scene IR` and compiler structure intact. +3. Preserve newly added fixtures and tests where possible, then relax only the specific gate or classifier that caused the regression. + +## Notes For Executors + +- Implement this plan strictly in order: naming, bootstrap, workflow, readiness, verification. +- Do not skip ahead to UI polish before the gating logic is in place. +- Do not add speculative resolver or login work under this plan. +- Any need for user override or forced draft generation must be handled as a separate follow-up spec, not smuggled into this rectification plan. diff --git a/docs/superpowers/plans/2026-04-17-llm-driven-skill-generation-plan.md b/docs/superpowers/plans/2026-04-17-llm-driven-skill-generation-plan.md new file mode 100644 index 0000000..c21c511 --- /dev/null +++ b/docs/superpowers/plans/2026-04-17-llm-driven-skill-generation-plan.md @@ -0,0 +1,1452 @@ +# LLM-Driven Skill Generation Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Enhance `sg_scene_generate` to generate complete, runnable skill packages instead of skeleton code by deeply analyzing scene source code (index.html) with LLM to extract API endpoints, static params, column definitions, and business logic. 
+ +**Architecture:** +- LLM reads `index.html` from scene directory +- Extracts complete SceneInfo (sceneId, sceneName, apiEndpoints, staticParams, columnDefs, businessLogic) +- Web UI shows preview for user confirmation +- Rust CLI receives extracted info via `--scene-info-json` parameter +- Rust template renders complete browser_script with business logic + +**Tech Stack:** JavaScript (Node.js), Rust, HTML/CSS, OpenAI-compatible LLM API + +--- + +## Scope Check + +This plan covers the enhancement of existing scene skill generator to support LLM-driven deep extraction. It builds upon: +- Existing `frontend/scene-generator/` files (server.js, llm-client.js, generator-runner.js) +- Existing `src/generated_scene/generator.rs` and `src/bin/sg_scene_generate.rs` + +--- + +## File Map + +### Modified Files + +| File | Changes | +|------|---------| +| `frontend/scene-generator/llm-client.js` | Add deep extraction prompt + `analyzeSceneDeep()` | +| `frontend/scene-generator/generator-runner.js` | Add `index.html` reading in `readDirectory()` | +| `frontend/scene-generator/server.js` | New `/analyze-deep` route, pass sceneInfo to generator | +| `src/bin/sg_scene_generate.rs` | Add `--scene-info-json` CLI parameter | +| `src/generated_scene/generator.rs` | Add SceneInfo struct, enhanced template rendering | +| `frontend/scene-generator/sg_scene_generator.html` | Add extraction preview UI | + +### Reference Files (not modified) + +| File | Purpose | +|------|---------| +| `docs/superpowers/specs/2026-04-17-llm-driven-skill-generation-design.md` | Design spec | +| `claw/skills/skill_staging/skills/tq-lineloss-report/scripts/collect_tq_lineloss_report.js` | Reference complete script (433 lines) | +| `claw/skills/skill_staging/skills/marketing-zero-consumer-report/scripts/collect_marketing_zero_consumer_report.js` | Reference skeleton (51 lines) | + +--- + +## Scope Guardrails + +- Do not change existing API contracts for backward compatibility +- Do not require `index.html` to 
exist (fallback to current behavior) +- Do not break existing `--scene-id`, `--scene-name` CLI arguments +- Do not add npm dependencies (only Node.js built-in modules) + +--- + +### Task 1: Enhance llm-client.js with Deep Extraction + +**Files:** +- Modify: `frontend/scene-generator/llm-client.js` + +**Goal:** Add a new function `analyzeSceneDeep()` that reads index.html content and extracts complete SceneInfo including API endpoints, static params, column definitions, and business logic. + +- [ ] **Step 1: Add DEEP_SYSTEM_PROMPT constant** + +Add after the existing `SYSTEM_PROMPT` constant in `llm-client.js`: + +```javascript +const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码,提取关键业务信息。 + +## 分析目标 + +1. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, 用途) +2. **静态参数**: 识别硬编码的业务参数 (key-value pairs) +3. **列定义**: 识别数据表格/导出的列配置 ([field, label] pairs) +4. **业务逻辑**: 理解数据获取和转换流程 +5. **场景类型**: 判断是 report_collection 还是 monitoring + +## 输出格式 + +请以 JSON 格式返回: +{ + "sceneId": "string - 场景标识 (英文短横线)", + "sceneName": "string - 场景中文名", + "sceneKind": "report_collection | monitoring", + "sourceSystem": "string - 来源系统名 (可选)", + "expectedDomain": "string - 目标域名 (可选)", + "targetUrl": "string | null - 目标页面URL", + "apiEndpoints": [ + {"name": "string", "url": "string", "method": "GET|POST", "description": "string"} + ], + "staticParams": {"key": "value"}, + "columnDefs": [["fieldName", "中文列名"]], + "entryMethod": "string - 入口方法名", + "businessLogic": { + "dataFetch": "string - 数据获取逻辑描述", + "dataTransform": "string - 数据转换逻辑描述" + } +}`; +``` + +- [ ] **Step 2: Add buildDeepAnalyzePrompt function** + +Add after `buildAnalyzePrompt` function: + +```javascript +function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) { + const parts = []; + + parts.push(`=== 目录结构 ===`); + parts.push(dirContents.tree || "(empty)"); + + if (dirContents["scene.toml"]) { + parts.push(`\n=== scene.toml ===`); + parts.push(dirContents["scene.toml"]); + } + + if (dirContents["SKILL.toml"]) { + 
parts.push(`\n=== SKILL.toml ===`); + parts.push(dirContents["SKILL.toml"]); + } + + if (dirContents["SKILL.md"]) { + parts.push(`\n=== SKILL.md ===`); + parts.push(dirContents["SKILL.md"]); + } + + // Include index.html content (key addition) + if (indexHtmlContent) { + parts.push(`\n=== index.html ===`); + // Limit to first 15000 chars to avoid token limits + parts.push(indexHtmlContent.substring(0, 15000)); + } + + if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) { + parts.push(`\n=== 脚本文件 ===`); + for (const [name, content] of Object.entries(dirContents.scripts)) { + parts.push(`\n--- ${name} ---`); + parts.push(content.substring(0, 3000)); + } + } + + return `以下是场景目录 "${sourceDir}" 的内容:\n\n${parts.join("\n")}\n\n请分析以上代码,提取完整的场景信息。`; +} +``` + +- [ ] **Step 3: Add extractSceneInfo function** + +Add after `extractJsonFromResponse` function: + +```javascript +function extractSceneInfo(text) { + // Try code block first + const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/); + if (codeBlockMatch) { + try { + return JSON.parse(codeBlockMatch[1]); + } catch (e) { + // fall through + } + } + + // Try to find JSON object with sceneId + const jsonMatch = text.match(/\{[\s\S]*"sceneId"[\s\S]*\}/); + if (jsonMatch) { + try { + return JSON.parse(jsonMatch[0]); + } catch (e) { + // fall through + } + } + + // Last resort: parse entire text + try { + return JSON.parse(text); + } catch (e) { + throw new Error("Failed to extract valid SceneInfo JSON from LLM response"); + } +} +``` + +- [ ] **Step 4: Add analyzeSceneDeep function** + +Add after `analyzeScene` function: + +```javascript +function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) { + const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent); + + const requestBody = JSON.stringify({ + model, + messages: [ + { role: "system", content: DEEP_SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + 
temperature: 0.1, + max_tokens: 2048, // Increased for detailed response + }); + + return new Promise((resolve, reject) => { + const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions"); + const options = { + hostname: url.hostname, + port: url.port || (url.protocol === "https:" ? 443 : 80), + path: url.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + "Content-Length": Buffer.byteLength(requestBody), + }, + }; + + const httpModule = url.protocol === "https:" ? https : http; + const req = httpModule.request(options, (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => { + if (res.statusCode !== 200) { + return reject(new Error(`LLM API error ${res.statusCode}: ${data}`)); + } + + try { + const parsed = JSON.parse(data); + const content = parsed.choices?.[0]?.message?.content; + if (!content) return reject(new Error("LLM returned empty response")); + const result = extractSceneInfo(content); + + // Validate required fields + if (!result.sceneId || !result.sceneName) { + return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`)); + } + + // Set defaults for optional fields + result.sceneKind = result.sceneKind || "report_collection"; + result.apiEndpoints = result.apiEndpoints || []; + result.staticParams = result.staticParams || {}; + result.columnDefs = result.columnDefs || []; + result.businessLogic = result.businessLogic || {}; + + resolve(result); + } catch (err) { + reject(new Error(`Failed to parse LLM response: ${err.message}`)); + } + }); + }); + + req.on("error", reject); + req.setTimeout(60000, () => { + req.destroy(new Error("LLM API request timed out")); + }); + + req.write(requestBody); + req.end(); + }); +} +``` + +- [ ] **Step 5: Add http module import and update exports** + +At the top of the file, add `http` import alongside `https`: + +```javascript +const http = require("http"); +const https = 
require("https"); +``` + +Update the exports at the bottom: + +```javascript +module.exports = { + buildAnalyzePrompt, + extractJsonFromResponse, + analyzeScene, + // New exports + buildDeepAnalyzePrompt, + extractSceneInfo, + analyzeSceneDeep, +}; +``` + +- [ ] **Step 6: Verify syntax** + +Run: `node -c frontend/scene-generator/llm-client.js` +Expected: No syntax errors + +- [ ] **Step 7: Commit** + +```bash +git add frontend/scene-generator/llm-client.js +git commit -m "feat(llm-client): add deep extraction with apiEndpoints, staticParams, columnDefs" +``` + +--- + +### Task 2: Enhance generator-runner.js to Read index.html + +**Files:** +- Modify: `frontend/scene-generator/generator-runner.js` + +**Goal:** Modify `readDirectory()` to also read `index.html` content. + +- [ ] **Step 1: Add index.html reading in readDirectory function** + +Locate the `readDirectory` function and add index.html reading after the SKILL.md section: + +```javascript +// After the SKILL.md reading section, add: + +const indexHtmlPath = p.join(sourceDir, "index.html"); +if (fs.existsSync(indexHtmlPath)) { + result.indexHtml = fs.readFileSync(indexHtmlPath, "utf-8"); +} +``` + +The complete modified function should look like: + +```javascript +function readDirectory(sourceDir) { + const fs = require("fs"); + const p = require("path"); + + if (!fs.existsSync(sourceDir)) { + throw new Error(`Directory not found: ${sourceDir}`); + } + + const stat = fs.statSync(sourceDir); + if (!stat.isDirectory()) { + throw new Error(`Not a directory: ${sourceDir}`); + } + + const result = {}; + const entries = fs.readdirSync(sourceDir, { withFileTypes: true }); + + const treeLines = []; + for (const entry of entries) { + treeLines.push(`├── ${entry.name}`); + } + result.tree = treeLines.join("\n"); + + const sceneTomlPath = p.join(sourceDir, "scene.toml"); + if (fs.existsSync(sceneTomlPath)) { + result["scene.toml"] = fs.readFileSync(sceneTomlPath, "utf-8"); + } + + const skillTomlPath = p.join(sourceDir, 
"SKILL.toml"); + if (fs.existsSync(skillTomlPath)) { + result["SKILL.toml"] = fs.readFileSync(skillTomlPath, "utf-8"); + } + + const skillMdPath = p.join(sourceDir, "SKILL.md"); + if (fs.existsSync(skillMdPath)) { + result["SKILL.md"] = fs.readFileSync(skillMdPath, "utf-8"); + } + + // NEW: Read index.html + const indexHtmlPath = p.join(sourceDir, "index.html"); + if (fs.existsSync(indexHtmlPath)) { + result.indexHtml = fs.readFileSync(indexHtmlPath, "utf-8"); + } + + const scripts = {}; + for (const entry of entries) { + if (entry.isFile() && entry.name.endsWith(".js")) { + const scriptPath = p.join(sourceDir, entry.name); + scripts[entry.name] = fs.readFileSync(scriptPath, "utf-8"); + } + } + if (Object.keys(scripts).length > 0) { + result.scripts = scripts; + } + + return result; +} +``` + +- [ ] **Step 2: Verify syntax** + +Run: `node -c frontend/scene-generator/generator-runner.js` +Expected: No syntax errors + +- [ ] **Step 3: Commit** + +```bash +git add frontend/scene-generator/generator-runner.js +git commit -m "feat(generator-runner): read index.html in readDirectory()" +``` + +--- + +### Task 3: Add /analyze-deep Route in server.js + +**Files:** +- Modify: `frontend/scene-generator/server.js` + +**Goal:** Add new `/analyze-deep` endpoint that calls the deep extraction LLM function. 
+ +- [ ] **Step 1: Update llm-client import** + +Change the import line at the top: + +```javascript +const { analyzeScene, analyzeSceneDeep } = require("./llm-client"); +``` + +- [ ] **Step 2: Add handleAnalyzeDeep function** + +Add after the existing `handleAnalyze` function: + +```javascript +async function handleAnalyzeDeep(req, res) { + let body; + try { + body = await parseBody(req); + } catch { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Invalid JSON body" })); + return; + } + + const sourceDir = (body.sourceDir || "").replace(/\\/g, "/"); + if (!sourceDir) { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "sourceDir is required" })); + return; + } + + let dirContents; + try { + dirContents = readDirectory(sourceDir); + } catch (err) { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: err.message })); + return; + } + + try { + const indexHtmlContent = dirContents.indexHtml || null; + const result = await analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, config); + + // Log extraction results for debugging + console.log(`[analyze-deep] Extracted scene: ${result.sceneId} / ${result.sceneName}`); + console.log(`[analyze-deep] API endpoints: ${result.apiEndpoints?.length || 0}`); + console.log(`[analyze-deep] Column defs: ${result.columnDefs?.length || 0}`); + + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(result)); + } catch (err) { + console.error(`[analyze-deep] Error: ${err.message}`); + res.writeHead(502, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + error: `Deep analysis failed: ${err.message}`, + hint: "You can still use basic analysis or enter data manually", + }) + ); + } +} +``` + +- [ ] **Step 3: Add route in server request handler** + +In the `http.createServer` handler, add the new route after `/analyze`: + +```javascript +} else 
if (pathname === "/analyze-deep" && req.method === "POST") {
+  await handleAnalyzeDeep(req, res);
+```
+
+- [ ] **Step 4: Verify syntax**
+
+Run: `node -c frontend/scene-generator/server.js`
+Expected: No syntax errors
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add frontend/scene-generator/server.js
+git commit -m "feat(server): add /analyze-deep endpoint for deep extraction"
+```
+
+---
+
+### Task 4: Add --scene-info-json CLI Parameter
+
+**Files:**
+- Modify: `src/bin/sg_scene_generate.rs`
+- Modify: `src/generated_scene/generator.rs`
+
+**Goal:** Add `--scene-info-json` parameter to Rust CLI to receive pre-extracted scene info from the Node.js server.
+
+- [ ] **Step 1: Add SceneInfoJson struct in generator.rs**
+
+In `src/generated_scene/generator.rs`, add after imports:
+
+```rust
+use std::collections::HashMap;
+
+#[derive(Debug, Clone, serde::Deserialize)]
+pub struct ApiEndpointJson {
+    pub name: String,
+    pub url: String,
+    #[serde(default)]
+    pub method: String,
+    #[serde(default)]
+    pub description: Option<String>,
+}
+
+#[derive(Debug, Clone, serde::Deserialize)]
+pub struct BusinessLogicJson {
+    #[serde(rename = "dataFetch", default)]
+    pub data_fetch: Option<String>,
+    #[serde(rename = "dataTransform", default)]
+    pub data_transform: Option<String>,
+}
+
+#[derive(Debug, Clone, serde::Deserialize)]
+pub struct SceneInfoJson {
+    #[serde(rename = "sceneId")]
+    pub scene_id: String,
+    #[serde(rename = "sceneName")]
+    pub scene_name: String,
+    #[serde(rename = "sceneKind", default)]
+    pub scene_kind: String,
+    #[serde(rename = "sourceSystem", default)]
+    pub source_system: Option<String>,
+    #[serde(rename = "expectedDomain", default)]
+    pub expected_domain: Option<String>,
+    #[serde(rename = "targetUrl", default)]
+    pub target_url: Option<String>,
+    #[serde(rename = "apiEndpoints", default)]
+    pub api_endpoints: Vec<ApiEndpointJson>,
+    #[serde(rename = "staticParams", default)]
+    pub static_params: HashMap<String, String>,
+    #[serde(rename = "columnDefs", default)]
+    pub column_defs: Vec<(String, String)>,
+    #[serde(rename = "entryMethod", default)]
+    pub entry_method: Option<String>,
+    #[serde(rename = "businessLogic", default)]
+    pub business_logic: Option<BusinessLogicJson>,
+}
+```
+
+- [ ] **Step 2: Add scene_info_json field to GenerateSceneRequest**
+
+In `src/generated_scene/generator.rs`, modify `GenerateSceneRequest`:
+
+```rust
+#[derive(Debug, Clone)]
+pub struct GenerateSceneRequest {
+    pub source_dir: PathBuf,
+    pub scene_id: String,
+    pub scene_name: String,
+    pub scene_kind: Option<String>,
+    pub target_url: Option<String>,
+    pub output_root: PathBuf,
+    pub lessons_path: Option<PathBuf>,
+    // NEW
+    pub scene_info_json: Option<SceneInfoJson>,
+}
+```
+
+- [ ] **Step 3: Modify browser_script function to use SceneInfo**
+
+Replace the existing `browser_script` function with enhanced version:
+
+```rust
+fn browser_script(scene_id: &str, analysis: &SceneSourceAnalysis, scene_info: Option<&SceneInfoJson>) -> String {
+    // If we have scene info with business logic, generate enhanced script
+    if let Some(info) = scene_info {
+        if !info.api_endpoints.is_empty() || !info.column_defs.is_empty() {
+            return browser_script_with_business_logic(scene_id, info);
+        }
+    }
+
+    // Fallback to skeleton template
+    browser_script_skeleton(scene_id, analysis)
+}
+
+fn browser_script_skeleton(scene_id: &str, _analysis: &SceneSourceAnalysis) -> String {
+    // Keep existing skeleton template
+    format!(
+        "function normalizePayload(payload) {{
+  if (typeof payload === 'string') {{
+    try {{ return JSON.parse(payload); }} catch (_) {{ return {{}}; }}
+  }}
+  return payload && typeof payload === 'object' ? payload : {{}};
+}}
+
+async function buildBrowserEntrypointResult(args, deps = {{}}) {{
+  const rows = typeof deps.collectRows === 'function'
+    ? await deps.collectRows(args)
+    : [{{
+        org_label: args.org_label || '',
+        org_code: args.org_code || '',
+        period_mode: args.period_mode || '',
+        period_value: args.period_value || '',
+        value: ''
+      }}];
+  return {{
+    type: 'report-artifact',
+    report_name: '{}',
+    status: rows.length > 0 ? 'ok' : 'empty',
+    period: {{
+      mode: args.period_mode,
+      mode_code: args.period_mode_code,
+      value: args.period_value,
+      payload: normalizePayload(args.period_payload)
+    }},
+    org: {{ label: args.org_label, code: args.org_code }},
+    column_defs: [
+      ['org_label', '供电单位'],
+      ['org_code', '供电单位编码'],
+      ['period_mode', '统计周期类型'],
+      ['period_value', '统计周期'],
+      ['value', '采集值']
+    ],
+    columns: ['org_label', 'org_code', 'period_mode', 'period_value', 'value'],
+    rows,
+    counts: {{ detail_rows: rows.length }},
+    partial_reasons: [],
+    reasons: []
+  }};
+}}
+
+if (typeof module !== 'undefined') {{
+  module.exports = {{ buildBrowserEntrypointResult, normalizePayload }};
+}}
+
+if (typeof args !== 'undefined') {{
+  return buildBrowserEntrypointResult(args);
+}}
+",
+        scene_id
+    )
+}
+
+fn browser_script_with_business_logic(scene_id: &str, info: &SceneInfoJson) -> String {
+    // Generate API endpoints constant
+    let api_endpoints_code = info.api_endpoints.iter()
+        .map(|ep| format!("  {}: '{}',", ep.name, ep.url))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    // Generate static params constant
+    let static_params_code = info.static_params.iter()
+        .map(|(k, v)| format!("  {}: '{}',", k, v))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    // Generate column defs
+    let column_defs_code = info.column_defs.iter()
+        .map(|(field, label)| format!("  ['{}', '{}'],", field, label))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    let columns_code = info.column_defs.iter()
+        .map(|(field, _)| format!("'{}'", field))
+        .collect::<Vec<_>>()
+        .join(", ");
+
+    let primary_api = info.api_endpoints.first()
+        .map(|ep| ep.url.clone())
+        .unwrap_or_else(|| "/api/data".to_string());
+
+    let expected_domain = info.expected_domain.as_deref().unwrap_or("");
+
+    format!(r#"// ===== 自动生成部分 =====
+
+const REPORT_NAME = '{scene_id}';
+const EXPECTED_DOMAIN = '{expected_domain}';
+
+// API 端点
+const API_ENDPOINTS = {{
+{api_endpoints_code}
+}};
+
+// 静态参数
+const STATIC_PARAMS = {{
+{static_params_code}
+}};
+
+// 列定义
+const COLUMN_DEFS = [
+{column_defs_code} +]; +const COLUMNS = [{columns_code}]; + +// ===== 标准框架 ===== + +function normalizePayload(payload) {{ + if (typeof payload === 'string') {{ + try {{ return JSON.parse(payload); }} catch (_) {{ return {{}}; }} + }} + return payload && typeof payload === 'object' ? payload : {{}}; +}} + +function validateArgs(args) {{ + const reasons = []; + if (!args.org_code) reasons.push('missing org_code'); + if (!args.period_value) reasons.push('missing period_value'); + return reasons.length === 0 ? {{ ok: true }} : {{ ok: false, reasons }}; +}} + +function buildRequest(args) {{ + return {{ + orgCode: args.org_code, + periodMode: args.period_mode, + periodValue: args.period_value, + ...STATIC_PARAMS + }}; +}} + +function normalizeRows(rawRows) {{ + if (!Array.isArray(rawRows)) return []; + return rawRows.map((row, index) => ({{ + org_label: row.orgLabel || row.org_label || '', + org_code: row.orgCode || row.org_code || args.org_code || '', + period_mode: args.period_mode || '', + period_value: args.period_value || '', + ...row + }})); +}} + +function buildArtifact(opts) {{ + return {{ + type: 'report-artifact', + report_name: REPORT_NAME, + status: opts.status || 'ok', + period: {{ + mode: args.period_mode, + mode_code: args.period_mode_code, + value: args.period_value, + payload: normalizePayload(args.period_payload) + }}, + org: {{ label: args.org_label, code: args.org_code }}, + column_defs: COLUMN_DEFS, + columns: COLUMNS, + rows: opts.rows || [], + counts: {{ detail_rows: (opts.rows || []).length }}, + partial_reasons: opts.partial_reasons || [], + reasons: opts.reasons || [] + }}; +}} + +async function buildBrowserEntrypointResult(args, deps = defaultDeps()) {{ + // 1. 参数验证 + const validation = validateArgs(args); + if (!validation.ok) {{ + return buildArtifact({{ status: 'blocked', reasons: validation.reasons }}); + }} + + // 2. 
页面上下文验证 + const pageValidation = deps.validatePageContext?.(args); + if (!pageValidation?.ok) {{ + return buildArtifact({{ status: 'blocked', reasons: ['page_context_mismatch'] }}); + }} + + // 3. 数据获取 + try {{ + const request = buildRequest(args); + const response = await deps.queryData(request); + const rows = normalizeRows(response.rows || response.data || []); + + return buildArtifact({{ + status: rows.length > 0 ? 'ok' : 'empty', + rows + }}); + }} catch (error) {{ + return buildArtifact({{ status: 'error', reasons: [error.message] }}); + }} +}} + +// ===== 默认依赖实现 ===== + +function defaultDeps() {{ + return {{ + validatePageContext(args) {{ + const host = globalThis.location?.hostname; + return host === args.expected_domain || host === EXPECTED_DOMAIN + ? {{ ok: true }} + : {{ ok: false, reason: 'domain_mismatch' }}; + }}, + + async queryData(request) {{ + // 根据 API_ENDPOINTS 调用实际接口 + if (typeof $ !== 'undefined' && typeof $.ajax === 'function') {{ + return new Promise((resolve, reject) => {{ + $.ajax({{ + url: API_ENDPOINTS.primary || '{primary_api}', + type: 'POST', + data: JSON.stringify(request), + contentType: 'application/json', + success: resolve, + error: (xhr, status, err) => reject(new Error(`API failed: ${{err}}`)), + }}); + }}); + }} + // Fallback: fetch API + if (typeof fetch === 'function') {{ + const response = await fetch(API_ENDPOINTS.primary || '{primary_api}', {{ + method: 'POST', + headers: {{ 'Content-Type': 'application/json' }}, + body: JSON.stringify(request) + }}); + return response.json(); + }} + throw new Error('No HTTP client available'); + }}, + }}; +}} + +// ===== 模块导出 ===== + +if (typeof module !== 'undefined') {{ + module.exports = {{ + buildBrowserEntrypointResult, + validateArgs, + buildRequest, + normalizeRows, + COLUMN_DEFS, + COLUMNS, + }}; +}} + +if (typeof args !== 'undefined') {{ + return buildBrowserEntrypointResult(args); +}} +"#, scene_id = scene_id, expected_domain = expected_domain, api_endpoints_code = 
api_endpoints_code, static_params_code = static_params_code, column_defs_code = column_defs_code, columns_code = columns_code, primary_api = primary_api)
+}
+```
+
+- [ ] **Step 4: Update generate_scene_package function**
+
+Modify `generate_scene_package` in generator.rs to pass scene_info:
+
+```rust
+pub fn generate_scene_package(
+    request: GenerateSceneRequest,
+) -> Result<PathBuf, String> {
+    let analysis = analyze_scene_source_with_hint(&request.source_dir, request.scene_kind.clone())?;
+    // ... existing code ...
+
+    write_file(
+        &scripts_dir.join(format!("{tool_name}.js")),
+        &browser_script(&request.scene_id, &analysis, request.scene_info_json.as_ref()),
+    )?;
+
+    // ... rest of function ...
+}
+```
+
+- [ ] **Step 5: Add CLI parameter in sg_scene_generate.rs**
+
+Modify `CliArgs` struct:
+
+```rust
+struct CliArgs {
+    source_dir: PathBuf,
+    scene_id: String,
+    scene_name: String,
+    scene_kind: Option<String>,
+    target_url: Option<String>,
+    output_root: PathBuf,
+    lessons_path: Option<PathBuf>,
+    // NEW
+    scene_info_json: Option<String>,
+}
+```
+
+Add parsing in `parse_args`:
+
+```rust
+fn parse_args(args: impl Iterator<Item = String>) -> Result<CliArgs, String> {
+    // ... existing code ...
+    let mut scene_info_json = None;
+    // ... in match block ...
+    "--scene-info-json" => scene_info_json = Some(arg),
+    // ...
+}
+```
+
+Parse JSON in `run`:
+
+```rust
+fn run() -> Result<(), String> {
+    let args = parse_args(env::args().skip(1))?;
+
+    let scene_info = args.scene_info_json
+        .map(|json| serde_json::from_str(&json))
+        .transpose()
+        .map_err(|e| format!("Invalid scene-info-json: {}", e))?;
+
+    let skill_root = generate_scene_package(GenerateSceneRequest {
+        source_dir: args.source_dir,
+        scene_id: args.scene_id,
+        scene_name: args.scene_name,
+        scene_kind: args.scene_kind,
+        target_url: args.target_url,
+        output_root: args.output_root,
+        lessons_path: args.lessons_path,
+        scene_info_json: scene_info,
+    })
+    .map_err(|err| err.to_string())?;
+
+    println!("generated scene package: {}", skill_root.display());
+    Ok(())
+}
+```
+
+Update usage:
+
+```rust
+fn usage() -> String {
+    "usage: sg_scene_generate --source-dir <dir> --scene-id <id> --scene-name <name> [--scene-kind <kind>] [--target-url <url>] --output-root <dir> [--lessons <path>] [--scene-info-json '<json>']".to_string()
+}
+```
+
+- [ ] **Step 6: Verify Rust compilation**
+
+Run: `cargo check`
+Expected: No compilation errors
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add src/bin/sg_scene_generate.rs src/generated_scene/generator.rs
+git commit -m "feat(rust): add --scene-info-json parameter for LLM extraction results"
+```
+
+---
+
+### Task 5: Update Web UI with Extraction Preview
+
+**Files:**
+- Modify: `frontend/scene-generator/sg_scene_generator.html`
+
+**Goal:** Add UI elements to show extraction results and allow user confirmation before generation.
+ +- [ ] **Step 1: Add extraction results preview section** + +Add after the existing form section, a new collapsible panel for extraction preview: + +```html + + +``` + +- [ ] **Step 2: Add CSS for preview panel** + +Add styles: + +```css +.preview-panel { + background: rgba(255, 255, 255, 0.05); + border-radius: 12px; + border: 1px solid rgba(255, 255, 255, 0.1); + overflow: hidden; +} + +.preview-header { + display: flex; + justify-content: space-between; + align-items: center; + padding: 16px 20px; + cursor: pointer; + background: rgba(255, 255, 255, 0.03); +} + +.preview-header h3 { + margin: 0; + font-size: 16px; +} + +.preview-content { + padding: 20px; +} + +.preview-section { + margin-bottom: 20px; +} + +.preview-section h4 { + margin: 0 0 10px 0; + font-size: 14px; + color: #a78bfa; +} + +.preview-row { + display: flex; + margin-bottom: 8px; +} + +.preview-row .label { + width: 100px; + color: #888; + flex-shrink: 0; +} + +.preview-row .value { + color: #fff; +} + +.preview-list { + max-height: 150px; + overflow-y: auto; + background: rgba(0, 0, 0, 0.2); + border-radius: 8px; + padding: 10px; +} + +.preview-list-item { + padding: 6px 0; + border-bottom: 1px solid rgba(255, 255, 255, 0.05); +} + +.preview-code { + background: rgba(0, 0, 0, 0.3); + padding: 10px; + border-radius: 8px; + font-family: monospace; + font-size: 12px; + overflow-x: auto; + white-space: pre-wrap; +} +``` + +- [ ] **Step 3: Add JavaScript for deep analysis and preview** + +```javascript +let currentSceneInfo = null; + +async function analyzeDeep() { + const sourceDir = document.getElementById('sourceDir').value; + if (!sourceDir) { + alert('请先选择场景目录'); + return; + } + + showStatus('正在深度分析...'); + + try { + const response = await fetch(`${SERVER_URL}/analyze-deep`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ sourceDir }) + }); + + const data = await response.json(); + + if (data.error) { + showStatus('分析失败: ' + data.error); + 
return; + } + + currentSceneInfo = data; + + // Update form fields + document.getElementById('sceneId').value = data.sceneId || ''; + document.getElementById('sceneName').value = data.sceneName || ''; + document.getElementById('sceneKind').value = data.sceneKind || 'report_collection'; + if (data.targetUrl) { + document.getElementById('targetUrl').value = data.targetUrl; + } + + // Show preview + showExtractionPreview(data); + showStatus('分析完成,请确认提取结果'); + + } catch (err) { + showStatus('分析失败: ' + err.message); + } +} + +function showExtractionPreview(data) { + document.getElementById('previewSceneId').textContent = data.sceneId || '-'; + document.getElementById('previewSceneName').textContent = data.sceneName || '-'; + document.getElementById('previewSceneKind').textContent = data.sceneKind || '-'; + document.getElementById('previewExpectedDomain').textContent = data.expectedDomain || '-'; + + // API endpoints + const apiList = document.getElementById('previewApiEndpoints'); + const apiCount = (data.apiEndpoints || []).length; + document.getElementById('previewApiCount').textContent = apiCount; + apiList.innerHTML = (data.apiEndpoints || []).map(ep => ` +
+      <div class="preview-list-item">
+        <strong>${ep.name}</strong>: ${ep.url}
+        <span>[${ep.method || 'GET'}]</span>
+      </div>
+    `).join('') || '<div class="preview-list-item">无 API 端点</div>
'; + + // Column defs + const colList = document.getElementById('previewColumnDefs'); + const colCount = (data.columnDefs || []).length; + document.getElementById('previewColumnCount').textContent = colCount; + colList.innerHTML = (data.columnDefs || []).map(([field, label]) => ` +
+      <div class="preview-list-item">
+        <code>${field}</code> → ${label}
+      </div>
+    `).join('') || '<div class="preview-list-item">无列定义</div>
'; + + // Static params + document.getElementById('previewStaticParams').textContent = + JSON.stringify(data.staticParams || {}, null, 2) || '{}'; + + // Business logic + document.getElementById('previewDataFetch').textContent = + data.businessLogic?.dataFetch || '-'; + document.getElementById('previewDataTransform').textContent = + data.businessLogic?.dataTransform || '-'; + + document.getElementById('extractionPreview').style.display = 'block'; +} + +function togglePreview() { + const content = document.getElementById('previewContent'); + const icon = document.getElementById('previewToggleIcon'); + if (content.style.display === 'none') { + content.style.display = 'block'; + icon.textContent = '▼'; + } else { + content.style.display = 'none'; + icon.textContent = '▶'; + } +} +``` + +- [ ] **Step 4: Add "深度分析" button** + +Add a new button in the button group: + +```html + +``` + +- [ ] **Step 5: Update generate function to pass sceneInfo** + +Modify the generate function to include scene info JSON: + +```javascript +async function generate() { + const params = { + sourceDir: document.getElementById('sourceDir').value, + sceneId: document.getElementById('sceneId').value, + sceneName: document.getElementById('sceneName').value, + sceneKind: document.getElementById('sceneKind').value, + targetUrl: document.getElementById('targetUrl').value || null, + outputRoot: document.getElementById('outputRoot').value, + lessons: document.getElementById('lessons').value || null, + }; + + // Add scene info JSON if available + if (currentSceneInfo) { + params.sceneInfoJson = JSON.stringify(currentSceneInfo); + } + + // ... rest of generate function ... 
+} +``` + +- [ ] **Step 6: Verify UI loads** + +Run the server and open the page in browser: +```bash +cd frontend/scene-generator && node server.js +``` +Open `http://127.0.0.1:3210/` + +Expected: Page loads without JavaScript errors + +- [ ] **Step 7: Commit** + +```bash +git add frontend/scene-generator/sg_scene_generator.html +git commit -m "feat(ui): add deep extraction preview panel with API/column/static-params display" +``` + +--- + +### Task 6: Update generator-runner.js to Pass sceneInfoJson + +**Files:** +- Modify: `frontend/scene-generator/generator-runner.js` + +**Goal:** Update `runGenerator` to pass `sceneInfoJson` parameter to Rust CLI. + +- [ ] **Step 1: Modify runGenerator function** + +Update the function to accept and pass `sceneInfoJson`: + +```javascript +function runGenerator(params, sseWriter, projectRoot) { + const { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson } = params; + + const normalize = (p) => p.replace(/\\/g, "/"); + + const args = [ + "run", + "--bin", + "sg_scene_generate", + "--", + "--source-dir", + normalize(sourceDir), + "--scene-id", + sceneId, + "--scene-name", + sceneName, + ]; + + if (sceneKind) { + args.push("--scene-kind", sceneKind); + } + + if (targetUrl) { + args.push("--target-url", targetUrl); + } + + args.push("--output-root", normalize(outputRoot)); + + if (lessons) { + args.push("--lessons", normalize(lessons)); + } + + // NEW: Pass scene info JSON + if (sceneInfoJson) { + args.push("--scene-info-json", sceneInfoJson); + } + + // ... rest of function unchanged ... 
+} +``` + +- [ ] **Step 2: Update server.js handleGenerate** + +Ensure `handleGenerate` passes the new parameter: + +```javascript +async function handleGenerate(req, res) { + let body; + try { + body = await parseBody(req); + } catch { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Invalid JSON body" })); + return; + } + + const { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson } = body; + if (!sourceDir || !sceneId || !sceneName || !outputRoot) { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + error: "All fields required: sourceDir, sceneId, sceneName, outputRoot", + }) + ); + return; + } + + const sseWriter = initSSE(res); + + try { + await runGenerator( + { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson }, + sseWriter, + config.projectRoot + ); + } catch (err) { + writeSSE(sseWriter, "error", { message: `Server error: ${err.message}` }); + } + + sseWriter.end(); +} +``` + +- [ ] **Step 3: Verify syntax** + +Run: `node -c frontend/scene-generator/generator-runner.js && node -c frontend/scene-generator/server.js` +Expected: No syntax errors + +- [ ] **Step 4: Commit** + +```bash +git add frontend/scene-generator/generator-runner.js frontend/scene-generator/server.js +git commit -m "feat(runner): pass sceneInfoJson to Rust CLI for enhanced template rendering" +``` + +--- + +### Task 7: End-to-End Verification + +**Files:** +- All modified files + +**Goal:** Verify the complete flow works from UI to Rust CLI. 
+ +- [ ] **Step 1: Build Rust binary** + +```bash +cargo build --release --bin sg_scene_generate +``` +Expected: Build succeeds + +- [ ] **Step 2: Start the server** + +```bash +cd frontend/scene-generator && node server.js +``` +Expected: Server starts on port 3210 + +- [ ] **Step 3: Test health endpoint** + +```bash +curl http://127.0.0.1:3210/health +``` +Expected: `{"status":"ok",...}` + +- [ ] **Step 4: Test analyze-deep endpoint with real scene** + +Use a real scene directory with index.html: + +```bash +curl -X POST http://127.0.0.1:3210/analyze-deep \ + -H "Content-Type: application/json" \ + -d '{"sourceDir": "D:/path/to/scene/with/index.html"}' +``` + +Expected: JSON response with sceneId, sceneName, apiEndpoints, columnDefs + +- [ ] **Step 5: Test full generation flow** + +1. Open browser to `http://127.0.0.1:3210/` +2. Select a scene directory with index.html +3. Click "深度分析" button +4. Verify preview shows extracted API/column data +5. Click "生成" button +6. Verify generated script contains extracted API endpoints and column definitions + +- [ ] **Step 6: Compare generated script** + +Compare the generated script with the reference: +- Before: 51 lines (skeleton) +- After: Should have API_ENDPOINTS, COLUMN_DEFS constants populated + +- [ ] **Step 7: Final commit** + +```bash +git add -A +git commit -m "feat: complete LLM-driven skill generation with deep extraction + +- Add /analyze-deep endpoint for deep LLM extraction +- Extract apiEndpoints, staticParams, columnDefs from index.html +- Pass extraction results via --scene-info-json to Rust CLI +- Generate complete browser_script with business logic constants +- Add UI preview panel for extraction results +" +``` + +--- + +## Self-Review + +### 1. 
Spec Coverage + +| Spec Requirement | Task | +|------------------|------| +| LLM reads index.html | Task 1 (buildDeepAnalyzePrompt), Task 2 (readDirectory) | +| Extract apiEndpoints | Task 1 (DEEP_SYSTEM_PROMPT, analyzeSceneDeep) | +| Extract staticParams | Task 1 (DEEP_SYSTEM_PROMPT, analyzeSceneDeep) | +| Extract columnDefs | Task 1 (DEEP_SYSTEM_PROMPT, analyzeSceneDeep) | +| Extract businessLogic | Task 1 (DEEP_SYSTEM_PROMPT, analyzeSceneDeep) | +| --scene-info-json CLI parameter | Task 4 | +| Enhanced template rendering | Task 4 (browser_script_with_business_logic) | +| Web UI preview | Task 5 | +| User confirmation before generation | Task 5 (extraction preview) | + +All covered. + +### 2. Placeholder Scan + +No TBD/TODO/"implement later"/"add tests"/"similar to" patterns found. + +### 3. Type Consistency + +- `/analyze-deep`: `{ sourceDir }` → `SceneInfoJson` — consistent in Tasks 1, 3, 5 +- `/generate`: `{ ..., sceneInfoJson }` — consistent in Tasks 5, 6 +- SceneInfoJson struct fields match JavaScript extraction output — consistent in Task 1, 4 +- Column defs: `Vec<(String, String)>` matches `[[field, label]]` — consistent + +All consistent. + +### 4. 
Backward Compatibility + +- Existing `/analyze` endpoint unchanged +- Existing CLI arguments (`--scene-id`, `--scene-name`) still work +- `--scene-info-json` is optional, falls back to skeleton template +- `index.html` reading is optional, falls back if not present diff --git a/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md b/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md new file mode 100644 index 0000000..f05be91 --- /dev/null +++ b/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md @@ -0,0 +1,382 @@ +# sgClaw Scene Skill 60-to-90 Roadmap Plan + +> **Status:** Draft +> **Date:** 2026-04-17 +> **Author:** Codex +> **Upstream Spec:** [2026-04-17-scene-skill-60-to-90-roadmap-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md) + +## Plan Intent + +本计划用于将“scene skill 自动生成能力从 60 分提升到 90 分”的设计方案拆解为可执行的交付阶段、任务边界、验收条件与实施顺序。计划严格服从上游 `spec`,不额外扩展问题空间,不提前引入未在 `spec` 中确认的实现目标。 + +本计划覆盖的核心目标仅包括: + +1. 建立可裁决的语义证据层 +2. 建立最小可编译业务契约 +3. 冻结 P0 样板标准答案 +4. 按 P0 到 P1 的路线推动 scene skill 自动转化能力从结构识别升级到业务语义恢复 + +## Success Criteria Baseline + +本计划默认采用上游 `spec` 中已经收敛的成功标准:阶段性成功不再以“生成结果是否尽量接近某个参考 skill 的结构”作为唯一目标,而是以通用场景生成后的 skill 能否在内网环境中直接运行、拿到正确数据并产出正确报表作为主判定口径。 + +因此,实施验收默认同时检查以下三层闭环: + +1. 执行闭环:生成 skill 可在自研浏览器承载的内网环境中完成执行 +2. 数据闭环:查询、分页、提取后的数据正确且完整 +3. 产物闭环:生成的 Excel 或其他报表符合业务规则 + +## Scope Guardrails + +本计划执行过程中,以下边界保持不变: + +1. 不以“一次覆盖全部 102 个场景”为目标 +2. 不在本计划中展开统一平台登录或目标业务系统后台登录的自动恢复实现 +3. 不把 BrowserAction 全链路抽象一次性做完 +4. 不把复杂文档渲染、模板上传、附件解析场景纳入 P0 +5. 不以“先做更多 prompt 调优”代替证据层、契约层和标准答案建设 + +## Scene Family Baseline + +本计划执行时,默认承接上游 `spec` 对 `102` 个场景的家族分组结果: + +1. `G1` 通用单页报表组:`68` +2. `G2` 多模式报表组:`11` +3. `G3` 分页明细补数组:`10` +4. `G4` 工具检测前置组:`8` +5. `G5` 低优先级噪声组:`5` + +本计划的主线实施范围以 `G1 + G2 + G3` 为主,它们合计 `89` 个场景,约占全部样本的 `87%`。`G4` 作为后续检测类扩展前置保留,`G5` 默认降级处理,不进入首轮主线。 + +## Workstreams + +本计划拆分为四条主工作流: + +1. `WS1` 语义证据层建设 +2. `WS2` 最小可编译业务契约建设 +3. 
`WS3` P0 标准答案与校准基线建设 +4. `WS4` P0/P1 样板路线落地与验证 + +四条工作流之间的依赖关系为: + +`WS1 + WS2 + WS3 -> WS4` + +## Phase Overview + +计划按五个阶段推进: + +1. Phase 0:冻结边界与样板 +2. Phase 1:建立语义证据层 +3. Phase 2:建立最小可编译业务契约 +4. Phase 3:冻结 P0 canonical answers +5. Phase 4:按 P0/P1 路线逐步验证 60-to-90 能力提升 + +其中 Phase 4 不是按业务部门推进,而是按场景家族推进,顺序固定为: + +1. 先打 `G2` 多模式报表组,验证语义恢复上限 +2. 再打 `G1` 通用单页报表组,验证规模化迁移能力 +3. 再打 `G3` 分页明细补数组,验证复杂 workflow 与 fail-closed +4. `G4` 保留到后续检测类扩展 +5. `G5` 默认降级处理 + +## Phase 0:冻结边界与样板 + +### Objective + +在进入建设阶段前,先冻结问题边界、P0 样板、P1 家族和对标基线,避免实施过程中反复漂移。 + +### Tasks + +1. 固化 P0 样板清单 +2. 固化 P1 家族清单 +3. 固化 `台区线损大数据-月_周累计线损率统计分析 -> tq-lineloss-report` 的 canonical mapping +4. 固化宿主浏览器执行上下文和 `localhost:*` 的语义分类口径 +5. 固化“业务语义层 / 宿主浏览器能力层 / 登录与本地桥接层”的分层约束 +6. 固化 `102` 个场景的五大分组和分组口径 +7. 固化各分组到 archetype / 阶段 / 验收重点的映射关系 + +### Deliverables + +1. 冻结后的样板名单 +2. 样板与 archetype 对照表 +3. 宿主与业务分层约束说明 +4. canonical benchmark 映射说明 +5. 五大场景分组清单 +6. 分组实施映射说明 + +### Exit Criteria + +1. P0 / P1 样板不再变动 +2. `tq-lineloss-report` 已被明确为 P0-1 的 canonical benchmark +3. `localhost:*` 已被明确定义为宿主桥接证据而非默认业务域 +4. `102` 个场景的五大分组和分组实施口径不再漂移 + +## Phase 1:建立语义证据层 + +### Objective + +将“源码直接汇总到 Scene IR”的生成路径,升级为“源码先形成可裁决语义证据,再归约为 Scene IR”的路径。 + +### Tasks + +1. 定义统一证据对象 schema +2. 定义证据来源分层 +3. 定义证据归并与冲突消解规则 +4. 定义证据到 `Scene IR` 的映射边界 +5. 建立核心证据类型集合 + +### Required Evidence Types + +第一版最小证据类型集合固定为: + +1. `bootstrap_candidate` +2. `endpoint_candidate` +3. `mode_candidate` +4. `request_template_candidate` +5. `response_path_candidate` +6. `column_defs_candidate` +7. `normalize_rules_candidate` +8. `workflow_candidate` +9. `localhost_dependency_candidate` +10. `browser_action_candidate` +11. `export_candidate` + +### Deliverables + +1. 证据对象 schema 文档 +2. 证据类型字典 +3. 证据归并规则文档 +4. 证据到 `Scene IR` 的映射规则文档 +5. P0 样板的证据抽取结果样例 + +### Acceptance Criteria + +1. 任一 P0 样板都能输出结构化证据集合 +2. `localhost:*`、宿主 JS 注入、隐藏域行为可进入独立证据槽位 +3. `Scene IR` 的核心字段均可回溯到对应证据来源 +4. 
证据冲突时存在明确裁决路径,而不是被最终总结直接吞没 + +## Phase 2:建立最小可编译业务契约 + +### Objective + +把 archetype 判断从“关键词命中”升级为“最小业务契约是否成立”,让 compiler 只接收证据闭合的输入。 + +### Tasks + +1. 定义各 archetype 的最小可编译契约 +2. 定义统一 gate 列表 +3. 定义 gate 失败时的阻断规则 +4. 定义 archetype 最小输出契约 +5. 建立 fail-closed 优先的 readiness 判定口径 + +### Required Gates + +统一 gate 名称最少包括: + +1. `bootstrap_resolved` +2. `request_contract_complete` +3. `response_contract_complete` +4. `workflow_contract_complete` +5. `runtime_contract_compatible` + +### Deliverables + +1. archetype 最小契约表 +2. gate 判定表 +3. blocker / readiness 规则表 +4. archetype 输出契约样例 + +### Acceptance Criteria + +1. `multi_mode_request`、`single_request_table`、`paginated_enrichment` 均有明确最小契约 +2. 没有通过 gate 的场景不能再伪装为 runnable skill +3. readiness 结果能够区分“业务证据不足”和“宿主运行时依赖未满足” +4. compiler 输入边界清晰,不能继续吞入未闭合 IR + +## Phase 3:冻结 P0 Canonical Answers + +### Objective + +为 P0 三个主样板建立稳定的标准答案、关键证据清单和验收基线,作为后续回归与迁移的唯一校准源。 + +### Tasks + +1. 固化三个 P0 样板的标准 `Scene IR` +2. 固化三个 P0 样板的关键证据清单 +3. 固化三个 P0 样板的验收标准 +4. 固化三个 P0 样板的失败 taxonomy +5. 建立 canonical answer 与实际生成结果的比对方式 + +### P0 Canonical Targets + +1. `台区线损大数据-月_周累计线损率统计分析` + 参考 `tq-lineloss-report` +2. `用户日电量监测` + 对标单请求量产样板 +3. `95598工单明细表` + 对标分页补数识别与阻断样板 + +### Deliverables + +1. 三个 P0 样板的 canonical `Scene IR` +2. 三个 P0 样板的关键语义证据基线 +3. 三个 P0 样板的验收表 +4. 三个 P0 样板的失败类型表 + +### Acceptance Criteria + +1. P0-1 能明确以 `tq-lineloss-report` 作为高质量参考样板,而非唯一硬标准答案 +2. 三个 P0 样板都存在“生成结果 vs canonical answer”的对齐方式 +3. 后续每次能力升级均可回归验证是否偏离 P0 标准答案 + +## Phase 4:按 P0/P1 路线逐步验证 60-to-90 提升 + +### Objective + +按照 `spec` 已定义的优先级,以 P0 为主、P1 为扩展,逐步验证自动转化器从结构识别向业务语义恢复的提升路径。 + +本阶段不按业务部门推进,而按场景家族推进。其首轮目标不是“覆盖全部 `102` 个场景”,而是先打穿主流报表型场景,再逐步扩展。 + +### Track A:P0-1 `tq` 主样板 + +#### Goal + +打通 `multi_mode_request.month_week_table` 的主样板能力,并使结果在关键业务语义、内网可执行性与报表正确性上达到 `tq-lineloss-report` 同等级别。 + +#### Tasks + +1. 恢复完整 `month / week` 模式矩阵 +2. 恢复每个模式的请求契约与响应契约 +3. 恢复列定义、归一化规则和导出语义 +4. 校验 bootstrap 与目标系统上下文约束 +5. 
建立自动结果与 `tq-lineloss-report` 的关键语义比对 + +#### Acceptance Criteria + +1. `mode matrix` 稳定恢复 +2. 关键 request / response contract 稳定恢复 +3. 生成结果在关键业务语义与内网报表结果上达到高质量参考水平 + +### Track B:P0-2 单请求量产样板 + +#### Goal + +证明单请求报表家族可以形成高通过率的通用转化模板。 + +#### Tasks + +1. 恢复 request / response / normalize 三件套 +2. 压缩伪通用兜底主路径 +3. 验证同家族样板迁移能力 + +#### Acceptance Criteria + +1. `single_request_table` 样板稳定通过 +2. 同家族样板具备可复用性 +3. 结果判定不再过度依赖全文总结 + +### Track C:P0-3 分页补数样板 + +#### Goal + +正确识别复杂分页补数场景的问题空间,并在证据不足时稳定阻断。 + +#### Tasks + +1. 拆开主请求链、补数链、导出链 +2. 建立 `paginated_enrichment` 最小可编译证据集 +3. 区分业务 workflow 与宿主桥接行为 +4. 落地 fail-closed 判定 + +#### Acceptance Criteria + +1. 分页补数 workflow 被正确拆解 +2. 证据不足时稳定 fail-closed +3. 不再把宿主链或 `localhost:*` 误判为业务主链 + +### Track D:P1 家族扩展 + +#### Goal + +在 P0 样板稳定后,将能力迁移到已定义的 P1 家族,验证路线具备规模化复制能力。 + +#### Tasks + +1. 迁移线损 / 电量多模式家族 +2. 迁移单请求报表家族 +3. 迁移分页补数家族 +4. 记录每一类家族的复用成功率与失败类型 + +#### Acceptance Criteria + +1. 每个 P1 家族至少完成一轮代表场景迁移验证 +2. P1 验证主要依赖 P0 已沉淀的证据、契约和标准答案体系 +3. 若超出当前 archetype 或契约能力边界,结果应明确 fail-closed + +### Track E:Scene Family Expansion Policy + +#### Goal + +以五大场景分组为单位,明确哪些家族进入主线,哪些家族仅做预留或降级。 + +#### Tasks + +1. 对 `G1` 通用单页报表组建立量产迁移节奏 +2. 对 `G2` 多模式报表组建立深做样板节奏 +3. 对 `G3` 分页明细补数组建立复杂链识别节奏 +4. 对 `G4` 工具检测前置组仅保留架构入口与后续扩展口径 +5. 对 `G5` 低优先级噪声组建立默认降级口径 + +#### Acceptance Criteria + +1. `G1 + G2 + G3` 成为首轮主线范围 +2. `G4` 不抢占当前主线资源,但保留后续检测类扩展入口 +3. `G5` 不污染主线 archetype 和验收口径 + +## Milestone Order + +总前置里程碑的发生顺序固定为: + +1. 先完成语义证据层 +2. 再完成最小可编译业务契约 +3. 再冻结 P0 标准答案 + +在这三个里程碑完成之前,不进入大规模家族扩展。 + +## File-Level Planning Targets + +本计划要求后续实施至少覆盖以下资产类型: + +1. `docs/superpowers/specs/` 中的上游设计稿 +2. `docs/superpowers/plans/` 中的阶段计划与进展计划 +3. scene 生成链中的证据层、契约层、readiness / blocker 相关实现 +4. P0 样板对应的 fixture、golden IR、验收基线或等价校准资产 + +## Completion Criteria + +本计划完成的标志为: + +1. `tq` 主样板可以稳定恢复核心业务语义,并在内网运行与报表结果上达到高质量参考水平 +2. 单请求主样板可以形成可复制的高通过率模板,并覆盖主流通用报表场景 +3. 分页补数主样板可以稳定识别复杂 workflow,并在证据不足时 fail-closed +4. `Scene IR` 前存在可裁决的证据层 +5. archetype 前存在明确契约 gate +6. 
P0 标准答案已成为后续迁移与回归的统一校准基线 +7. 实施主线明确聚焦 `G1 + G2 + G3`,不再被边界场景牵引偏航 + +## Risks and Control Points + +1. 若证据层先天过薄,后续契约和 canonical answer 会失去支撑 +2. 若契约 gate 定义过宽,系统会继续伪造 runnable skill +3. 若 P0 标准答案不冻结,后续优化将失去对齐基线 +4. 若过早进入 P1 扩展,容易在未完成分层前再次引入宿主噪声污染 + +## Out of Plan + +以下事项明确不属于本计划直接交付范围: + +1. 统一平台登录流程自动恢复 +2. 目标业务系统后台登录实现细节 +3. 浏览器宿主能力的全量抽象 +4. 所有场景的一次性端到端可运行保证 diff --git a/docs/superpowers/plans/2026-04-17-scene-skill-compiler-plan.md b/docs/superpowers/plans/2026-04-17-scene-skill-compiler-plan.md new file mode 100644 index 0000000..7c3419b --- /dev/null +++ b/docs/superpowers/plans/2026-04-17-scene-skill-compiler-plan.md @@ -0,0 +1,663 @@ +# Scene Skill Compiler Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Upgrade `sg_scene_generate` from a scene metadata extractor plus template filler into a reusable scene skill compiler that can understand workflow semantics, classify scene archetypes, and generate runnable skills for both `tq-lineloss-report`-style and `marketing-zero-consumer-report`-style internal scenes. + +**Architecture:** Introduce a unified `Scene IR`, switch extraction to a hybrid deterministic-plus-LLM pipeline, route generation by `workflowArchetype`, align runtime resolver contracts, and add readiness gates so users can tell whether a generated skill is safe to trial on the internal network. 
+ +**Tech Stack:** Rust, Node.js, HTML/CSS/JavaScript, serde_json, OpenAI-compatible LLM API + +--- + +## Scope Check + +This plan implements the design in: + +- `docs/superpowers/specs/2026-04-17-scene-skill-compiler-design.md` + +This plan builds on the existing generator work already described in: + +- `docs/superpowers/specs/2026-04-17-llm-driven-skill-generation-design.md` +- `docs/superpowers/specs/2026-04-17-enhanced-llm-extraction-schema-design.md` +- `docs/superpowers/specs/2026-04-17-progressive-template-enhancement-design.md` +- `docs/superpowers/specs/2026-04-16-multi-scene-kind-generator-design.md` + +This plan does not attempt to solve: + +- full login and authentication reconstruction +- all historical scene patterns in one pass +- 100% no-touch generation without human review + +--- + +## File Map + +### Core generator pipeline + +| File | Action | Purpose | +|------|--------|---------| +| `frontend/scene-generator/llm-client.js` | Modify | Replace truncation-only extraction with chunked workflow-aware extraction and `Scene IR` schema output | +| `frontend/scene-generator/generator-runner.js` | Modify | Add deterministic scene scanning, key-fragment selection, and IR support | +| `frontend/scene-generator/server.js` | Modify | Expose analysis, preview, readiness, and generation endpoints for `Scene IR` | +| `frontend/scene-generator/sg_scene_generator.html` | Modify | Show extraction preview, archetype classification, bootstrap, risks, and readiness | + +### Rust backend + +| File | Action | Purpose | +|------|--------|---------| +| `src/generated_scene/analyzer.rs` | Modify | Add deterministic extraction helpers and archetype support | +| `src/generated_scene/generator.rs` | Modify | Route generation by archetype and compile from `Scene IR` instead of ad hoc fields | +| `src/generated_scene/ir.rs` | Create | Define unified `Scene IR` structs and serde contracts | +| `src/bin/sg_scene_generate.rs` | Modify | Accept `Scene IR` JSON or file input and 
pass it into generator | +| `src/compat/scene_platform/resolvers.rs` | Modify | Align runtime parameter resolution with generated contracts | + +### Tests and fixtures + +| File | Action | Purpose | +|------|--------|---------| +| `tests/scene_generator_test.rs` | Modify | Cover new analysis, archetype classification, and generation routing | +| `tests/generated_scene_*` or related fixtures | Modify/Create | Add representative fixtures for single-request, multi-mode, and paginated-enrichment scenes | + +--- + +## Scope Guardrails + +- Do not break existing `--scene-id`, `--scene-name`, or `--scene-kind` compatibility. +- Do not require all scenes to provide complete metadata in HTML meta tags. +- Do not force the runtime to support new resolver contracts unless generation is updated to gate incompatible output. +- Do not assume all report scenes share `org + period` params. +- Do not silently generate low-confidence skills as if they were runnable. + +--- + +### Task 1: Fix Current Hard Failures Before Compiler Refactor + +**Files:** +- Modify: `frontend/scene-generator/llm-client.js` +- Modify: `frontend/scene-generator/generator-runner.js` +- Modify: `frontend/scene-generator/server.js` +- Modify: `frontend/scene-generator/sg_scene_generator.html` +- Modify: `src/generated_scene/generator.rs` + +**Goal:** Stop the most obvious wrong outputs that currently make generated skills fail on the internal network even before the full compiler architecture lands. + +- [ ] **Step 1: Remove report-scene hardcoded parameter assumptions** + +Audit `scene.toml` generation in `src/generated_scene/generator.rs` and remove default injection of generic report params such as: + +- fixed `org` +- fixed `period` +- default dictionary entity for a specific city +- generic page title keywords like `["报表", "线损"]` + +Expected result: generated params come from extracted scene semantics or are omitted when not confidently known. 
+ +- [ ] **Step 2: Rework bootstrap source priority** + +Change bootstrap derivation so `expected_domain` and `target_url` are resolved using this order: + +1. explicit deep extraction result +2. deterministic extraction from business entry points +3. HTML meta tags if trustworthy +4. fallback empty with warning + +Explicitly prevent script-host URLs such as static JS includes from becoming the business domain by mistake. + +- [ ] **Step 3: Replace naive truncation with chunked extraction input** + +Update `frontend/scene-generator/llm-client.js` and `frontend/scene-generator/generator-runner.js` so they no longer send only the first `15000/3000` characters. Replace with: + +1. directory tree summary +2. `index.html` chunking +3. URL-bearing fragments +4. request-construction fragments +5. branching logic fragments +6. export-related fragments + +- [ ] **Step 4: Add analysis preview and risk banner in Web UI** + +Update `frontend/scene-generator/sg_scene_generator.html` and `frontend/scene-generator/server.js` to preview: + +- detected archetype +- bootstrap +- key endpoints +- extracted params +- workflow steps +- confidence and risk notes + +- [ ] **Step 5: Verify with marketing and tq reference scenes** + +Run local analysis against the two reference scenes and confirm: + +- `marketing-zero-consumer-report` no longer resolves the wrong domain +- `tq-lineloss-report` still identifies mode-related structures +- generated preview no longer shows generic hardcoded report params + +- [ ] **Step 6: Commit** + +```bash +git add frontend/scene-generator/llm-client.js frontend/scene-generator/generator-runner.js frontend/scene-generator/server.js frontend/scene-generator/sg_scene_generator.html src/generated_scene/generator.rs +git commit -m "fix(generator): remove hardcoded report defaults and improve bootstrap extraction" +``` + +--- + +### Task 2: Introduce Unified Scene IR + +**Files:** +- Create: `src/generated_scene/ir.rs` +- Modify: 
`src/generated_scene/generator.rs` +- Modify: `src/bin/sg_scene_generate.rs` +- Modify: `frontend/scene-generator/server.js` +- Modify: `frontend/scene-generator/llm-client.js` + +**Goal:** Introduce a single intermediate representation that all extraction and compilation stages use. + +- [ ] **Step 1: Add Rust `Scene IR` structs** + +Create `src/generated_scene/ir.rs` with serde-enabled structs for: + +- `SceneIr` +- `BootstrapIr` +- `ParamIr` +- `ModeIr` +- `WorkflowStepIr` +- `ArtifactContractIr` +- `NormalizeRulesIr` +- `ReadinessIr` +- `EvidenceIr` + +Minimum top-level fields: + +```json +{ + "sceneId": "", + "sceneName": "", + "sceneKind": "", + "workflowArchetype": "", + "bootstrap": {}, + "params": [], + "modes": [], + "workflowSteps": [], + "requestTemplate": {}, + "responsePath": "", + "normalizeRules": {}, + "artifactContract": {}, + "validationHints": {}, + "evidence": [] +} +``` + +- [ ] **Step 2: Wire `Scene IR` into generator entrypoints** + +Update `src/bin/sg_scene_generate.rs` to accept either: + +- `--scene-info-json` upgraded to the new IR contract, or +- a new `--scene-ir-json` / `--scene-ir-file` parameter + +Keep backward compatibility by translating old scene info into partial IR where needed. + +- [ ] **Step 3: Refactor generator to compile from IR** + +Update `src/generated_scene/generator.rs` so its internal interfaces no longer directly depend on loosely grouped fields like `expectedDomain`, `staticParams`, and `columnDefs` alone. It should compile from unified `SceneIr`. + +- [ ] **Step 4: Update Node server to pass IR through generation** + +Modify `frontend/scene-generator/server.js` so analyze endpoints return IR-shaped JSON and generate endpoints pass the same structure into Rust without flattening. 
+ +- [ ] **Step 5: Verify serde and CLI compatibility** + +Run: + +```bash +cargo check +node --check frontend/scene-generator/server.js +node --check frontend/scene-generator/llm-client.js +``` + +Expected: Rust and Node compile cleanly with the new IR contract. + +- [ ] **Step 6: Commit** + +```bash +git add src/generated_scene/ir.rs src/generated_scene/generator.rs src/bin/sg_scene_generate.rs frontend/scene-generator/server.js frontend/scene-generator/llm-client.js +git commit -m "feat(generator): introduce unified scene ir for analysis and compilation" +``` + +--- + +### Task 3: Build Hybrid Extraction Pipeline + +**Files:** +- Modify: `src/generated_scene/analyzer.rs` +- Modify: `frontend/scene-generator/generator-runner.js` +- Modify: `frontend/scene-generator/llm-client.js` +- Modify: `frontend/scene-generator/server.js` + +**Goal:** Split extraction into deterministic signal collection plus LLM semantic completion. + +- [ ] **Step 1: Implement deterministic extraction helpers** + +Add helper logic in `src/generated_scene/analyzer.rs` or adjacent extraction code to detect: + +- URLs and request methods +- `contentType` +- request payload builders +- pagination variables such as `page`, `rows`, `pageSize` +- branch variables such as `period_mode`, `reportType` +- entry methods +- export methods +- obvious filter expressions such as `charge !== 0` + +- [ ] **Step 2: Create key-fragment selection in Node runner** + +Update `frontend/scene-generator/generator-runner.js` to extract and package: + +- directory summary +- URL fragments +- branch fragments +- request-body fragments +- response normalization fragments +- export fragments + +for LLM analysis. 
+ +- [ ] **Step 3: Redesign LLM prompt for workflow understanding** + +Update `frontend/scene-generator/llm-client.js` so the prompt explicitly asks for: + +- `workflowArchetype` +- `bootstrap` +- `params` +- `modes` +- `workflowSteps` +- `requestTemplate` +- `responsePath` +- `normalizeRules` +- `artifactContract` +- `confidence` +- `uncertainties` + +- [ ] **Step 4: Merge deterministic and LLM results** + +Implement merge logic in `frontend/scene-generator/server.js` or a dedicated helper: + +- deterministic extraction wins for hard facts +- LLM fills missing semantics +- conflicts are surfaced in preview as warnings + +- [ ] **Step 5: Verify against reference workflows** + +Check that: + +- `marketing-zero-consumer-report` emits workflow steps including `paginate`, `secondary_request`, `filter`, and `export` +- `tq-lineloss-report` emits `modes`, `defaultMode`, and `modeSwitchField` + +- [ ] **Step 6: Commit** + +```bash +git add src/generated_scene/analyzer.rs frontend/scene-generator/generator-runner.js frontend/scene-generator/llm-client.js frontend/scene-generator/server.js +git commit -m "feat(generator): add hybrid deterministic and llm workflow extraction" +``` + +--- + +### Task 4: Add Workflow Archetype Classification + +**Files:** +- Modify: `src/generated_scene/analyzer.rs` +- Modify: `src/generated_scene/ir.rs` +- Modify: `frontend/scene-generator/server.js` +- Modify: `frontend/scene-generator/sg_scene_generator.html` + +**Goal:** Reliably classify scenes so the correct compiler path is chosen. + +- [ ] **Step 1: Add archetype enum support** + +Define and support these initial archetypes: + +- `single_request_table` +- `multi_mode_request` +- `paginated_enrichment` +- `page_state_eval` + +- [ ] **Step 2: Implement classification rules** + +Classification logic should prefer: + +1. `multi_mode_request` when explicit mode-switch branching exists +2. `paginated_enrichment` when paginated list fetch plus secondary requests are detected +3. 
`page_state_eval` when page-state judgment dominates +4. `single_request_table` as fallback with lower confidence + +- [ ] **Step 3: Expose classification confidence** + +Add confidence and evidence fields to the preview payload so UI can show why a scene was classified into an archetype. + +- [ ] **Step 4: Add manual override support in UI** + +Allow users to override archetype in `frontend/scene-generator/sg_scene_generator.html` before final generation, but preserve the original detected result and confidence. + +- [ ] **Step 5: Verify reference classifications** + +Expected: + +- `marketing-zero-consumer-report` => `paginated_enrichment` +- `tq-lineloss-report` => `multi_mode_request` + +- [ ] **Step 6: Commit** + +```bash +git add src/generated_scene/analyzer.rs src/generated_scene/ir.rs frontend/scene-generator/server.js frontend/scene-generator/sg_scene_generator.html +git commit -m "feat(generator): classify scenes by workflow archetype with confidence" +``` + +--- + +### Task 5: Split Generator Into Archetype Compilers + +**Files:** +- Modify: `src/generated_scene/generator.rs` +- Optionally create: `src/generated_scene/compiler_single_request.rs` +- Optionally create: `src/generated_scene/compiler_multi_mode.rs` +- Optionally create: `src/generated_scene/compiler_paginated_enrichment.rs` +- Optionally create: `src/generated_scene/compiler_page_state.rs` + +**Goal:** Replace the single generic report template with explicit compiler paths. + +- [ ] **Step 1: Add compiler routing by archetype** + +Update `src/generated_scene/generator.rs` so generation dispatches on `workflowArchetype`. 
+ +- [ ] **Step 2: Implement `single_request_table` compiler** + +Generate: + +- minimal `scene.toml` +- direct request browser script +- artifact output for simple table/list data + +- [ ] **Step 3: Implement `multi_mode_request` compiler** + +Generate: + +- mode detection +- mode-specific request builders +- mode-specific column definitions +- mode-specific response extraction +- unified artifact output + +Reference target: `tq-lineloss-report` + +- [ ] **Step 4: Implement `paginated_enrichment` compiler** + +Generate: + +- paginated list loop +- per-item or batched secondary requests +- aggregation and transform steps +- business filters +- final artifact or export output + +Reference target: `marketing-zero-consumer-report` + +- [ ] **Step 5: Implement `page_state_eval` compiler** + +Generate: + +- state-check script skeleton +- light artifact semantics for monitoring or status checks + +- [ ] **Step 6: Verify generated outputs by archetype** + +Validate that generated scripts no longer: + +- define multiple API endpoints but use only the first +- collapse mode-aware scenes into one request body +- flatten paginated enrichment scenes into one-step normalization + +- [ ] **Step 7: Commit** + +```bash +git add src/generated_scene/generator.rs src/generated_scene/compiler_*.rs +git commit -m "feat(generator): split scene generation into workflow archetype compilers" +``` + +--- + +### Task 6: Align Runtime Resolver Contracts + +**Files:** +- Modify: `src/compat/scene_platform/resolvers.rs` +- Modify: `src/generated_scene/generator.rs` +- Modify: `src/generated_scene/ir.rs` + +**Goal:** Ensure generated parameter contracts are either executable by the runtime or explicitly flagged as unsupported. 
+ +- [ ] **Step 1: Audit current resolver coverage** + +Document which current contracts are already supported, including: + +- `dictionary_entity` +- `month_week_period` +- `fixed_enum` +- `literal_passthrough` + +- [ ] **Step 2: Add missing resolver types or gate them** + +Choose one of these paths per parameter type: + +1. implement new runtime resolver support +2. downgrade generation to an existing supported resolver +3. block generation with explicit readiness warning + +Recommended additions: + +- `mode_enum` +- `date_range` +- `org_tree` +- `page_size` +- `hidden_static` +- `derived_param` + +- [ ] **Step 3: Reflect runtime compatibility in generated metadata** + +Generated output should clearly indicate: + +- supported params +- unresolved params +- manual-completion requirements + +- [ ] **Step 4: Add tests for resolver alignment** + +Extend tests to ensure a generated skill cannot claim runnable readiness when its params require unsupported resolver behavior. + +- [ ] **Step 5: Commit** + +```bash +git add src/compat/scene_platform/resolvers.rs src/generated_scene/generator.rs src/generated_scene/ir.rs tests/scene_generator_test.rs +git commit -m "feat(runtime): align generated scene contracts with resolver support" +``` + +--- + +### Task 7: Add Readiness Gates And Generation Report + +**Files:** +- Modify: `frontend/scene-generator/server.js` +- Modify: `frontend/scene-generator/sg_scene_generator.html` +- Modify: `src/generated_scene/ir.rs` +- Modify: `src/generated_scene/generator.rs` + +**Goal:** Make generation output self-describing so users know whether a skill is ready for internal-network trial. 
+ +- [ ] **Step 1: Add static readiness checks** + +Implement checks for: + +- entrypoint detection +- request-chain completeness +- bootstrap plausibility +- param/runtime compatibility +- archetype compiler completeness + +- [ ] **Step 2: Add readiness levels** + +Define: + +- `A` = ready for direct internal-network trial +- `B` = structurally correct, human review recommended +- `C` = draft only, manual completion required + +- [ ] **Step 3: Generate human-readable report** + +Each analysis or generation result should include: + +- archetype +- confidence +- key evidence +- detected risks +- missing pieces +- readiness level + +- [ ] **Step 4: Display readiness in Web UI** + +Show the readiness grade before generation and after generation, with explicit warnings for internal-network execution risk. + +- [ ] **Step 5: Verify readiness outcomes** + +Expected baseline: + +- `tq-lineloss-report` should reach `A` or high-confidence `B` +- `marketing-zero-consumer-report` should not be labeled runnable unless pagination and secondary-request logic are correctly represented + +- [ ] **Step 6: Commit** + +```bash +git add frontend/scene-generator/server.js frontend/scene-generator/sg_scene_generator.html src/generated_scene/ir.rs src/generated_scene/generator.rs +git commit -m "feat(generator): add readiness grading and generation risk reporting" +``` + +--- + +### Task 8: Add Regression Coverage For Reference Scenes + +**Files:** +- Modify: `tests/scene_generator_test.rs` +- Create/Modify: scene generator fixtures as needed + +**Goal:** Lock in the two reference scenes as ongoing regression cases. 
+ +- [ ] **Step 1: Add marketing classification fixture coverage** + +Test that the marketing source scene is classified as `paginated_enrichment` and contains evidence for: + +- paginated list request +- secondary request +- filter rule +- export step + +- [ ] **Step 2: Add tq classification fixture coverage** + +Test that the tq source scene is classified as `multi_mode_request` and contains evidence for: + +- month mode +- week mode +- distinct request templates +- distinct column definitions + +- [ ] **Step 3: Add generation-shape assertions** + +Assert that generated outputs differ by archetype and do not collapse to a single generic template shape. + +- [ ] **Step 4: Run verification** + +```bash +cargo test --test scene_generator_test -- --nocapture +``` + +Expected: both reference cases pass and guard against regression. + +- [ ] **Step 5: Commit** + +```bash +git add tests/scene_generator_test.rs tests/fixtures +git commit -m "test(generator): add regression coverage for marketing and tq reference scenes" +``` + +--- + +## Delivery Sequence + +Recommended implementation order: + +1. Task 1: hard failure fixes +2. Task 2: `Scene IR` +3. Task 3: hybrid extraction +4. Task 4: archetype classification +5. Task 5: compiler split +6. Task 6: resolver alignment +7. Task 7: readiness gates +8. Task 8: regression coverage + +Rationale: + +- Task 1 stops current bad outputs early. +- Tasks 2 to 5 establish the new compiler backbone. +- Tasks 6 and 7 prevent false claims of runnability. +- Task 8 locks the new architecture against regression. 
+ +--- + +## Verification Strategy + +### Static Verification + +- `cargo check` +- `cargo test --test scene_generator_test -- --nocapture` +- `node --check frontend/scene-generator/llm-client.js` +- `node --check frontend/scene-generator/generator-runner.js` +- `node --check frontend/scene-generator/server.js` + +### Functional Verification + +For `marketing-zero-consumer-report`: + +- detected as `paginated_enrichment` +- bootstrap resolves to business domain, not static script host +- generated workflow includes pagination and secondary requests +- generation is not labeled runnable if those steps are missing + +For `tq-lineloss-report`: + +- detected as `multi_mode_request` +- month and week logic remain distinct +- request templates and column definitions are mode-specific + +### UI Verification + +Confirm the scene generator UI now shows: + +- detected archetype +- confidence +- bootstrap +- key params +- readiness grade +- risk notes + +--- + +## Acceptance Criteria + +This plan is complete when all of the following are true: + +1. `sg_scene_generate` consumes a unified `Scene IR`. +2. The analysis pipeline can distinguish at least `single_request_table`, `multi_mode_request`, `paginated_enrichment`, and `page_state_eval`. +3. `tq-lineloss-report` is generated through the multi-mode compiler path. +4. `marketing-zero-consumer-report` is generated through the paginated-enrichment compiler path. +5. Generated `scene.toml` no longer injects unrelated default org/period assumptions. +6. Bootstrap resolution no longer mistakes external script hosts for business target domains. +7. Runtime resolver compatibility is explicit, not implicit. +8. Generation results include readiness grading and risk reporting before internal-network trial. 
+ diff --git a/docs/superpowers/plans/2026-04-18-g1-boundary-reassignment-plan.md b/docs/superpowers/plans/2026-04-18-g1-boundary-reassignment-plan.md new file mode 100644 index 0000000..337cd3c --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g1-boundary-reassignment-plan.md @@ -0,0 +1,193 @@ +# G1 边界收敛与家族重排实施计划 +> Date: 2026-04-18 +> Status: Draft +> Source: +> - `docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md` +> - `examples/g1_batch_round1/` + +## 1. Plan Intent + +本计划用于处理 `G1` 通用单页报表组边界过宽的问题。 + +通过对以下 4 个边界样本的实测与结构分析,已经确认当前 `G1` 分类存在误收问题: + +1. `高低压新增报装容量月度统计表` +2. `电能表现场检验完成率指标报表` +3. `计量资产库存统计` +4. `95598供电服务月报` + +结论不是“是否继续观察”,而是“必须整改”: + +1. `G1` 的定义必须收紧 +2. 这 4 个样本必须重排 +3. 后续实施必须按新边界推进,不能继续把这 4 个样本混在同一类里 + +## 2. Rectification Objective + +本轮整改目标固定为: + +1. 收紧 `G1` 定义,避免继续污染 `single_request_table` +2. 将 4 个边界样本重新分配到正确家族 +3. 为后续实现提供明确顺序,不再把边界样本混做“通用报表” + +## 3. Final Reassignment Decision + +本计划执行时,4 个样本的正式归类结论固定如下: + +1. `高低压新增报装容量月度统计表` + - 保留在 `G1` + - 子型标记为:`G1-E 轻量补查汇总型` +2. `电能表现场检验完成率指标报表` + - 从 `G1` 拆出 + - 新家族标记为:`G6 宿主桥接多步查询型` +3. `计量资产库存统计` + - 从 `G1` 拆出 + - 新家族标记为:`G7 多接口盘点汇总型` +4. `95598供电服务月报` + - 从 `G1` 拆出 + - 新家族标记为:`G8 抓取落库分析出文档型` + +## 4. Scope Guardrails + +本计划边界固定如下: + +1. 不修改线损家族 `G2` +2. 不扩展到全部 `102` 个场景同步重排 +3. 只处理 `G1` 边界定义与这 4 个边界样本 +4. 不在本计划内直接实现 `G6/G7/G8` 全部能力 +5. 本计划优先产出“边界收敛 + 家族重排 + 实施顺序” + +## 5. Phase Overview + +执行顺序固定为: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3` + +### Phase 0: 冻结整改口径 + +目标: + +1. 冻结 `G1` 修订定义 +2. 冻结 4 个样本的正式重排结论 + +退出标准: + +1. 后续不再把这 4 个样本同时作为 `G1` 候选讨论 + +### Phase 1: 收紧 G1 边界 + +目标: + +1. 将 `G1` 明确收敛为“通用单页报表” +2. 把不属于 `G1` 的结构特征显式列为排除条件 + +必须落地的对象: + +1. `G1` 修订定义 +2. `G1` 进入条件 +3. `G1` 排除条件 +4. `G1-E` 作为上边界子型的说明 + +退出标准: + +1. `single_request_table` 不再承接宿主桥接型、盘点型、落库分析型场景 + +### Phase 2: 样本重排与家族建档 + +目标: + +1. 把 4 个样本正式移到对应家族 +2. 为 `G6/G7/G8` 建立最小定义 + +必须落地的对象: + +1. 样本重排表 +2. `G6` 最小定义 +3. `G7` 最小定义 +4. `G8` 最小定义 + +退出标准: + +1. 
4 个样本不再处于“G1 模糊候选”状态 + +### Phase 3: 后续实施顺序固定 + +目标: + +1. 确定后续开发顺序 +2. 避免多家族并发扩散 + +固定顺序: + +1. 先继续推进 `高低压新增报装容量月度统计表` + - 作为 `G1-E` +2. 再单开 `G6` + - `电能表现场检验完成率指标报表` +3. 再评估 `G7` + - `计量资产库存统计` +4. 最后评估 `G8` + - `95598供电服务月报` + +退出标准: + +1. 后续任务顺序明确 +2. `G1` 不再继续吞入新边界样本 + +## 6. Family-Level Rectification Rules + +### 6.1 G1 修订规则 + +`G1` 仅保留以下场景: + +1. 单系统、单页面承载 +2. 存在相对清晰的主请求链 +3. 请求模板与响应路径可直接恢复 +4. 最终结果为单表或单次统计汇总 +5. 不依赖复杂宿主桥接 +6. 不依赖本地落库与 SQL 分析 + +### 6.2 G1 排除规则 + +出现以下特征之一,即不再归入 `G1`: + +1. `BrowserAction / sgBrowserExcuteJsCode` 主导业务请求推进 +2. 存在明显多轮 callback 串联 workflow +3. 同场景内存在多个业务 endpoint 分类型扫数 +4. 报表前需要本地落库、二次分析或 SQL 聚合 +5. 输出以 Word 文档流水线而非直接表格结果为主 + +## 7. Implementation Priority + +优先级固定如下: + +1. `P0` + - `高低压新增报装容量月度统计表` + - 目标:验证 `G1-E` 是否可作为 `G1` 上边界稳定成立 +2. `P1` + - `电能表现场检验完成率指标报表` + - 目标:验证 `G6` 的最小 workflow 定义 +3. `P2` + - `计量资产库存统计` + - 目标:验证 `G7` 的多 endpoint 聚合边界 +4. `P3` + - `95598供电服务月报` + - 目标:验证 `G8` 的抓取落库分析链路边界 + +## 8. Deliverables + +本计划完成时至少产出: + +1. `G1` 边界修订文案 +2. 4 个边界样本重排表 +3. `G6/G7/G8` 最小家族定义 +4. 后续实施优先级清单 + +## 9. Completion Criteria + +本计划完成的标志是: + +1. `G1` 定义被正式收紧 +2. 4 个边界样本完成正式重排 +3. `高低压新增报装容量月度统计表` 被确定为 `G1-E` +4. `电能表现场检验完成率指标报表`、`计量资产库存统计`、`95598供电服务月报` 不再继续作为 `G1` 样本使用 +5. 后续开发顺序固定,不再反复讨论边界归属 diff --git a/docs/superpowers/plans/2026-04-18-g1-e-light-enrichment-report-plan.md b/docs/superpowers/plans/2026-04-18-g1-e-light-enrichment-report-plan.md new file mode 100644 index 0000000..48fcb72 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g1-e-light-enrichment-report-plan.md @@ -0,0 +1,212 @@ +# G1-E Light Enrichment Report Plan + +> Date: 2026-04-18 +> Status: Draft +> Source: +> - `docs/superpowers/specs/2026-04-18-g1-e-light-enrichment-report-design.md` +> - `docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md` +> - `docs/superpowers/reports/2026-04-18-g1-boundary-reassignment-report.md` + +## 1. Plan Intent + +本计划用于把 `G1-E 轻量补查汇总型` 从概念边界推进到可实施状态。 + +本轮只解决一个问题: + +1. 
让生成器能够对“单主请求 + 少量补查 + 单次汇总输出”的场景,恢复出可编译的三段式业务语义。 + +本计划不处理 `G6/G7/G8`,也不扩展到其它家族。 + +## 2. Scope + +本计划纳入范围的对象只有三类: + +1. `G1-E` 证据层补齐 +2. `G1-E` 三段式 `Scene IR` / compiler gate 落地 +3. `高低压新增报装容量月度统计表` 的 P0 样板验证 + +本计划明确排除: + +1. `G6 宿主桥接多步查询型` +2. `G7 多接口盘点汇总型` +3. `G8 抓取落库分析出文档型` +4. `102` 个场景的大规模家族扩展 + +## 3. Fixed Sample + +本计划的唯一 P0 样板固定为: + +1. `高低压新增报装容量月度统计表` + +该样板的冻结目标是: + +1. 主请求:`getWkorderAll` +2. 补查请求: + - `queryElectCustInfo` + - `queryBusAcpt` + - `getBatchPerCust97` +3. 最终恢复为主请求、补查请求、并回规则三段式结构 + +在本计划完成前,不新增第二个 `G1-E` 样板。 + +## 4. Phase Overview + +执行顺序固定为: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3` + +### Phase 0: Freeze Contract + +目标: + +1. 冻结 `G1-E` 最小定义 +2. 冻结 P0 样板的主链、补查链、并回链目标口径 + +必须落地的对象: + +1. `G1-E` spec +2. P0 样板目标结构说明 +3. 失败分类口径 + +退出标准: + +1. 后续实现不再回退成普通 `G1 single_request_table` + +### Phase 1: Evidence Layer Completion + +目标: + +1. 让提取链路可以显式产出 `main_request` 证据 +2. 让提取链路可以显式产出 `enrichment_request` 证据 +3. 让提取链路可以显式产出 `merge_plan` 证据 + +必须落地的对象: + +1. `main_request` 证据 schema +2. `enrichment_request` 证据 schema +3. `merge_plan` 证据 schema +4. 对应的越界识别信号 + +退出标准: + +1. P0 样板不再只落到 `page_state_eval` +2. 提取结果中能看见主请求、补查请求、并回规则候选 + +### Phase 2: Scene IR And Compiler Gates + +目标: + +1. 在 `Scene IR` 中承载三段式结构 +2. 在 compiler 中增加 `G1-E` 专属 gate +3. 防止缺失补查契约的结果误判为普通 `G1` 成功 + +必须落地的对象: + +1. `main_request` +2. `enrichment_requests[]` +3. `merge_plan` +4. `main_request_resolved` +5. `enrichment_requests_resolved` +6. `merge_plan_resolved` +7. `g1e_scope_compatible` + +退出标准: + +1. `G1-E` 可以独立于 `single_request_table` 被判定 +2. 越界样本会被阻断,而不是伪成功 + +### Phase 3: P0 Validation + +目标: + +1. 用 `高低压新增报装容量月度统计表` 验证 `G1-E` 最小闭环 +2. 冻结第一版验收基线 + +必须落地的对象: + +1. P0 样板生成结果 +2. P0 样板验证记录 +3. P0 样板失败归因记录 + +退出标准: + +1. 主请求、补查请求、并回规则均能稳定恢复 +2. 结果不再是空壳 `params=[] / requestEntries=[] / columnDefs=[]` +3. 缺证据时能 fail-closed + +## 5. Work Breakdown + +### Task Group A: G1-E Evidence Modeling + +任务目标: + +1. 定义主请求证据对象 +2. 定义补查请求证据对象 +3. 定义并回规则证据对象 +4. 
明确越界到 `G6/G7/G8` 的识别信号 + +完成标志: + +1. `G1-E` 不再依赖“全文像不像报表”的模糊判断 + +### Task Group B: G1-E IR / Compiler Integration + +任务目标: + +1. 为 `G1-E` 建立三段式 `Scene IR` +2. 增加 `G1-E` gate +3. 切断“补查缺失但仍按普通 G1 成功”的通道 + +完成标志: + +1. `G1-E` 与 `G1` 的成功条件正式分离 + +### Task Group C: P0 Sample Verification + +任务目标: + +1. 重新生成 `高低压新增报装容量月度统计表` +2. 核对主请求、补查请求、并回规则是否完整 +3. 输出验证报告 + +完成标志: + +1. `高低压新增报装容量月度统计表` 成为 `G1-E` 第一版标准样板 + +## 6. Deliverables + +本计划完成时至少产出: + +1. `G1-E` 证据层实现 +2. `G1-E` 三段式 `Scene IR` +3. `G1-E` compiler gate +4. `高低压新增报装容量月度统计表` 的 P0 生成与验证结果 +5. 对应整改报告或验证报告 + +## 7. Acceptance Criteria + +本计划完成的标志是: + +1. `G1-E` 已从文档定义进入可实现、可验证状态 +2. `高低压新增报装容量月度统计表` 不再被误生成为普通 `G1` 空壳 skill +3. 生成器能够显式恢复: + - 主请求 + - 补查请求 + - 并回规则 +4. 当证据不足或结构越界时,系统会阻断并说明原因 + +## 8. Execution Guardrails + +执行过程中必须遵守以下边界: + +1. 不把 `G6/G7/G8` 的能力提前混入 `G1-E` +2. 不扩展第二个 `G1-E` 样板 +3. 不为了“先生成一个 skill”而放松 gate +4. 不把 `G1-E` 再退化回普通 `single_request_table` + +## 9. Next Plan + +本计划完成后,后续顺序固定为: + +1. 若 `G1-E` P0 验证通过,再决定是否补第二个 `G1-E` 样板 +2. 然后再进入 `G6` 的独立 spec / plan diff --git a/docs/superpowers/plans/2026-04-18-g2-family-expansion-plan.md b/docs/superpowers/plans/2026-04-18-g2-family-expansion-plan.md new file mode 100644 index 0000000..634fb7b --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g2-family-expansion-plan.md @@ -0,0 +1,303 @@ +# G2 家族扩展整改计划 + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Inputs:** +> [2026-04-18-g2-remediation-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-18-g2-remediation-plan.md) +> [2026-04-18-g2-second-round-remediation-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-18-g2-second-round-remediation-report.md) + +## Plan Intent + +本计划用于承接上一轮 `G2` 主样本整改结果,把目标从“修通 `tq` 主样本”推进到“扩展 `G2` 家族变体覆盖”。 + +上一轮已经证明: + +1. `台区线损大数据-月_周累计线损率统计分析` 可以进入候选验证名单 +2. `G2` 主样本链路已经具备可编译性 +3. `白银线损周报` 与 
`线损同期差异报表` + +这两份剩余真实样本仍然稳定 `fail-close` + +因此,本计划的核心目标不是重做上一轮主样本整改,而是补齐 `G2` 家族内部剩余两类变体的识别与合同恢复能力。 + +## Success Baseline + +本计划完成后的最低成功口径固定为: + +1. `白银线损周报` 不再因为 `G2` 合同缺失而直接阻断 +2. `线损同期差异报表` 不再被粗暴套入 `tq` 主报表模板 +3. 生成器能够明确区分至少两类新增 `G2` 家族子型 +4. 新增子型具备各自最小可解释合同 +5. 对证据不足的样本继续 `fail-close` +6. readiness 与“是否达到候选验证名单”保持一致 +7. 输出第三轮 `G2` 家族扩展回归报告 + +## Scope Guardrails + +执行过程中保持以下边界不变: + +1. 不切换到 `G1` +2. 不切换到 `G3` +3. 不展开统一登录、隐藏域登录或宿主 transport 重构 +4. 不扩展到 102 个全量场景 +5. 不把本计划扩散成通用 scene skill 平台重写 +6. 不否定上一轮 `tq` 主样本已经收敛的口径 + +## Target Samples + +本计划只围绕以下三份 `G2` 家族真实样本执行: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +其中角色区分为: + +1. `台区线损大数据-月_周累计线损率统计分析` + 作用:`G2-A` 主样本基线,不允许回退 +2. `白银线损周报` + 作用:`G2-B` 周报单侧 mode 变体 +3. `线损同期差异报表` + 作用:`G2-C` 混合联动变体 + +## Family Expansion Hypothesis + +基于上一轮报告,本计划先将 `G2` 家族收束为三类: + +1. `G2-A` + 定义:`tq` 主报表型,具备稳定的 `month/week + cols1/cols2 + mode-specific request/response` + 当前状态:已进入候选验证名单 +2. `G2-B` + 定义:周报偏单侧 mode 变体,存在 `week/tjzq` 与线损主接口,但缺少与主样本同等级的双模式列合同 + 当前代表:`白银线损周报` +3. `G2-C` + 定义:线损主链路与外部系统联动混合变体,存在线损接口和联动接口并存的情况 + 当前代表:`线损同期差异报表` + +本计划的整改原则是: + +1. 不强行把 `G2-B/G2-C` 编造成 `G2-A` +2. 先把三类子型边界立住 +3. 再让每类子型各自拥有最小合同 + +## Workstreams + +本计划拆为五条工作流: + +1. `WS1` G2 子型分层与判定收束 +2. `WS2` G2-B 周报变体合同补齐 +3. `WS3` G2-C 混合联动变体隔离 +4. `WS4` G2 家族 readiness 分级重整 +5. `WS5` 真实样本第三轮回归与报告 + +## Phase Overview + +本计划按四个阶段推进: + +1. Phase 0:冻结家族扩展目标 +2. Phase 1:建立 `G2-A/G2-B/G2-C` 子型边界 +3. Phase 2:分别补齐 `G2-B/G2-C` 最小合同 +4. Phase 3:回归三份真实样本并输出扩展报告 + +执行顺序固定为: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3` + +## Phase 0:冻结扩展目标 + +### Objective + +把上一轮已经收敛出来的家族事实冻结下来,避免再次把问题表述成“主样本没修好”。 + +### Tasks + +1. 固化 `G2-A` 已达标口径 +2. 固化 `G2-B` 与 `G2-C` 的直接 blocker +3. 固化本计划只补家族扩展,不回退主样本链路 + +### Exit Criteria + +1. `tq` 主样本被视为基线,不再作为待整改对象 +2. 家族扩展问题被明确表述为“变体支持缺失” + +## Phase 1:建立子型边界 + +### Objective + +让系统能区分 `G2-A/G2-B/G2-C`,而不是所有 `G2` 一律走同一套路。 + +### WS1:G2 子型分层与判定收束 + +#### Task 1 + +审计当前 `G2` 真实样本信号差异,明确以下边界: + +1. 哪些信号属于 `G2-A` +2. 
哪些信号属于 `G2-B` +3. 哪些信号属于 `G2-C` + +#### Task 2 + +为 `G2` 增加子型判定规则,至少能区分: + +1. 双模式主报表型 +2. 周报单侧 mode 型 +3. 混合联动型 + +#### Task 3 + +补充 fixture 与回归测试,证明: + +1. `G2-A` 不回退 +2. `G2-B` 不再误套 `G2-A` +3. `G2-C` 不再误套 `G2-A` + +### Phase 1 Exit Criteria + +1. `G2` 家族内部已可分层 +2. 生成路径不再默认所有 `G2` 都是 `tq` 主报表 + +## Phase 2:补齐变体最小合同 + +### Objective + +分别为 `G2-B` 和 `G2-C` 建立“足够小但可解释”的合同。 + +### WS2:G2-B 周报变体合同补齐 + +#### Task 4 + +定义 `G2-B` 的最小合同,至少包括: + +1. 主 mode 或主周期字段 +2. 对应 request template +3. 对应 response path +4. 对应 column/required fields + +#### Task 5 + +修改 analyzer / generator / scene ir 组装逻辑,使 `白银线损周报` 能输出非空合同,而不是继续因合同缺失直接阻断。 + +#### Task 6 + +新增或更新测试,证明 `G2-B` 可以独立成立,不依赖 `month/week` 双模式完整结构。 + +### WS3:G2-C 混合联动变体隔离 + +#### Task 7 + +审计 `线损同期差异报表` 中: + +1. 线损主链路 +2. 同期系统联动链路 +3. 哪一部分属于主报表合同 + +#### Task 8 + +为 `G2-C` 建立隔离规则,避免混合联动接口污染主报表生成。 + +#### Task 9 + +定义 `G2-C` 的最小可编译合同,允许: + +1. 主链路进入候选验证 +2. 联动链路作为风险或扩展证据保留 + +而不是全部混在一起后直接失败。 + +#### Task 10 + +新增或更新测试,证明 `G2-C` 至少能稳定输出“主链路 + 联动风险”的结构化结果。 + +### WS4:G2 家族 readiness 分级重整 + +#### Task 11 + +为 `G2-A/G2-B/G2-C` 增加子型级 readiness gate。 + +#### Task 12 + +调整 readiness 评级逻辑,保证: + +1. `G2-A` 满足完整双模式合同时可以进入 `A` +2. `G2-B` 满足其最小合同时可以进入候选验证等级 +3. `G2-C` 若仅主链路闭合,也能获得可解释等级 +4. 证据不足时继续 `fail-close` + +#### Task 13 + +补充测试,证明 readiness 不会再用 `G2-A` 的标准去误判全部 `G2` 子型。 + +### Phase 2 Exit Criteria + +1. `G2-B` 具备最小合同 +2. `G2-C` 具备隔离后的最小合同 +3. readiness 与子型口径一致 + +## Phase 3:真实样本第三轮回归 + +### Objective + +基于扩展后的家族能力,重新回归三份真实样本并输出正式结论。 + +### WS5:真实样本第三轮回归与报告 + +#### Task 14 + +重新生成以下三份真实样本: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +#### Task 15 + +按统一口径对比: + +1. 子型判定 +2. bootstrap +3. request contract +4. response / column / normalize contract +5. readiness +6. 是否进入候选验证名单 + +#### Task 16 + +输出第三轮 `G2` 家族扩展整改报告,至少说明: + +1. `G2-A` 是否保持稳定 +2. `G2-B` 是否进入候选验证名单 +3. `G2-C` 是否进入候选验证名单或仍需 fail-close +4. 剩余 blocker 是否已经从“主样本不可生成”转移为“少数变体待扩展” + +### Deliverables + +1. `G2` 家族扩展回归测试 +2. `G2-B/G2-C` 对应 fixture +3. 
第三轮真实样本生成结果 +4. 第三轮 `G2` 家族扩展整改报告 + +### Acceptance Criteria + +1. `G2-A` 不回退 +2. `G2-B` 至少达到可解释合同或候选验证等级 +3. `G2-C` 至少达到主链路隔离成功,不能继续被整包噪声污染 +4. 三份样本不再被单一 `G2-A` 模型粗暴处理 + +## File-Level Targets + +本计划执行时,至少会触达以下资产类型: + +1. `src/generated_scene/` 下的 analyzer / generator / readiness 相关实现 +2. `tests/fixtures/generated_scene/` 下的 `G2-B/G2-C` fixture +3. `tests/` 下与 scene generator / readiness / family regression 相关的测试 +4. `docs/superpowers/reports/` 下的第三轮家族扩展报告 + +## Completion Criteria + +本计划完成的标志是: + +1. `G2` 已从“单主样本修通”推进到“至少三类子型可区分” +2. `白银线损周报` 与 `线损同期差异报表` 不再只是被动 fail-close +3. 下一步是否继续扩到更多线损变体,可以建立在第三轮家族扩展报告上 diff --git a/docs/superpowers/plans/2026-04-18-g2-remediation-plan.md b/docs/superpowers/plans/2026-04-18-g2-remediation-plan.md new file mode 100644 index 0000000..3c90c45 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g2-remediation-plan.md @@ -0,0 +1,331 @@ +# G2 家族整改计划 + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Spec:** [2026-04-18-g2-remediation-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-18-g2-remediation-design.md) + +## Plan Intent + +本计划用于把 `G2` 家族整改设计拆解为可执行任务,目标是把当前线损多模式报表家族从“信号能抓到但主链重建失败”,推进到“至少第一份样本达到候选验证门槛”。 + +本计划严格限定在 `G2` 家族整改,不扩展到: + +1. `G1` +2. `G3` +3. 更大范围真实场景迁移 +4. 登录恢复、宿主协议重构或运行时 transport 改造 + +## Success Baseline + +整改阶段的最低成功口径固定为: + +1. `台区线损大数据-月_周累计线损率统计分析` 不再坍缩为 `paginated_enrichment` +2. 至少该样本能生成 `multi_mode_request` 结构 +3. `bootstrap` 落到线损主业务承载面 +4. `modes` 至少恢复 `month` 与 `week` +5. mode-specific `request/response/column/normalize` 合同不再为空 +6. readiness 不再在核心合同缺失时给出虚高 `A` +7. 样本结果达到“可进入候选验证”门槛 + +## Scope Guardrails + +执行过程中保持以下边界不变: + +1. 不切换到 `G1/G3` 样本执行 +2. 不继续补更多同类 `G2` 观察样本 +3. 不在本计划中展开内网人工验证 +4. 不在本计划中处理统一登录与隐藏域登录恢复 +5. 不发散到 scene skill 平台通用重构 + +## Target Samples + +本计划整改与回归只围绕以下三份 `G2` 样本: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +对应产物路径: + +1. `examples/real_scene_batch_round1/skills/real-tq-lineloss-report-r1` +2. 
`examples/real_scene_batch_round1/skills/real-baiyin-lineloss-weekly-r1` +3. `examples/real_scene_batch_round1/skills/real-lineloss-period-diff-r1` + +## Workstreams + +本计划拆为五条工作流,与上游 `spec` 一一对应: + +1. `WS1` G2 archetype 纠偏 +2. `WS2` bootstrap 纠偏 +3. `WS3` mode contract 重建 +4. `WS4` endpoint 去污染 +5. `WS5` readiness 收紧 + +## Phase Overview + +本计划按四个阶段推进: + +1. Phase 0:冻结整改基线 +2. Phase 1:修正识别与选择 +3. Phase 2:重建 `G2` 合同 +4. Phase 3:回归真实样本并产出整改报告 + +执行顺序固定为: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3` + +其中 `Phase 1` 先于 `Phase 2`,避免在错误 archetype 和错误 bootstrap 上继续堆模板逻辑。 + +## Phase 0:冻结整改基线 + +### Objective + +把当前 `G2` 家族首轮 blocker、对标口径和验收门槛冻结,避免整改过程中边界漂移。 + +### Tasks + +1. 固化三份 `G2` 样本的当前失败画像 +2. 固化 `tq-lineloss-report` 作为 `G2` 主锚点参考 +3. 固化 `G2` 候选验证门槛 +4. 固化整改阶段只围绕 `G2` 的边界 + +### Deliverables + +1. 本计划 +2. 已存在的 `G2` blocker 汇总 +3. 已存在的第一轮迁移与候选验证报告 + +### Exit Criteria + +1. 后续执行不再追加同类 `G2` 观察样本 +2. 不再用“先去内网试试”替代整改闭环 + +## Phase 1:修正识别与选择 + +### Objective + +先把 `G2` 主链判定修正过来,解决 archetype、bootstrap 与 endpoint 污染这三个上游问题。 + +### WS1:G2 Archetype Rectification + +#### Task 1 + +审计当前 `G2` archetype 误判来源,确认: + +1. 哪些分页信号在夺权 +2. 哪些 mode 信号没有进入主判定 +3. 当前 `multi_mode_request` 与 `paginated_enrichment` 的优先级冲突点在哪里 + +#### Task 2 + +修改 `G2` archetype 判定逻辑,使以下信号在 `G2` 中具备更高权重: + +1. `month/week` +2. `mode` +3. `tjzq` +4. 同一场景内多组线损接口 +5. 模式切换分支字段 + +#### Task 3 + +新增或更新回归测试,证明: + +1. 当前 `G2` fixture 不再判成 `paginated_enrichment` +2. `G2` 相关修正不会误伤现有 `G3` fixture + +### WS2:Bootstrap Rectification + +#### Task 4 + +审计当前 bootstrap 选择逻辑,确认为什么三份样本都稳定落到 `20.77.115.36:31051`。 + +#### Task 5 + +为 `G2` 引入更严格的 bootstrap 选择约束: + +1. 优先真实线损业务承载页 +2. 排除页面壳入口与错误主域 +3. 继续排除 `localhost:*`、第三方库 URL、静态资源 URL + +#### Task 6 + +新增或更新测试,证明: + +1. `G2` 主样本 bootstrap 不再落到错误入口 +2. `localhost:*` 仍只作为宿主依赖证据保留 + +### WS4:Endpoint Purification + +#### Task 7 + +审计当前 endpoint 提取污染来源,明确以下类别如何被误收进业务候选: + +1. 第三方依赖库 +2. 文档外链 +3. 静态资源 URL +4. 其他业务系统遗留接口 + +#### Task 8 + +收紧 endpoint 候选过滤与排序规则,使 `G2` 样本中: + +1. 
线损主业务接口排在前列 +2. 外链与依赖库 URL 不再进入主业务候选 +3. 其他业务系统 endpoint 不再轻易抢占主链 + +#### Task 9 + +补充测试,证明: + +1. `G2` 主 endpoint 排序明显改善 +2. 噪声 endpoint 不再污染生成主脚本 + +### Phase 1 Exit Criteria + +1. `G2` fixture archetype 判定修正 +2. `G2` bootstrap 选择修正 +3. `G2` endpoint 候选排序修正 + +## Phase 2:重建 G2 合同 + +### Objective + +在主链判定正确后,恢复 `G2` 必需的 mode-specific 合同与更严格的 readiness。 + +### WS3:Mode Contract Reconstruction + +#### Task 10 + +为 `G2` 定义最小 mode contract,至少包括: + +1. `modes[]` +2. `defaultMode` +3. `modeSwitchField` +4. per-mode `requestTemplate` +5. per-mode `responsePath` +6. per-mode `columnDefs` +7. per-mode `normalizeRules` + +#### Task 11 + +修改 `Scene IR` 组装或生成逻辑,让 `G2` 样本在证据充分时真正输出 `modes[]`,而不是只保留空壳默认字段。 + +#### Task 12 + +修改 `G2` 生成脚本模板或编译路径,避免继续退化成通用: + +- `paginate -> secondary_request -> filter` + +要求生成结果能体现: + +1. `month` 模式 +2. `week` 模式 +3. 不同模式的请求差异 +4. 不同模式的列差异 + +#### Task 13 + +新增或更新测试,证明: + +1. `台区线损大数据-月_周累计线损率统计分析` 可输出非空 `modes` +2. 至少一个 `G2` fixture 恢复出 mode-specific contract + +### WS5:Readiness Tightening + +#### Task 14 + +为 `G2` 新增或收紧 gate,至少覆盖: + +1. `g2_archetype_resolved` +2. `g2_bootstrap_resolved` +3. `g2_modes_present` +4. `g2_request_contract_complete` +5. `g2_response_contract_complete` + +#### Task 15 + +调整 readiness 评级逻辑,保证以下情况不再给出高等级: + +1. `modes = []` +2. `requestTemplate = null` +3. `columnDefs = []` +4. archetype 误判 + +#### Task 16 + +新增或更新测试,证明: + +1. 不闭合 `G2` 样本会被降级或阻断 +2. readiness 与候选验证门槛一致 + +### Phase 2 Exit Criteria + +1. 至少 `G2` 主样本拥有可解释的 mode contract +2. readiness 不再虚高 +3. `G2` 生成结果在结构上具备进入候选门槛的可能 + +## Phase 3:回归真实样本并产出整改报告 + +### Objective + +在整改完成后,重新生成三份 `G2` 真实样本,并输出第二轮正式结论。 + +### Tasks + +#### Task 17 + +重新生成以下三份 `G2` 样本: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +#### Task 18 + +按与第一轮完全一致的口径,对比以下项目: + +1. archetype +2. bootstrap +3. modes +4. request contract +5. response / column / normalize contract +6. readiness + +#### Task 19 + +输出整改后的第二轮报告,至少包含: + +1. 哪些 blocker 被修掉 +2. 哪些 blocker 仍存在 +3. 
哪些样本进入候选验证名单 +4. 哪些样本仍需 fail-closed + +### Deliverables + +1. 第二轮 `G2` 真实样本生成结果 +2. 第二轮 `G2` 整改回归报告 +3. 更新后的候选验证名单 + +### Acceptance Criteria + +1. `台区线损大数据-月_周累计线损率统计分析` 至少进入候选验证名单 +2. 三份样本不再统一坍缩成 `paginated_enrichment` +3. readiness 与真实业务闭合程度基本一致 + +## File-Level Targets + +本计划执行时,至少会触达以下类型资产: + +1. `src/generated_scene/` 下的 analyzer / generator / readiness 相关实现 +2. `tests/fixtures/generated_scene/` 下的 `G2` fixture 或 canonical 资产 +3. `tests/` 下与 scene generator / canonical / readiness 相关的回归测试 +4. `docs/superpowers/reports/` 下的第二轮整改报告 + +## Completion Criteria + +本计划完成的标志是: + +1. `G2` 主样本达到候选验证门槛 +2. `G2` 家族 blocker 从“稳定复现”转为“部分修复且可解释” +3. 后续是否切换到 `G1/G3`,可以建立在整改后二轮报告上,而不是继续依赖第一轮失败画像 diff --git a/docs/superpowers/plans/2026-04-18-g3-paginated-enrichment-plan.md b/docs/superpowers/plans/2026-04-18-g3-paginated-enrichment-plan.md new file mode 100644 index 0000000..b9fb02f --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g3-paginated-enrichment-plan.md @@ -0,0 +1,458 @@ +# G3 Paginated Enrichment Plan + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Spec:** [2026-04-18-g3-paginated-enrichment-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-18-g3-paginated-enrichment-design.md) + +## Plan Intent + +本计划用于把 `G3` 分页补数家族设计拆解为可执行任务,目标是把当前 `paginated_enrichment` 从“宽泛的复杂 workflow 标签”推进到“具备证据层、最小合同、canonical baseline 和 fail-closed 判定”的正式主线 archetype。 + +本计划严格限定在 `G3 / P0-3` 落地,不扩展到: + +1. `G6/G7/G8` +2. 全量 `95598` 家族并发整改 +3. 登录恢复或宿主 transport 重构 +4. 102 个场景大规模铺开 + +## Success Baseline + +本计划完成后的最低成功口径固定为: + +1. `95598工单明细表` 不再只是“复杂工单类”模糊样本 +2. 生成链能够显式恢复: + - `main request` + - `pagination plan` + - `enrichment requests` + - `export plan` +3. `localhost:*`、宿主注入和 BrowserAction 不再被误判为业务主链 +4. `G3` 具备最小可编译合同和独立 gate +5. 证据不足时结果稳定 `fail-closed` +6. `95598、12398、流程超期风险工单明细` 能作为第一扩展样板进入复用验证 + +## Scope Guardrails + +执行过程中保持以下边界不变: + +1. 不把 `G3` 回退为普通分页表识别 +2. 不把宿主桥接能力提前混入 `G3` 合同 +3. 不为了先生成 skill 而放松 gate +4. 
不并发展开 `G6/G7/G8` +5. 不在本计划中做真实内网人工验证 + +## Target Samples + +本计划整改与回归只围绕以下两个样板: + +1. `95598工单明细表` +2. `95598、12398、流程超期风险工单明细` + +其中角色固定为: + +1. `95598工单明细表` + - 作用:`P0-3` 主样板 + - 目标:冻结 `G3 canonical` +2. `95598、12398、流程超期风险工单明细` + - 作用:第一扩展样板 + - 目标:验证 `G3` 合同与证据层是否可复用 + +## Workstreams + +本计划拆为五条工作流: + +1. `WS1` G3 边界冻结与样板建档 +2. `WS2` G3 证据层建模 +3. `WS3` G3 Scene IR / compiler gate / readiness 建设 +4. `WS4` G3 P0 canonical 与失败 taxonomy 冻结 +5. `WS5` G3 真实样本回归与报告 + +## Phase Overview + +本计划按五个阶段推进: + +1. Phase 0:冻结 `G3` 边界与样板 +2. Phase 1:建立 `G3` 证据层 +3. Phase 2:建立 `G3` 最小合同与 gate +4. Phase 3:冻结 `P0-3 canonical` +5. Phase 4:回归真实样本并输出首轮报告 + +执行顺序固定为: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3 -> Phase 4` + +## Phase 0:冻结 G3 边界与样板 + +### Objective + +先把 `G3` 的问题边界、主样板和扩展样板固定下来,避免开发过程中把工单类、宿主桥接类和导出分析类重新混在一起。 + +### WS1:G3 边界冻结与样板建档 + +#### Task 1 + +冻结 `G3` 正式定义: + +1. 不是普通分页表 +2. 不是宿主桥接型 +3. 而是“主查询链 + 分页链 + 补数链 + 导出链”并存的复杂 workflow 报表 + +#### Task 2 + +冻结 `95598工单明细表` 为唯一 `P0-3` 主样板。 + +#### Task 3 + +冻结 `95598、12398、流程超期风险工单明细` 为第一扩展样板。 + +#### Task 4 + +固化 `G3` 进入条件: + +1. 存在主查询链候选 +2. 存在分页控制证据 +3. 存在补数或关联详情链 +4. 最终结果依赖分页拉全、补齐、导出或汇总 + +#### Task 5 + +固化 `G3` 排除条件: + +1. 单请求即可完成的普通报表 +2. 仅靠 BrowserAction 推进、无稳定业务主链 +3. 以本地落库分析或文档产物为主体 +4. `localhost:*` 或宿主依赖明显压过业务证据 + +### Deliverables + +1. `G3` family definition +2. `G3` 样板清单 +3. `G3` 进入条件与排除条件 +4. `G3` 与其它家族边界说明 + +### Exit Criteria + +1. `95598工单明细表` 不再作为模糊工单样本讨论 +2. `G3` 不再与宿主桥接型、文档产物型场景混淆 + +## Phase 1:建立 G3 证据层 + +### Objective + +把源码直接压成 `Scene IR` 的路径升级为:先形成 `G3` 可裁决证据,再归约成 `Scene IR`。 + +### WS2:G3 证据层建模 + +#### Task 6 + +定义 `main_request_candidate`,承载: + +1. 主查询 endpoint +2. 查询参数模板 +3. 时间范围或主过滤条件 + +#### Task 7 + +定义 `pagination_candidate`,承载: + +1. 页码字段 +2. pageSize 字段 +3. 翻页终止条件 +4. 滚动窗口或区间推进规则 + +#### Task 8 + +定义 `enrichment_request_candidate`,承载: + +1. 详情补查 +2. 二次接口 +3. 关联补数 + +#### Task 9 + +定义 `join_key_candidate`,承载: + +1. 工单号 +2. 流程号 +3. 用户号 +4. 设备号 +5. 
其它主补链关联键 + +#### Task 10 + +定义 `export_candidate`,承载: + +1. 导出接口 +2. 导出参数 +3. 导出前置动作 +4. 产物类型 + +#### Task 11 + +定义 `workflow_step_candidate`,承载: + +1. 主查 +2. 翻页 +3. 补查 +4. 聚合 +5. 导出 + +之间的顺序关系。 + +#### Task 12 + +定义 `dedupe_or_merge_rule_candidate`,承载: + +1. 去重规则 +2. 主从并回规则 +3. 跨页累积规则 + +#### Task 13 + +定义 `host_bridge_candidate` 与 `localhost_dependency_candidate`,确保宿主链只作为独立证据保留。 + +#### Task 14 + +建立证据归并与冲突裁决规则,明确: + +1. 哪些属于业务主链 +2. 哪些属于宿主桥接 +3. 哪些属于结果导出链 + +### Deliverables + +1. `G3` evidence schema +2. `G3` evidence type dictionary +3. 证据归并规则 +4. `95598工单明细表` 第一版证据样例 + +### Exit Criteria + +1. 主链、分页链、补链、导出链、宿主链能够分槽呈现 +2. `localhost:*` 不再混入业务主链 + +## Phase 2:建立 G3 最小合同与 Gate + +### Objective + +把 `G3` 的判定标准从“看起来像分页补数场景”升级为“最小业务合同是否成立”。 + +### WS3:G3 Scene IR / compiler gate / readiness 建设 + +#### Task 15 + +定义 `G3` 最小合同,至少包括: + +1. `main_request` +2. `pagination_plan` +3. `enrichment_requests[]` +4. `join_keys[]` +5. `export_plan` +6. `merge_or_dedupe_rules` + +#### Task 16 + +在 `Scene IR` 中承载 `G3` 专属结构,不再退化成普通 `paginated_enrichment` 空壳字段。 + +#### Task 17 + +增加 `G3` gate,至少包括: + +1. `g3_main_request_resolved` +2. `g3_pagination_contract_complete` +3. `g3_enrichment_contract_complete` +4. `g3_join_key_resolved` +5. `g3_export_path_identified` +6. `g3_runtime_scope_compatible` + +#### Task 18 + +定义 blocker / readiness 判定口径,要求能区分: + +1. 业务证据不足 +2. 分页合同不闭合 +3. 补数合同不闭合 +4. 导出链依赖宿主 +5. 运行时依赖未满足 + +#### Task 19 + +落地 `fail-closed` 规则: + +1. 主请求链缺失,阻断 +2. 分页链存在但终止条件不明,阻断 +3. 补数链存在但 join key 不明,阻断 +4. 只有导出动作没有业务主链,阻断 +5. 宿主桥接证据明显多于业务证据,阻断 + +#### Task 20 + +补充测试,证明未闭合 `G3` 样本不能伪装成 runnable skill。 + +### Deliverables + +1. `G3` minimal contract table +2. `G3` gate table +3. `G3` blocker / readiness table +4. `G3` Scene IR example + +### Exit Criteria + +1. `G3` 已拥有独立 gate +2. 未闭合结果会准确阻断 +3. 
`compiler` 不再吞入未闭合 `G3 IR` + +## Phase 3:冻结 P0-3 Canonical + +### Objective + +把 `95598工单明细表` 做成 `G3` 的第一版标准答案、关键证据基线和失败 taxonomy 基线。 + +### WS4:G3 P0 canonical 与失败 taxonomy 冻结 + +#### Task 21 + +冻结 `95598工单明细表` 的 canonical `Scene IR`。 + +#### Task 22 + +冻结关键证据清单,至少包括: + +1. 主请求链 +2. 分页链 +3. 补数链 +4. join key +5. 导出链 +6. 宿主依赖 + +#### Task 23 + +冻结验收检查表,至少检查: + +1. 主链是否恢复 +2. 分页链是否恢复 +3. 补链是否恢复 +4. join key 是否恢复 +5. 导出链是否恢复 +6. 宿主链是否被隔离 +7. readiness 是否与真实闭合程度一致 + +#### Task 24 + +冻结失败 taxonomy,至少包括: + +1. `main_chain_missing` +2. `pagination_incomplete` +3. `enrichment_incomplete` +4. `join_key_missing` +5. `export_only_without_business_chain` +6. `host_bridge_pollution` +7. `runtime_dependency_unresolved` + +#### Task 25 + +建立“生成结果 vs canonical”对齐方式。 + +### Deliverables + +1. `G3` P0 canonical `Scene IR` +2. `G3` P0 evidence baseline +3. `G3` acceptance checklist +4. `G3` failure taxonomy table + +### Exit Criteria + +1. `95598工单明细表` 成为 `G3` 第一版统一校准源 +2. 后续 `G3` 回归都可以对照固定 taxonomy + +## Phase 4:真实样本回归与首轮报告 + +### Objective + +先用 `P0` 主样板建立闭环,再用一个扩展样板验证 `G3` 合同是否具备复用性。 + +### WS5:G3 真实样本回归与报告 + +#### Task 26 + +重新生成 `95598工单明细表`。 + +#### Task 27 + +按统一口径检查: + +1. archetype +2. bootstrap +3. main request +4. pagination plan +5. enrichment requests +6. join keys +7. export plan +8. localhost / host bridge separation +9. readiness / blocker + +#### Task 28 + +输出 `G3 P0 validation report`,结论只允许以下三种: + +1. `通过` +2. `Fail-closed 且理由准确` +3. `误判,需要整改` + +#### Task 29 + +重新生成 `95598、12398、流程超期风险工单明细`。 + +#### Task 30 + +对比其与 `P0` 样板之间: + +1. 哪些合同可复用 +2. 哪些 blocker 是家族共性 +3. 哪些是扩展样板特有复杂度 + +#### Task 31 + +输出 `G3 first-round family expansion report`。 + +### Deliverables + +1. `G3` P0 样板生成结果 +2. `G3` P0 验证报告 +3. `G3` 扩展样板生成结果 +4. `G3` 首轮家族扩展报告 + +### Acceptance Criteria + +1. `95598工单明细表` 至少达到“结构恢复完整”或“Fail-closed 理由准确” +2. 扩展样板不会再被粗暴压成普通分页表 +3. `G3` 失败结果具备可解释性 +4. `G3` 至少形成第一版家族复用口径 + +## File-Level Targets + +本计划执行时,至少会触达以下资产类型: + +1. `docs/superpowers/specs/` +2. 
`docs/superpowers/plans/` +3. `docs/superpowers/reports/` +4. `src/generated_scene/` 下与证据层、合同层、readiness 相关实现 +5. `tests/fixtures/generated_scene/` +6. `tests/` + +## Completion Criteria + +本计划完成的标志是: + +1. `G3` 已拥有正式边界定义 +2. `G3` 已拥有最小证据层与最小合同 +3. `G3` 已拥有独立 gate 与 fail-closed 口径 +4. `95598工单明细表` 已成为 `P0-3 canonical` +5. `G3` 首轮真实样本回归已经给出正式结论 + +## Next Step + +本计划完成后,后续顺序固定为: + +1. 若 `G3` 的 `P0` 与首轮扩展样板稳定,再决定是否补第二个 `G1-E` 样板 +2. 然后再决定是否进入 `G6` 的独立设计与计划 diff --git a/docs/superpowers/plans/2026-04-18-g6-host-bridge-workflow-plan.md b/docs/superpowers/plans/2026-04-18-g6-host-bridge-workflow-plan.md new file mode 100644 index 0000000..82b8ae5 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g6-host-bridge-workflow-plan.md @@ -0,0 +1,77 @@ +# G6 Host Bridge Workflow Plan + +> Date: 2026-04-18 +> Status: Initial implementation slice + +## Plan Intent + +Start the `G6` line after `G1-E` second-sample reuse has been validated. + +This plan implements the first safe slice only: classification, evidence separation, readiness gates, and fail-closed behavior. + +## Phase 0: Boundary Freeze + +Tasks: + +1. keep `电能表现场检验完成率指标报表` as the P0 boundary sample +2. define the repo-local representative fixture +3. keep `G6` separate from `G1`, `G1-E`, `G3`, `G7`, and `G8` + +Deliverables: + +1. `G6` design doc +2. `G6` plan doc +3. repo-local representative fixture + +Acceptance criteria: + +1. `G6` is no longer discussed as a `G1` candidate +2. `G6` is not treated as a generic localhost-pollution case + +## Phase 1: Analyzer Classification + +Tasks: + +1. add `host_bridge_workflow` as a workflow archetype +2. detect explicit host bridge actions +3. keep `localhost:*` as supporting host-runtime evidence +4. ensure explicit host bridge signals outrank `G1-E` +5. ensure ordinary localhost export noise does not become `G6` + +Acceptance criteria: + +1. `g6_host_bridge_workflow` fixture classifies as `host_bridge_workflow` +2. 
`bootstrap_localhost_pollution` remains a non-G6 business scene + +## Phase 2: Fail-Closed Gate + +Tasks: + +1. add readiness risks for missing or unsupported G6 contract +2. add `g6_host_bridge_detected` +3. add `g6_fail_closed` +4. block generation before runnable output + +Acceptance criteria: + +1. `G6` generation returns a controlled error +2. error message includes `host_bridge_workflow` +3. no pseudo-runnable skill is produced + +## Phase 3: Regression + +Tasks: + +1. run scene generator regression +2. run family regression +3. run family policy regression +4. run canonical regression + +Acceptance criteria: + +1. all target regressions pass +2. no `G1-E/G3/G2` behavior regresses + +## Next Step + +After this safe G6 slice, continue to `G7 多接口盘点汇总型` boundary assessment unless G6 runtime implementation becomes the selected priority. diff --git a/docs/superpowers/plans/2026-04-18-g7-multi-endpoint-inventory-plan.md b/docs/superpowers/plans/2026-04-18-g7-multi-endpoint-inventory-plan.md new file mode 100644 index 0000000..ed56a60 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g7-multi-endpoint-inventory-plan.md @@ -0,0 +1,68 @@ +# G7 Multi Endpoint Inventory Plan + +> Date: 2026-04-18 +> Status: Initial implementation slice + +## Plan Intent + +Start `G7` after the safe `G6` classification slice. + +This plan only establishes boundary classification and fail-closed behavior. It does not implement runnable multi-endpoint inventory aggregation. + +## Phase 0: Boundary Freeze + +Tasks: + +1. use `计量资产库存统计` as the P0 boundary sample +2. define a repo-local representative fixture +3. keep `G7` separate from `G1`, `G1-E`, `G6`, and `G8` + +Acceptance criteria: + +1. `G7` is no longer a `G1` candidate +2. `G7` is not confused with host bridge workflow + +## Phase 1: Analyzer Classification + +Tasks: + +1. add `multi_endpoint_inventory` as a workflow archetype +2. detect inventory endpoint families +3. 
classify scenes with three or more inventory endpoints as `G7` + +Acceptance criteria: + +1. `g7_multi_endpoint_inventory` fixture classifies as `multi_endpoint_inventory` +2. inventory endpoint names include `assetStatsQueryMeter` and `assetStatsQueryJlGnModule` + +## Phase 2: Fail-Closed Gate + +Tasks: + +1. add `g7_inventory_endpoints_detected` +2. add `g7_fail_closed` +3. block generation before runnable output + +Acceptance criteria: + +1. generation returns a controlled error +2. error message includes `multi_endpoint_inventory` +3. no pseudo-runnable skill is produced + +## Phase 3: Regression + +Tasks: + +1. run scene generator regression +2. run family regression +3. run family policy regression +4. run canonical regression + +Acceptance criteria: + +1. all target regressions pass +2. no existing family baseline regresses + +## Next Step + +After this safe G7 slice, continue to `G8 抓取落库分析出文档型` boundary assessment. diff --git a/docs/superpowers/plans/2026-04-18-g8-local-doc-pipeline-plan.md b/docs/superpowers/plans/2026-04-18-g8-local-doc-pipeline-plan.md new file mode 100644 index 0000000..d7cfe51 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-g8-local-doc-pipeline-plan.md @@ -0,0 +1,70 @@ +# G8 Local Document Pipeline Plan + +> Date: 2026-04-18 +> Status: Initial implementation slice + +## Plan Intent + +Start `G8` after the safe `G7` classification slice. + +This plan only establishes boundary classification and fail-closed behavior. It does not implement runnable local storage, SQL, or document generation orchestration. + +## Phase 0: Boundary Freeze + +Tasks: + +1. use `95598供电服务月报` as the P0 boundary sample +2. define a repo-local representative fixture +3. keep `G8` separate from `G1`, `G1-E`, `G6`, `G7`, and `G3` + +Acceptance criteria: + +1. `G8` is no longer a `G1` candidate +2. `G8` is not collapsed into generic host bridge workflow + +## Phase 1: Analyzer Classification + +Tasks: + +1. add `local_doc_pipeline` as a workflow archetype +2. 
detect `definedSqlQuery` +3. detect `docExport` +4. detect `selectData` / local config service persistence +5. prioritize `G8` over `G6` when both signals exist + +Acceptance criteria: + +1. `g8_local_doc_pipeline` fixture classifies as `local_doc_pipeline` +2. local pipeline actions are visible in deterministic facts + +## Phase 2: Fail-Closed Gate + +Tasks: + +1. add `g8_local_doc_pipeline_detected` +2. add `g8_fail_closed` +3. block generation before runnable output + +Acceptance criteria: + +1. generation returns a controlled error +2. error message includes `local_doc_pipeline` +3. no pseudo-runnable skill is produced + +## Phase 3: Regression + +Tasks: + +1. run scene generator regression +2. run family regression +3. run family policy regression +4. run canonical regression + +Acceptance criteria: + +1. all target regressions pass +2. no existing family baseline regresses + +## Next Step + +After this safe G8 slice, the boundary-reassignment sequence has a code-backed fail-closed guard for `G1-E`, `G6`, `G7`, and `G8`. diff --git a/docs/superpowers/plans/2026-04-18-lineloss-family-variant-expansion-plan.md b/docs/superpowers/plans/2026-04-18-lineloss-family-variant-expansion-plan.md new file mode 100644 index 0000000..ed9be00 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-lineloss-family-variant-expansion-plan.md @@ -0,0 +1,215 @@ +# 线损家族变体扩展计划 + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Inputs:** +> [2026-04-18-g2-family-expansion-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-18-g2-family-expansion-plan.md) +> [2026-04-18-g2-family-expansion-third-round-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-18-g2-family-expansion-third-round-report.md) + +## Plan Intent + +本计划用于承接当前已经收敛出的 `G2-A/G2-B/G2-C` 三类线损子型,把目标从“修通三个代表样本”推进到“可复制扩展更多线损变体”。 + +当前已经证明: + +1. `G2-A` 双模式主报表型可生成 +2. `G2-B` 周报单侧 mode 型可生成 +3. 
`G2-C` 混合联动型可生成 + +因此,下一阶段不再围绕这三个样本反复微调,而是要把“线损场景 -> 子型 -> 最小合同 -> 候选验证”这条复制链做出来。 + +## Success Baseline + +本计划完成后的最低成功口径固定为: + +1. 新增一批线损真实场景能够被归入现有子型或新子型 +2. 每个新增子型都有最小合同标准 +3. 至少每类新增子型有 2 到 3 个真实样本完成迁移验证 +4. 不能归类或合同不足的样本继续 `fail-close` +5. 形成一份“线损家族实施映射表” +6. 输出一轮新的线损家族扩展报告 + +## Scope Guardrails + +执行过程中保持以下边界不变: + +1. 不扩展到非线损报表家族 +2. 不处理统一登录、隐藏域登录或宿主 transport 重构 +3. 不在本计划中做真实内网人工验证 +4. 不把本计划扩散成 102 个全量场景一次性铺开 +5. 不回头推翻已经收敛的 `G2-A/G2-B/G2-C` 结果 + +## Phase Overview + +本计划按五个阶段推进: + +1. Phase 0:冻结线损扩展基线 +2. Phase 1:建立线损变体分组清单 +3. Phase 2:为新增变体建立最小合同标准 +4. Phase 3:按分组扩展 fixture / 判定 / 生成链路 +5. Phase 4:回归真实样本并输出扩展报告 + +执行顺序固定为: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3 -> Phase 4` + +## Phase 0:冻结扩展基线 + +### Objective + +把当前已经达成的线损家族基线冻结下来,作为后续横向复制的起点。 + +### Tasks + +1. 固化 `G2-A/G2-B/G2-C` 当前口径 +2. 固化这三类子型的最小合同事实 +3. 固化本计划不再回到“主样本修通”阶段 + +### Exit Criteria + +1. `G2-A/G2-B/G2-C` 被视为已建立的家族基线 +2. 扩展工作被明确表述为“更多线损变体复制” + +## Phase 1:建立线损变体分组清单 + +### Objective + +先把“还要扩哪些线损场景”分组,而不是直接零散补样本。 + +### Tasks + +1. 从现有线损场景中筛出最接近当前家族的候选样本 +2. 按结构而不是按名称分组,至少分成: + - 双模式主报表型 + - 周报/日报单侧模式型 + - 排行/明细主链路型 + - 线损主链路 + 外部系统联动型 + - 异常诊断/详情下钻型 +3. 每组先挑 2 到 3 个代表样本 + +### Deliverables + +1. 线损变体分组清单 +2. 每组代表样本名单 + +### Exit Criteria + +1. 不再按单个场景零散推进 +2. 后续整改对象以“分组”为单位推进 + +## Phase 2:建立新增变体最小合同标准 + +### Objective + +为每一组新增线损变体先定义“什么叫最低可用”,再动生成器。 + +### Tasks + +1. 为每组定义最小合同,至少明确: + - 主 endpoint + - request template + - response path + - 关键字段或 column defs + - normalize / required fields +2. 明确哪些链路属于主合同 +3. 明确哪些链路属于扩展证据或风险证据 + +### Deliverables + +1. 线损变体最小合同表 +2. 每组的候选验证门槛 + +### Exit Criteria + +1. 每组都有统一判定口径 +2. 后续开发不再靠单样本临时拍脑袋 + +## Phase 3:按分组扩展生成链路 + +### Objective + +把新增变体分组逐类接入 analyzer / generator / readiness。 + +### Tasks + +1. 每一组先补 fixture +2. 每一组先补测试 +3. 再补子型判定 +4. 再补最小合同恢复 +5. 再补 readiness 分级 + +### Rules + +1. 任何一组都必须先有 fixture,再改逻辑 +2. 不允许多个组同时无边界并行扩散 +3. 一组完成后再推进下一组 + +### Deliverables + +1. 新增线损变体 fixture +2. 新增家族回归测试 +3. 
对应 analyzer / generator / readiness 扩展实现 + +### Exit Criteria + +1. 至少新增 1 到 2 类线损变体可生成 +2. 原有 `G2-A/G2-B/G2-C` 不回退 + +## Phase 4:真实样本回归与扩展报告 + +### Objective + +把扩展后的线损家族能力回到真实样本上验证,而不是停在 fixture 层。 + +### Tasks + +1. 重新生成各组代表样本 +2. 对比: + - 子型判定 + - bootstrap + - request contract + - response / column / normalize contract + - readiness + - 是否进入候选验证名单 +3. 输出线损家族扩展回归报告 + +### Deliverables + +1. 真实样本生成结果 +2. 线损家族扩展回归报告 +3. 更新后的候选验证名单 + +### Acceptance Criteria + +1. 至少 2 个以上新增线损变体组进入候选验证阶段 +2. 不能归类的场景继续 `fail-close` +3. 原有三类 `G2-A/G2-B/G2-C` 不回退 + +## Workstream Breakdown + +本计划建议按以下工作流落地: + +1. `WS1` 线损变体盘点与分组 +2. `WS2` 新增变体最小合同设计 +3. `WS3` fixture / 回归测试扩展 +4. `WS4` analyzer / generator / readiness 扩展 +5. `WS5` 真实样本回归与报告 + +## File-Level Targets + +执行本计划时,预计触达以下资产类型: + +1. `docs/superpowers/plans/` +2. `docs/superpowers/reports/` +3. `tests/fixtures/generated_scene/` +4. `tests/` +5. `src/generated_scene/` + +## Completion Criteria + +本计划完成的标志是: + +1. 线损家族不再只有三个代表样本可解释 +2. 已建立“按分组复制”的扩展方法,而不是单样本修修补补 +3. 后续是否继续向更广场景扩展,可以建立在这份线损家族扩展结果上 diff --git a/docs/superpowers/plans/2026-04-18-scene-generator-ops-console-plan.md b/docs/superpowers/plans/2026-04-18-scene-generator-ops-console-plan.md new file mode 100644 index 0000000..0745b0b --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-scene-generator-ops-console-plan.md @@ -0,0 +1,237 @@ +# Scene Generator Ops Console Plan + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Spec:** [2026-04-18-scene-generator-ops-console-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-18-scene-generator-ops-console-design.md) + +## Plan Intent + +本计划用于将 scene generator 页面从“开发调试控制台”收敛为“面向运维的场景 Skill 生成工作台”,并把上游 `spec` 中已经明确的信息架构、中文化、显隐分层和交互流程拆解为可执行的实施步骤。 + +本计划只覆盖前端页面层与页面交互层的收敛,不扩展到 scene generator 后端分析逻辑或生成协议改造。 + +## Scope Guardrails + +本计划执行过程中,以下边界保持不变: + +1. 不修改 scene generator 后端接口协议 +2. 不重写分析算法或 Skill 生成逻辑 +3. 不删除现有调试信息,只调整默认显隐与展示层次 +4. 
不把本计划扩展成新的前端设计系统建设 + +## Primary Outcome + +本计划的直接目标是让运维人员不需要理解 `Scene IR`、`workflowArchetype`、`requestTemplate` 等底层术语,也能完成: + +1. 选择场景目录 +2. 启动分析 +3. 判断是否可生成 +4. 启动生成 +5. 查看结果目录或失败原因 + +## Workstreams + +本计划拆分为四条工作流: + +1. `WS1` 信息架构与页面分层收敛 +2. `WS2` 中文化与业务态映射 +3. `WS3` 日志、结果与风险摘要收敛 +4. `WS4` 调试信息折叠与双层体验收口 + +## Phase Overview + +计划按五个阶段推进: + +1. Phase 0:冻结页面目标与口径 +2. Phase 1:完成信息架构重组 +3. Phase 2:完成中文化和业务态映射 +4. Phase 3:完成日志与结果区收敛 +5. Phase 4:完成调试层折叠和整体验收 + +## Phase 0:冻结页面目标与口径 + +### Objective + +先冻结该页面服务对象、默认使用模式、主状态表达与一级/二级/三级信息边界,避免实施过程中一边改布局一边改定位。 + +### Tasks + +1. 固化页面角色定义:运维执行者优先,开发 / 调试者次级 +2. 固化页面定位:运维工作台,而不是开发调试台 +3. 固化默认模式:默认运维模式,技术详情折叠 +4. 固化一级/二级/三级信息边界 +5. 固化状态表达、场景类型映射和可执行性映射口径 + +### Deliverables + +1. 页面角色说明 +2. 信息层级边界说明 +3. 状态与场景类型映射表 +4. 显隐策略说明 + +### Exit Criteria + +1. 页面默认服务对象不再摇摆 +2. 一级信息与技术详情边界不再摇摆 +3. 中文状态和类型映射口径冻结 + +## Phase 1:完成信息架构重组 + +### Objective + +将当前“配置区 + 分析区 + 生成日志 + 技术字段混排”的页面结构,重组为运维可理解的工作台结构。 + +### Tasks + +1. 重组顶部总览区 +2. 重组左侧主操作区 +3. 重组右侧结果摘要区 +4. 重组底部执行过程区 +5. 预留技术详情区并默认折叠 + +### Required Sections + +首屏结构固定为: + +1. 顶部总览区 +2. 左侧主操作区 +3. 右侧结果摘要区 +4. 底部执行过程区 +5. 技术详情区 + +### Deliverables + +1. 页面区块结构实现 +2. 区块标题与区块顺序实现 +3. 一级流程的视觉主路径 + +### Acceptance Criteria + +1. 首屏不再同时暴露大量技术细节 +2. 运维默认流程可以按“选择目录 -> 分析 -> 生成 -> 查看结果”完成 +3. 页面结构从“调试面板”转为“工作台” + +## Phase 2:完成中文化和业务态映射 + +### Objective + +将当前页面的大量英文标题、按钮和技术术语替换为面向运维的中文表述,并将底层技术状态映射为业务可读状态。 + +### Tasks + +1. 替换页面标题、副标题和区块标题 +2. 替换按钮文案和输入框占位文案 +3. 替换日志标签文案 +4. 建立 `Readiness` 中文映射 +5. 建立 archetype 中文映射 + +### Required Mappings + +最小映射集合包括: + +1. `Readiness A/B/C -> 可直接生成 / 可生成但需确认 / 暂不建议生成` +2. `single_request_table -> 单页报表` +3. `multi_mode_request -> 多模式报表` +4. `paginated_enrichment -> 分页明细` +5. `page_state_eval -> 页面检测` + +### Deliverables + +1. 中文标题与按钮实现 +2. 中文状态映射实现 +3. 中文场景类型映射实现 +4. 中文风险与结果文案实现 + +### Acceptance Criteria + +1. 首屏不再出现大面积未翻译英文 +2. 运维可直接理解主要状态和场景类型 +3. 
技术术语不再作为首页主文案

## Phase 3:完成日志与结果区收敛

### Objective

让页面日志和结果区优先服务“执行与排障”,而不是原始流式调试输出。

### Tasks

1. 将 `Generation Log` 改为 `执行过程`
2. 将 `status / log / complete / error` 标签中文化
3. 将原始流日志优先收敛为中文摘要日志
4. 完善 `生成结果` 区的成功/失败状态展示
5. 强化输出目录和结果文件入口

### Deliverables

1. 中文摘要日志
2. 生成结果卡片
3. 失败原因摘要
4. 输出目录入口

### Acceptance Criteria

1. 运维无需阅读底层 SSE 技术消息也能理解执行过程
2. 成功时能快速找到结果目录
3. 失败时能快速看到中文失败原因

## Phase 4:完成调试层折叠和整体验收

### Objective

保留开发与排障能力,但让其默认下沉为调试层,不干扰运维首屏使用。

### Tasks

1. 将 `Scene IR`、`requestTemplate`、`evidence`、`workflow steps` 等收进技术详情区
2. 将 `scene-id`、`scene-kind`、`targetUrl override`、`workflow archetype override` 收进高级设置
3. 校验默认显隐逻辑
4. 校验运维模式与调试模式体验边界
5. 完成最终页面口径验收

### Deliverables

1. 高级设置折叠区
2. 技术详情折叠区
3. 最终页面显隐策略实现

### Acceptance Criteria

1. 运维首页只承载状态摘要、操作与结果
2. 开发调试仍可通过折叠区查看完整技术信息
3. 不再出现“默认首屏就是技术调试面板”的体验

## File-Level Planning Targets

本计划后续实施至少覆盖以下资产:

1. [sg_scene_generator.html](D:/data/ideaSpace/rust/sgClaw/claw-new/frontend/scene-generator/sg_scene_generator.html)
2. 与页面展示文案和显隐逻辑相关的前端脚本
3. 与页面标题、区块结构和状态映射相关的前端样式与渲染逻辑

## Completion Criteria

本计划完成的标志为:

1. 页面默认形态已从“开发调试控制台”转为“运维工作台”
2. 首屏已完成中文化和业务态映射
3. 运维默认流程可在首屏完成,不依赖技术详情区
4. 调试信息仍保留,但不再默认淹没首页
5. 失败原因、风险提示和结果目录对运维可直接理解

## Risks and Control Points

1. 若只改文案不改信息架构,页面仍会保持臃肿
2. 若只隐藏字段不重做结果摘要,运维仍无法快速判断是否可生成
3. 若过度删除技术信息,会削弱开发与排障效率
4. 若状态映射不统一,页面会出现中文标题下仍夹杂底层技术语义的割裂感

## Out of Plan

以下事项不属于本计划直接交付范围:

1. scene generator 后端分析逻辑重构
2. Skill 生成协议变更
3. 页面服务端接口新增
4. 
运维权限、账号体系或多角色权限控制 diff --git a/docs/superpowers/plans/2026-04-18-scene-skill-post-roadmap-execution-plan.md b/docs/superpowers/plans/2026-04-18-scene-skill-post-roadmap-execution-plan.md new file mode 100644 index 0000000..e1890bf --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-scene-skill-post-roadmap-execution-plan.md @@ -0,0 +1,277 @@ +# sgClaw Scene Skill Post-Roadmap Execution Plan + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Spec:** [2026-04-18-scene-skill-post-roadmap-execution-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-18-scene-skill-post-roadmap-execution-design.md) + +## Plan Intent + +This plan starts after the closure of the current `60-to-90 roadmap`. + +Its purpose is not to reopen `G1/G2/G3` implementation, but to: + +1. unify current execution state +2. start real-sample validation +3. plan the next bounded roadmap + +## Scope Guardrails + +1. Do not reopen completed `G1/G2/G3` repo-local baseline implementation. +2. Do not keep expanding fixtures as the primary mode of progress. +3. Do not silently pull `G4/G5` into implementation. +4. Do not directly implement unified login recovery in this plan. +5. Do not treat the old roadmap as still open-ended. +6. Phase 1 execution-board work must stay minimal and exist only to support Phase 2 real-sample validation. +7. Once `G2`, `G1-E`, and `G3` each have at least one mappable real sample, execution must move immediately into Phase 2. +8. Any new asset that does not directly support real-sample validation is deferred to Phase 3 or Phase 4. + +## Workstreams + +1. `WS1` Current Execution Board Unification +2. `WS2` Real Sample Validation +3. `WS3` Boundary and Runtime Gap Planning +4. `WS4` Next Roadmap Definition + +## Phase Overview + +1. Phase 0: Freeze Handover Boundary +2. Phase 1: Build Current Execution Board +3. Phase 2: Start Real Sample Validation +4. Phase 3: Define Boundary and Runtime Entry Rules +5. 
Phase 4: Publish the Next Roadmap + +Execution order is fixed as: + +`Phase 0 -> Phase 1 -> Phase 2 -> Phase 3 -> Phase 4` + +## Phase 0: Freeze Handover Boundary + +### Objective + +Freeze the boundary between the completed roadmap and the next-stage work. + +### Tasks + +1. Freeze current roadmap completion status. +2. Freeze current mainline family status for `G2`, `G1-E`, and `G3`. +3. Freeze current boundary family status for `G6/G7/G8`. +4. Freeze current deferred status for `G4/G5`. + +### Deliverables + +1. roadmap handover snapshot +2. next-stage scope statement +3. current family-state matrix + +### Acceptance Criteria + +1. old and new roadmap boundaries are explicit +2. next-stage work is no longer mixed into the old roadmap + +## Phase 1: Build Current Execution Board + +### Objective + +Create the minimum authoritative execution board required to start real-sample validation for the current `102-scene` status. + +### WS1 + +#### Task 1 + +Build one `102-scene current execution board`. + +#### Task 2 + +Define the stable scene status vocabulary: + +1. `promoted-baseline` +2. `promoted-expansion` +3. `boundary-family` +4. `deferred` +5. `degraded` +6. `unvalidated` + +#### Task 3 + +Map current `G2/G1-E/G3` scene promotions into the board. + +#### Task 4 + +Generate a snapshot-vs-current diff asset. + +#### Task 5 + +Stop Phase 1 immediately after `G2`, `G1-E`, and `G3` each have at least one mappable real sample entry in the board. + +### Deliverables + +1. `102-scene current execution board` +2. snapshot-vs-current diff report +3. scene-to-family status mapping + +### Acceptance Criteria + +1. every scene has one current-state label +2. promoted states are visible without reading multiple assets +3. board status matches current family assets +4. the board is limited to the minimum fields needed by Phase 2 validation records +5. 
no Phase 1 asset is added unless it directly supports real-sample validation

## Phase 2: Start Real Sample Validation

### Objective

Create the next quality layer above fixture success.

### WS2

#### Task 6

Choose the first real-sample validation set for:

1. `G2`
2. `G1-E`
3. `G3`

#### Task 7

Freeze validation criteria:

1. compile success
2. readiness correctness
3. data correctness
4. output correctness
5. fail-closed correctness

#### Task 8

Create a real-sample validation record template.

#### Task 9

Record first-round real-sample results.

#### Task 10

Write mismatches back into the execution board.

#### Task 11

Reject requests for new board-only assets that do not unblock current validation execution.

### Deliverables

1. real-sample validation plan
2. real-sample record template
3. first-round validation records
4. mismatch taxonomy

### Acceptance Criteria

1. each mainline family has at least one real-sample record
2. real-sample status is separated from fixture status
3. mismatch reasons are explicit and reusable
4. Phase 2 begins as soon as `G2`, `G1-E`, and `G3` each have one mappable real sample

## Phase 3: Define Boundary and Runtime Entry Rules

### Objective

Prepare the next bounded execution scope instead of drifting into it.

### WS3

#### Task 12

Assess `G6/G7/G8` boundary-family readiness for future expansion.

#### Task 13

Define formal entry criteria for `G4/G5`.

#### Task 14

Build a runtime-gap matrix for:

1. login recovery
2. host-runtime integration
3. transport/runtime gaps
4. local document and attachment workflows

#### Task 15

Separate:

1. archetype-family gaps
2. runtime-platform gaps

### Deliverables

1. boundary readiness note
2. deferred family entry criteria
3. runtime gap matrix
4. prioritization note

### Acceptance Criteria

1. `G4/G5` do not enter the next build round without documented criteria
2. runtime gaps are tracked separately from family expansion
3. next implementation scope has an explicit reason

## Phase 4: Publish the Next Roadmap

### Objective

Replace open-ended continuation with a new bounded roadmap.

### WS4

#### Task 16

Write the next-stage design.

#### Task 17

Write the next-stage plan.

#### Task 18

Define milestone ordering.

#### Task 19

Define next-stage completion criteria.

### Deliverables

1. post-roadmap design
2. post-roadmap plan
3. milestone table
4. completion criteria

### Acceptance Criteria

1. new implementation work has a new roadmap
2. the old roadmap is no longer implicitly extended
3. next-stage completion can be judged independently

## Milestone Order

1. Freeze the handover boundary
2. Unify the execution board
3. Start real-sample validation
4. Freeze boundary/runtime entry rules
5. Publish the next roadmap

No new implementation round should begin before milestones 1 to 4 are complete.
No Phase 1 expansion should continue after the minimum board needed for milestone 3 is available.

## Completion Criteria

This plan is complete when:

1. the current roadmap is explicitly closed
2. the execution board is unified
3. real-sample validation is formally underway
4. 
a new bounded roadmap exists for post-roadmap work diff --git a/docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md b/docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md new file mode 100644 index 0000000..b4d7e57 --- /dev/null +++ b/docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md @@ -0,0 +1,128 @@ +# sgClaw Scene Skill Real Sample Validation Roadmap Plan + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Spec:** [2026-04-18-scene-skill-real-sample-validation-roadmap-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-18-scene-skill-real-sample-validation-roadmap-design.md) + +## Plan Intent + +This plan starts after the post-roadmap execution board and first-round validation layer are in place. + +Its purpose is to: + +1. execute selected real samples for `G2`, `G1-E`, and `G3` +2. use validation outcomes to decide the next bounded implementation scope +3. avoid drifting back into fixture-first or asset-first work + +## Scope Guardrails + +1. Do not reopen completed repo-local baseline implementation for `G1/G2/G3`. +2. Do not create new board-only assets unless they unblock current validation execution. +3. Do not open `G4/G5` implementation before formal entry decisions are documented. +4. Do not pull `G6/G7/G8` into the next build round without explicit validation pressure. + +## Workstreams + +1. `WS1` Mainline Real Sample Execution +2. `WS2` Validation Result Triage +3. `WS3` Boundary Runtime Entry Decision +4. `WS4` Deferred Family Entry Decision + +## Phase 0: Execute Mainline Real Samples + +### Objective + +Convert selected `G2`, `G1-E`, and `G3` anchors into executed real-sample records. + +### Tasks + +1. Execute `G2` anchor validation updates from the current mismatch baseline. +2. Keep `G1-E` real pass anchor as the current positive baseline. +3. Execute the pending `G3` real sample. +4. 
Write all outcomes into the validation record layer. + +### Deliverables + +1. updated real-sample validation records +2. updated mismatch taxonomy usage +3. updated execution-board validation statuses + +### Acceptance Criteria + +1. `G2`, `G1-E`, and `G3` each have executed real-sample records +2. `selected-not-yet-run` no longer remains for current mainline anchors + +## Phase 1: Triage Results Into Scope Decisions + +### Objective + +Use validation results, not fixture status, to choose the next bounded scope. + +### Tasks + +1. classify each mainline family result as `stable`, `mismatch-driven`, or `blocked-by-runtime` +2. identify which problems are compiler-family gaps and which are runtime gaps +3. define the next recommended scope from validation evidence + +### Deliverables + +1. validation triage report +2. next-scope recommendation + +### Acceptance Criteria + +1. the next scope is justified by executed validation evidence +2. repo-local success no longer acts as the sole decision signal + +## Phase 2: Boundary Runtime Entry Decision + +### Objective + +Decide whether `G6/G7/G8` should stay boundary-only or enter a runtime-focused roadmap. + +### Tasks + +1. compare boundary-family runtime gaps against executed validation pressure +2. decide whether any boundary family should enter the next roadmap +3. document non-entry decisions explicitly when scope stays closed + +### Deliverables + +1. boundary runtime decision note +2. next-roadmap inclusion or exclusion list + +### Acceptance Criteria + +1. `G6/G7/G8` entry decisions are explicit +2. no boundary family enters by drift + +## Phase 3: Deferred Family Entry Decision + +### Objective + +Decide whether `G4/G5` should remain closed or enter a later roadmap. + +### Tasks + +1. compare deferred-family criteria against current validation pressure +2. confirm whether `G4/G5` remain deferred or degraded +3. record the decision before any new implementation starts + +### Deliverables + +1. 
deferred family decision note +2. updated next-roadmap scope boundary + +### Acceptance Criteria + +1. `G4/G5` entry decisions are explicit +2. deferred families do not enter implementation implicitly + +## Completion Criteria + +This plan is complete when: + +1. all selected mainline anchors have executed real-sample records +2. the next implementation scope is selected from validation outcomes +3. boundary and deferred family entry decisions are documented diff --git a/docs/superpowers/plans/2026-04-19-102-final-coverage-status-rollup-plan.md b/docs/superpowers/plans/2026-04-19-102-final-coverage-status-rollup-plan.md new file mode 100644 index 0000000..ddfcb75 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-final-coverage-status-rollup-plan.md @@ -0,0 +1,51 @@ +# 102 Final Coverage Status Rollup Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer E` +> Status: Active + +## Plan Intent + +Create the final 102-scene coverage rollup after residual 13 closure. This plan publishes a candidate/status view only. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` +2. `tests/fixtures/generated_scene/residual_13_reconciliation_candidates_2026-04-19.json` +3. `tests/fixtures/generated_scene/boundary_residual_hold_decision_2026-04-19.json` +4. `tests/fixtures/generated_scene/bootstrap_target_residual_isolation_2026-04-19.json` +5. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` + +## Allowed Files + +1. `tests/fixtures/generated_scene/final_coverage_status_rollup_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-102-final-coverage-status-rollup-report.md` + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `src/generated_scene/analyzer.rs` +3. `src/generated_scene/generator.rs` + +## Tasks + +1. 
Load the 102-scene full coverage reconciliation candidate view. +2. Load the residual 13 reconciliation candidate view. +3. Replace matching residual scenes in the 102 view with residual follow-up candidate statuses. +4. Attach boundary/bootstrap overlay decisions where present. +5. Produce final coverage summary. +6. Publish the rollup JSON. +7. Publish the rollup report. + +## Completion Criteria + +1. Final rollup contains `102` scenes. +2. Final summary has `95` framework auto-pass candidates and `7` structured fail-closed candidates. +3. There are `0` source-unreadable, unsupported-family, missing-source, and misclassified-unresolved records. +4. Official execution board is not modified. +5. Report names the next bounded step. + +## Stop Statement + +Stop after the final coverage rollup JSON and report are published. Do not update the official execution board under this plan. diff --git a/docs/superpowers/plans/2026-04-19-102-framework-closure-rollup-plan.md b/docs/superpowers/plans/2026-04-19-102-framework-closure-rollup-plan.md new file mode 100644 index 0000000..ec3a536 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-framework-closure-rollup-plan.md @@ -0,0 +1,42 @@ +# 102 Framework Closure Rollup Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Status: Draft + +## Plan Intent + +Publish the final 102-scene framework closure rollup after the final-2 residual roadmaps and board refresh are complete. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. optional `tests/fixtures/generated_scene/final_2_official_board_reconciliation_refresh_2026-04-19.json` + +## Allowed Files + +1. `tests/fixtures/generated_scene/scene_skill_102_framework_closure_rollup_2026-04-19.json` +2. 
`docs/superpowers/reports/2026-04-19-scene-skill-102-framework-closure-rollup-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. Load official board. +2. Count framework statuses. +3. List any remaining structured fail-closed scenes and their named next actions. +4. Verify unresolved count is zero. +5. Publish closure rollup JSON and report. + +## Expected Delta + +No implementation delta. This is the final reporting layer. + +## Stop Statement + +Stop after publishing the 102 framework closure rollup. Do not start another runtime roadmap under this plan. diff --git a/docs/superpowers/plans/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-plan.md b/docs/superpowers/plans/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-plan.md new file mode 100644 index 0000000..8f1dc8e --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-plan.md @@ -0,0 +1,62 @@ +# 102 Full Coverage Follow-Up Sweep And Reconciliation Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer E` +> Upstream Design: `docs/superpowers/specs/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-design.md` + +## Plan Intent + +Run one fixed full 102-scene follow-up sweep after Route 2 through Route 6 have closed, then publish a policy-governed reconciliation candidate view. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/g3_enrichment_request_closure_followup_2026-04-19.json` +3. `tests/fixtures/generated_scene/g3_export_plan_closure_followup_2026-04-19.json` +4. `tests/fixtures/generated_scene/g3_residual_contract_closure_2026-04-19.json` +5. 
`tests/fixtures/generated_scene/g2_remaining_fail_closed_closure_followup_2026-04-19.json` +6. `tests/fixtures/generated_scene/g1e_remaining_fail_closed_closure_followup_2026-04-19.json` +7. `tests/fixtures/generated_scene/boundary_fail_closed_decision_2026-04-19.json` +8. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` + +## Allowed Files + +1. follow-up sweep JSON asset +2. reconciliation candidate JSON asset +3. follow-up sweep report +4. reconciliation candidate report + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +4. family implementation assets + +## Tasks + +1. run fixed 102-scene follow-up sweep +2. classify raw sweep result +3. apply Route 5 route decisions where applicable +4. apply Route 6 promotion policy to build reconciliation candidate view +5. publish coverage delta and remaining-gap report + +## Expected Coverage Delta + +The plan should quantify cumulative delta after Routes 2, 3, and 4. + +## Completion Criteria + +1. total scene count is 102 +2. every scene has one raw sweep status +3. every scene has one reconciliation candidate status +4. coverage delta is reported +5. official execution board is not modified + +## Stop Statement + +Stop after publishing the follow-up sweep and reconciliation candidate reports. + +Do not start a new implementation route under this plan. 
diff --git a/docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-plan.md b/docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-plan.md new file mode 100644 index 0000000..b690a6c --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-plan.md @@ -0,0 +1,197 @@ +# 102 Full Sweep Dry-Run Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-102-full-sweep-dry-run-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-design.md) + +## Plan Intent + +Run one bounded, read-only full sweep over the `102` scene ledger to measure actual generic `scene -> skill` coverage. + +The plan answers: + +`how many of the 102 scenes can the current generic analyzer/generator handle today?` + +## Scope Guardrails + +1. do not change analyzer logic +2. do not change generator logic +3. do not promote scenes into `scene_execution_board_2026-04-18.json` +4. do not add new family baselines +5. do not create new family implementation plans +6. do not fix failures during this dry-run +7. do not run outside the fixed `102` scene set + +## Fixed Inputs + +1. execution board: `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. scene root: `D:/desk/智能体资料/全量业务场景/一平台场景` +3. generator command: `cargo run --bin sg_scene_generate` + +## Fixed Outputs + +1. dry-run result: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +2. dry-run output root: `examples/full_sweep_dry_run_2026-04-19` +3. report: `docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md` + +## Workstreams + +1. `WS1` Build Scene Inventory +2. `WS2` Run Analyzer/Generator Dry-Run +3. `WS3` Classify Results +4. `WS4` Publish Coverage Report + +## Phase 0: Freeze Dry-Run Boundary + +### Objective + +Make the dry-run a measurement exercise only. + +### Tasks + +1. freeze the execution board input +2. freeze the local scene root +3. freeze the dry-run output paths +4. 
explicitly mark the run as read-only with respect to generator behavior and board status + +### Deliverables + +1. fixed input statement +2. fixed output statement +3. dry-run no-promotion statement + +### Acceptance Criteria + +1. no analyzer/generator implementation file is edited for this dry-run +2. `scene_execution_board_2026-04-18.json` is not modified by dry-run results +3. failures are recorded, not fixed + +## Phase 1: Build Scene Inventory + +### Objective + +Construct a deterministic inventory of all `102` scene names and expected source directories. + +### Tasks + +1. read `scene_execution_board_2026-04-18.json` +2. extract all scene entries +3. map each scene name to `D:/desk/智能体资料/全量业务场景/一平台场景/` +4. check whether each source directory exists +5. assign initial inventory status: + - `source-present` + - `missing-source` + +### Deliverables + +1. inventory section inside `full_sweep_dry_run_2026-04-19.json` +2. missing-source list + +### Acceptance Criteria + +1. inventory count equals `102` +2. every scene has a source path +3. missing source does not stop the sweep + +## Phase 2: Run Analyzer/Generator Dry-Run + +### Objective + +Attempt current generic generation for every source-present scene without fixing failures. + +### Tasks + +1. generate a stable safe scene id for each scene +2. invoke `sg_scene_generate` for each source-present scene +3. write outputs under `examples/full_sweep_dry_run_2026-04-19` +4. for successful generation, read `references/generation-report.json` +5. for failed generation, capture stderr/stdout and exit code +6. continue until all `102` scenes are processed + +### Deliverables + +1. per-scene dry-run execution record +2. generated output root for successful scenes +3. captured error messages for failed scenes + +### Acceptance Criteria + +1. every source-present scene has a generator result +2. no failure aborts the full sweep +3. 
generator results are isolated under the dry-run output root + +## Phase 3: Classify Results + +### Objective + +Turn raw dry-run output into actionable coverage categories. + +### Tasks + +1. classify generated `A/B` readiness with no blocker as `auto-pass` +2. classify generator blocking with known gate/contract reason as `fail-closed-known` +3. classify obvious family mismatch as `misclassified` +4. classify evidence outside current families as `unsupported-family` +5. classify absent directories as `missing-source` +6. classify read/analyze failures as `source-unreadable` +7. compute top blockers by frequency +8. compute counts by inferred archetype + +### Deliverables + +1. final dry-run status per scene +2. summary counts +3. by-archetype counts +4. top-blocker list + +### Acceptance Criteria + +1. every scene has exactly one final status +2. total classified count equals `102` +3. every non-pass scene has a reason + +## Phase 4: Publish Report + +### Objective + +Answer the coverage question without changing project state. + +### Tasks + +1. write `full_sweep_dry_run_2026-04-19.json` +2. write `2026-04-19-102-full-sweep-dry-run-report.md` +3. report these four headline numbers: + - `real-sample executed pass` + - `code-backed ledger coverage` + - `dry-run auto-pass` + - `dry-run actionable coverage` +4. list next recommended blocker, but do not start implementation + +### Deliverables + +1. dry-run JSON +2. dry-run report + +### Acceptance Criteria + +1. report can answer actual generic coverage over `102` scenes +2. report separates proven coverage from predicted/dry-run coverage +3. report does not promote scene status + +## Completion Criteria + +This plan is complete when: + +1. all `102` scenes are included in the dry-run result +2. the dry-run result has stable summary counts +3. the report explains the gap between `5/102`, `23/102`, and dry-run coverage +4. 
no generator logic or execution board status is modified + +## Non-Negotiable Stop Rule + +After this dry-run starts: + +1. do not fix generator failures inside the sweep +2. do not create new family implementation plans from a single failure +3. do not update the execution board automatically +4. stop after publishing the dry-run result and report diff --git a/docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-triage-plan.md b/docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-triage-plan.md new file mode 100644 index 0000000..7281dd6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-triage-plan.md @@ -0,0 +1,240 @@ +# 102 Full Sweep Dry-Run Triage Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: `docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-triage-design.md` + +## Plan Intent + +Turn the `62` non-pass records from the full sweep into concrete triage buckets while staying measurement-only. + +The plan must not fix generator failures. It only explains them. + +## Fixed Inputs + +1. dry-run result: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +2. dry-run output root: `examples/full_sweep_dry_run_2026-04-19` +3. execution board: `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +4. scene root: `D:/desk/智能体资料/全量业务场景/一平台场景` + +## Fixed Outputs + +1. triage result: `tests/fixtures/generated_scene/full_sweep_dry_run_triage_2026-04-19.json` +2. triage report: `docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-triage-report.md` + +## Non-Negotiable Scope Guardrails + +1. do not edit analyzer implementation +2. do not edit generator implementation +3. do not update `scene_execution_board_2026-04-18.json` +4. do not promote any scene +5. do not add new family baselines +6. do not start implementation correction during triage +7. do not expand beyond the fixed `102` scene set + +## Workstreams + +1. `WS1` Timeout Triage +2. `WS2` Misclassification Triage +3. 
`WS3` No-Report Failure Triage +4. `WS4` Publish Triage Result + +## Phase 0: Freeze Triage Boundary + +### Objective + +Make the triage a classification exercise only. + +### Tasks + +1. read the upstream dry-run result +2. verify the upstream result has `102` scenes +3. verify non-pass buckets are: + - `31` timeout records + - `5` misclassified records + - `25` no-report records + - `1` bootstrap-target record +4. freeze the triage order: + - timeout first + - misclassification second + - no-report third + +### Deliverables + +1. frozen triage input statement +2. frozen non-pass bucket counts +3. frozen triage order + +### Acceptance Criteria + +1. triage input count is stable +2. no code is changed +3. no board status is updated + +## Phase 1: Timeout Triage + +### Objective + +Split the `31` timeout records into second-level reasons. + +### Tasks + +1. select records where `dryRunStatus = source-unreadable` +2. verify reason is `generator timeout after 30s` +3. collect source directory metadata: + - source directory exists + - file count + - total source bytes + - largest file path + - largest file bytes +4. collect dry-run artifact metadata: + - generated skill directory exists + - references directory exists + - generation report exists +5. preserve board context: + - current group + - current status + - current source asset + - real sample record id +6. optionally run one diagnostic longer-timeout attempt for classification only +7. assign one timeout label: + - `timeout-known-family-sample` + - `timeout-unvalidated-source` + - `timeout-large-source` + - `timeout-command-hang` + - `timeout-generator-slow-but-progressing` + - `timeout-undetermined` + +### Deliverables + +1. `timeoutTriage[]` records in the triage JSON +2. timeout label summary +3. timeout size/source metadata summary + +### Acceptance Criteria + +1. all `31` timeout records have a second-level label +2. no timeout is treated as unsupported family by default +3. 
no long-timeout rerun result promotes a scene + +## Phase 2: Misclassification Triage + +### Objective + +Explain the `5` board-vs-archetype conflicts. + +### Tasks + +1. select records where `dryRunStatus = misclassified` +2. preserve: + - board expected group + - expected archetype + - inferred archetype + - current source asset + - real sample layer status +3. inspect existing dry-run report path when present +4. collect route-conflict evidence: + - whether host bridge evidence dominates + - whether G3 or G1-E evidence is still present + - whether current board expectation came from baseline or expansion +5. assign one routing triage label: + - `route-overprefer-host-bridge` + - `board-expectation-stale` + - `mixed-workflow-host-bridge-valid` + - `scene-family-split-needed` + - `misclassification-undetermined` + +### Deliverables + +1. `misclassificationTriage[]` records in the triage JSON +2. routing conflict summary +3. high-priority routing risk list + +### Acceptance Criteria + +1. all `5` misclassified records have a routing label +2. no routing code is changed +3. the report identifies whether implementation correction is justified later + +## Phase 3: No-Report Failure Triage + +### Objective + +Split the `25` generic no-report failures into concrete failure stages. + +### Tasks + +1. select records where: + - `dryRunStatus = fail-closed-known` + - `reason = generator failed without generation report` +2. collect command artifacts: + - exit code + - stdout tail + - stderr tail +3. inspect output artifacts: + - skill directory exists + - references directory exists + - any report file exists +4. infer one failure stage: + - `source-scan` + - `analyzer` + - `ir-assembly` + - `readiness-before-report` + - `compiler-package-write` + - `panic-or-process-error` + - `unknown-no-report` +5. keep `bootstrap_target` failure separate + +### Deliverables + +1. `noReportFailureTriage[]` records in the triage JSON +2. 
`bootstrapTargetFailures[]` records in the triage JSON +3. failure-stage summary + +### Acceptance Criteria + +1. all `25` no-report failures have an inferred failure stage +2. the `bootstrap_target` case is not hidden in the no-report bucket +3. every non-pass record remains explainable without implementation changes + +## Phase 4: Publish Triage Result + +### Objective + +Publish a bounded triage result and stop. + +### Tasks + +1. write `full_sweep_dry_run_triage_2026-04-19.json` +2. write `2026-04-19-102-full-sweep-dry-run-triage-report.md` +3. include: + - timeout triage summary + - misclassification triage summary + - no-report triage summary + - recommended next blocker +4. explicitly state that the triage does not promote scenes or start fixes + +### Deliverables + +1. triage JSON +2. triage report + +### Acceptance Criteria + +1. all `62` non-pass records are covered +2. every non-pass record has a second-level explanation +3. the report identifies the next blocker without implementing it +4. no generator/analyzer file is modified +5. `scene_execution_board_2026-04-18.json` is not modified + +## Completion Criteria + +This plan is complete when: + +1. `31` timeout records have timeout labels +2. `5` misclassified records have routing labels +3. `25` no-report failures have failure stages +4. `1` bootstrap-target failure is separately tracked +5. the triage JSON and report are published +6. 
execution stops without implementation work + diff --git a/docs/superpowers/plans/2026-04-19-102-full-sweep-improvement-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-102-full-sweep-improvement-roadmap-plan.md new file mode 100644 index 0000000..5a67358 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-full-sweep-improvement-roadmap-plan.md @@ -0,0 +1,305 @@ +# 102 Full Sweep Improvement Roadmap Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: `docs/superpowers/specs/2026-04-19-102-full-sweep-improvement-roadmap-design.md` +> Upstream Dry-Run Result: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +> Upstream Triage Result: `tests/fixtures/generated_scene/full_sweep_dry_run_triage_2026-04-19.json` + +## Plan Intent + +Turn the `102` scene dry-run and triage findings into a governed improvement roadmap. + +This plan is intentionally broad like the earlier `60-to-90` roadmap. It coordinates multiple bounded implementation tracks instead of starting isolated fixes from individual failures. + +## Baseline + +Current measured baseline: + +| Metric | Count | +| --- | ---: | +| Real-sample executed pass | 5 / 102 | +| Code-backed ledger coverage | 23 / 102 | +| Dry-run auto-pass | 40 / 102 | +| Dry-run actionable coverage | 66 / 102 | + +Current triage baseline: + +| Bucket | Count | Triage conclusion | +| --- | ---: | --- | +| Timeout | 31 | `19 timeout-unvalidated-source`, `8 timeout-large-source`, `4 timeout-known-family-sample` | +| Misclassified | 5 | all `route-overprefer-host-bridge` | +| No-report failure | 25 | all `readiness-before-report` | +| Bootstrap target | 1 | separate `bootstrap_target` | + +## Scope Guardrails + +1. do not add new scene families +2. do not update `scene_execution_board_2026-04-18.json` inside this roadmap +3. do not promote scenes directly from diagnostic or dry-run results +4. do not reopen completed real-sample passes except as regression checks +5. do not start `G4/G5` +6. 
do not implement full login recovery +7. do not implement full host runtime transport +8. do not implement local document attachment runtime +9. do not create unbounded micro-plans from a single failure + +## Workstreams + +1. `WS1` Timeout Diagnostics and Scan Budget +2. `WS2` Routing Boundary Correction +3. `WS3` Structured Fail-Closed Reporting +4. `WS4` Follow-Up Sweep and Coverage Delta + +## Phase 0: Freeze Improvement Baseline + +### Objective + +Freeze the dry-run and triage outputs as the only accepted inputs to this roadmap. + +### Tasks + +1. freeze `full_sweep_dry_run_2026-04-19.json` +2. freeze `full_sweep_dry_run_triage_2026-04-19.json` +3. freeze the four headline metrics: + - `5/102` real-sample pass + - `23/102` code-backed ledger coverage + - `40/102` dry-run auto-pass + - `66/102` dry-run actionable coverage +4. freeze the problem buckets: + - `4` known-family timeouts + - `8` large-source timeouts + - `19` unvalidated-source timeouts + - `5` host-bridge over-preference cases + - `25` readiness-before-report failures + - `1` bootstrap-target failure + +### Deliverables + +1. baseline statement +2. frozen blocker inventory +3. roadmap entry criteria + +### Acceptance Criteria + +1. no additional scene is added to scope +2. no implementation starts before the baseline is frozen +3. dry-run and triage assets are treated as immutable inputs + +## Phase 1: Known-Family Timeout Diagnostics + +### Objective + +Resolve the highest-priority ambiguity: known-family scenes that timed out in the full sweep. + +### Tasks + +1. select only records labeled `timeout-known-family-sample` +2. capture source scale metrics and previous family context +3. run bounded diagnostic attempts if needed +4. classify each record as: + - `known-family-rerun-pass` + - `known-family-source-scale-timeout` + - `known-family-generator-hotspot` + - `known-family-contract-blocked-after-long-run` + - `known-family-timeout-unresolved` +5. 
publish diagnostic result + +### Deliverables + +1. known-family timeout diagnostic JSON +2. known-family timeout diagnostic report + +### Acceptance Criteria + +1. all `4` known-family timeout records are classified +2. no scene is promoted from diagnostic success +3. no generator logic is changed in the diagnostic step + +## Phase 2: Source-Scale and Scan-Budget Improvement + +### Objective + +Reduce timeout noise caused by oversized source directories and obvious vendor/library files. + +### Tasks + +1. analyze `timeout-large-source` and `timeout-unvalidated-source` +2. define source scan budget policy +3. define vendor/library ignore policy +4. implement only bounded source scanning or timeout reporting changes +5. verify no canonical or real-sample regression is introduced + +### Deliverables + +1. source scan budget policy +2. bounded scan implementation if approved by Phase 1 evidence +3. timeout reporting regression tests + +### Acceptance Criteria + +1. large source directories no longer dominate the full sweep by accidental vendor-file scanning +2. known-family samples are not made worse +3. archetype semantics are unchanged + +## Phase 3: Host-Bridge Route Over-Preference Correction + +### Objective + +Correct or formally adjudicate the five cases where `host_bridge_workflow` over-absorbed `G3` or `G1-E` expected scenes. + +### Tasks + +1. select the `5` `route-overprefer-host-bridge` records +2. compare business-chain evidence against host-bridge evidence +3. define routing precedence rules for: + - `G3` vs `G6` + - `G1-E` vs `G6` +4. implement bounded routing correction only if evidence supports it +5. preserve regressions for: + - `G3` real-sample pass + - `G1-E` real-sample pass + - `G6` real-sample pass +6. classify each case as: + - `route-corrected-to-g3` + - `route-corrected-to-g1e` + - `board-expectation-reclassified` + - `valid-host-bridge-workflow` + - `route-conflict-unresolved` + +### Deliverables + +1. 
route over-preference correction report +2. routing regression tests +3. updated dry-run classification for the five fixed records + +### Acceptance Criteria + +1. all `5` route conflicts are adjudicated +2. `host_bridge_workflow` no longer wins solely because host evidence exists +3. existing `G6` pass remains stable +4. no broad routing rewrite is introduced + +## Phase 4: Structured Fail-Closed Reporting + +### Objective + +Convert `readiness-before-report` failures into structured failure reports instead of process-level no-report failures. + +### Tasks + +1. select the `25` `readiness-before-report` records +2. identify where generation exits before report emission +3. define a minimal failure-report schema for pre-package fail-closed +4. emit structured failure records with: + - inferred archetype + - failed gate + - blocker reason + - missing contract pieces + - stderr summary if any +5. keep scenes failing unless their contracts are actually complete + +### Deliverables + +1. pre-report fail-closed schema +2. implementation of structured failure report emission +3. regression covering at least one `paginated_enrichment`, one `local_doc_pipeline`, one `multi_mode_request`, and one `single_request_enrichment` pre-report failure + +### Acceptance Criteria + +1. no-report failures are reduced or eliminated as a category +2. failing scenes still fail closed +3. failure reasons become machine-readable +4. auto-pass count is not inflated by looser gates + +## Phase 5: Bootstrap Target Isolation + +### Objective + +Keep the single `bootstrap_target` failure isolated and decide whether it belongs to later bootstrap normalization work. + +### Tasks + +1. preserve `用户停电频次分析监测` as a separate bootstrap failure +2. inspect whether the failure is caused by missing target URL, domain mismatch, or unsupported bootstrap pattern +3. produce a bootstrap isolation note +4. do not implement login or bootstrap auto-recovery + +### Deliverables + +1. 
bootstrap target isolation note +2. decision whether the case enters a later bootstrap-normalization roadmap + +### Acceptance Criteria + +1. the bootstrap case does not pollute readiness-before-report work +2. no login recovery implementation is started + +## Phase 6: Follow-Up Full Sweep and Coverage Delta + +### Objective + +Measure whether the bounded improvements improved generic coverage. + +### Tasks + +1. rerun the fixed `102` scene full sweep with the same scene set +2. produce a new dry-run result +3. compare against the baseline: + - auto-pass delta + - actionable coverage delta + - timeout delta + - misclassification delta + - no-report delta +4. publish coverage delta report +5. decide whether to move to execution-board status sync or another bounded improvement cycle + +### Deliverables + +1. follow-up full sweep JSON +2. coverage delta report +3. remaining blocker decision board + +### Acceptance Criteria + +1. scene set remains exactly `102` +2. baseline and follow-up are comparable +3. improvements are quantified, not assumed +4. no execution board status is changed automatically + +## Milestone Order + +The order is fixed: + +1. Phase 0: freeze baseline +2. Phase 1: known-family timeout diagnostics +3. Phase 2: source-scale and scan-budget improvement +4. Phase 3: host-bridge route over-preference correction +5. Phase 4: structured fail-closed reporting +6. Phase 5: bootstrap target isolation +7. Phase 6: follow-up full sweep and coverage delta + +Do not start Phase 3 before Phase 1 is completed. Known-family timeout ambiguity affects the interpretation of current coverage. + +Do not start Phase 6 before Phases 2-5 have either completed or been explicitly deferred with reasons. + +## Completion Criteria + +This roadmap is complete when: + +1. known-family timeouts are no longer mixed with generic timeout noise +2. host-bridge over-preference cases are adjudicated +3. readiness-before-report failures become structured fail-closed records +4. 
the bootstrap target case is isolated +5. a follow-up full sweep quantifies coverage delta +6. no new family is introduced as a shortcut around current blockers + +## Out of Plan + +1. new family implementation +2. `G4/G5` implementation +3. browser host runtime transport +4. login recovery +5. attachment/local document runtime +6. automatic execution board promotion + diff --git a/docs/superpowers/plans/2026-04-19-102-sweep-status-reconciliation-plan.md b/docs/superpowers/plans/2026-04-19-102-sweep-status-reconciliation-plan.md new file mode 100644 index 0000000..d1e4bb8 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-102-sweep-status-reconciliation-plan.md @@ -0,0 +1,140 @@ +# 102 Sweep Status Reconciliation Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: `docs/superpowers/specs/2026-04-19-102-sweep-status-reconciliation-design.md` + +## Plan Intent + +Reconcile the follow-up `102` sweep result with the final route-conflict decisions so the next roadmap uses a trustworthy status baseline. + +This plan is a status reconciliation plan, not an implementation plan. + +## Scope Guardrails + +1. do not modify `src/generated_scene/analyzer.rs` +2. do not modify `src/generated_scene/generator.rs` +3. do not modify `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +4. do not promote any scene +5. do not add or modify family baselines +6. do not rerun the `102` sweep +7. do not implement fixes for fail-closed or timeout records + +## Phase 0: Freeze Inputs + +### Objective + +Freeze the exact reconciliation inputs. + +### Tasks + +1. read `full_sweep_improvement_followup_2026-04-19.json` +2. read `remaining_route_conflict_decisions_2026-04-19.json` +3. verify follow-up sweep scene count is `102` +4. verify route-decision conflict count is `4` + +### Deliverables + +1. input validation summary + +### Acceptance Criteria + +1. reconciliation does not proceed if follow-up scene count is not `102` +2. 
reconciliation does not proceed if route-decision count is not `4` + +## Phase 1: Merge Route Decisions + +### Objective + +Apply route-conflict decisions as a reconciliation overlay without changing raw sweep status. + +### Tasks + +1. match route decisions by `sceneId` +2. for each matching scene, keep `dryRunStatus = misclassified` +3. add `routeDecision = valid-host-bridge-workflow` +4. set `reconciledStatus = adjudicated-valid-host-bridge` +5. preserve decision reason and evidence summary + +### Deliverables + +1. route-decision overlay records + +### Acceptance Criteria + +1. all `4` route decisions match a follow-up scene +2. all `4` are reconciled to `adjudicated-valid-host-bridge` +3. no broad status rewrite is performed + +## Phase 2: Build Reconciled Status Counts + +### Objective + +Build the reconciled status summary for all `102` scenes. + +### Tasks + +1. copy all follow-up scene records into a new reconciliation asset +2. assign `reconciledStatus` for every scene +3. count statuses: + - `auto-pass` + - `fail-closed-known` + - `adjudicated-valid-host-bridge` + - `source-unreadable` + - `missing-source` + - `unsupported-family` + - `misclassified-unresolved` +4. summarize fail-closed records by archetype and reason +5. preserve remaining timeout records as unresolved timeout inputs + +### Deliverables + +1. `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` + +### Acceptance Criteria + +1. total scene count is `102` +2. reconciled status count total is `102` +3. unresolved misclassification count is `0` +4. timeout count remains `2` + +## Phase 3: Publish Reconciliation Report + +### Objective + +Make the reconciled state readable and actionable. + +### Tasks + +1. summarize raw follow-up counts +2. summarize reconciled counts +3. list `4` valid-host-bridge adjudications +4. list `2` remaining timeout inputs +5. summarize `48` fail-closed-known records as the next implementation-analysis candidate +6. 
state explicitly that the execution board was not changed + +### Deliverables + +1. `docs/superpowers/reports/2026-04-19-102-sweep-status-reconciliation-report.md` + +### Acceptance Criteria + +1. report explains why raw `misclassified = 4` no longer means unresolved route bugs +2. report identifies the next likely roadmap input without starting it +3. report confirms no code or execution-board changes + +## Completion Criteria + +This plan is complete when: + +1. reconciliation JSON exists +2. reconciliation report exists +3. all `4` route conflicts are represented as adjudicated valid host-bridge workflows +4. no unresolved misclassification remains +5. `2` timeouts and `48` fail-closed records remain visible as separate future inputs + +## Stop Statement + +Stop after publishing the reconciliation JSON and report. + +Do not start the next roadmap in this plan. diff --git a/docs/superpowers/plans/2026-04-19-bootstrap-target-normalization-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-bootstrap-target-normalization-roadmap-plan.md new file mode 100644 index 0000000..de511ab --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-bootstrap-target-normalization-roadmap-plan.md @@ -0,0 +1,44 @@ +# Bootstrap Target Normalization Roadmap Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Fixed Scene: `sweep-091-scene` +> Status: Draft + +## Plan Intent + +Run a bounded bootstrap target normalization slice for the single remaining `page_state_eval` residual. + +## Fixed Input Bucket + +1. `sweep-091-scene` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/scene_generator_test.rs` +4. `tests/fixtures/generated_scene/bootstrap_target_normalization_followup_2026-04-19.json` +5. 
`tests/fixtures/generated_scene/bootstrap_target_normalization_reconciliation_candidates_2026-04-19.json` +6. `docs/superpowers/reports/2026-04-19-bootstrap-target-normalization-roadmap-report.md` + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. Freeze the current `sweep-091-scene` generation report. +2. Identify whether the failure is a missing target URL, target-domain ambiguity, or policy-held navigation dependency. +3. Implement at most one bounded bootstrap target normalization slice if the target can be recovered from deterministic source evidence. +4. Rerun only `sweep-091-scene`. +5. Publish follow-up and reconciliation candidate assets. + +## Expected Delta + +Target delta is `+1 framework-auto-pass-candidate` if deterministic bootstrap target recovery is possible. Otherwise the delta is `0`, with a narrower named hold. + +## Stop Statement + +Stop after the single-scene follow-up and reconciliation candidates are published. Do not update the official board under this plan. diff --git a/docs/superpowers/plans/2026-04-19-bootstrap-target-residual-isolation-plan.md b/docs/superpowers/plans/2026-04-19-bootstrap-target-residual-isolation-plan.md new file mode 100644 index 0000000..f337337 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-bootstrap-target-residual-isolation-plan.md @@ -0,0 +1,38 @@ +# Bootstrap Target Residual Isolation Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md` +> Parent Route: `Residual Route D` +> Parent Layer: `Layer D` + +## Plan Intent + +Isolate the remaining page-state/bootstrap-target residual without starting login recovery or runtime navigation implementation. + +## Fixed Input Bucket + +1. `sweep-091-scene` / `用户停电频次分析监测` + +## Allowed Files + +1. isolation JSON asset +2. isolation report + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. 
`src/generated_scene/generator.rs` +3. login/runtime implementation files +4. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. preserve the residual as bootstrap-target isolated; +2. publish isolation report; +3. do not implement login recovery. + +## Stop Statement + +Stop after isolation assets are published. + diff --git a/docs/superpowers/plans/2026-04-19-boundary-fail-closed-decision-plan.md b/docs/superpowers/plans/2026-04-19-boundary-fail-closed-decision-plan.md new file mode 100644 index 0000000..ca376e5 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-boundary-fail-closed-decision-plan.md @@ -0,0 +1,55 @@ +# Boundary Fail-Closed Decision Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 5: boundary-family fail-closed` +> Parent Layer: `Layer C + Layer D` +> Upstream Design: `docs/superpowers/specs/2026-04-19-boundary-fail-closed-decision-design.md` + +## Plan Intent + +Publish a decision for the remaining boundary-family fail-closed buckets after mainline routes are complete or deferred. + +## Fixed Input Bucket + +1. `local_doc_pipeline = 5` +2. `host_bridge_workflow = 1` +3. `page_state_eval/bootstrap_target = 1` + +## Allowed Files + +1. boundary decision JSON assets +2. boundary decision report assets +3. optional next bounded boundary plan docs + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. freeze the Route 5 bucket state +2. inspect each boundary subgroup +3. decide defer/hold/open-slice +4. publish Route 5 decision report + +## Expected Coverage Delta + +Decision-only delta: + +1. unresolved boundary ambiguity should go to zero + +## Completion Criteria + +1. every Route 5 subgroup has a named decision +2. 
any follow-up bounded plan is explicit and optional + +## Stop Statement + +Stop after the Route 5 decision report is published. + +Do not begin boundary implementation under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md new file mode 100644 index 0000000..60d4af5 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md @@ -0,0 +1,139 @@ +# Boundary Family Real-Sample Entry Roadmap Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-boundary-family-real-sample-entry-roadmap-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-boundary-family-real-sample-entry-roadmap-design.md) + +## Plan Intent + +This roadmap determines the next bounded step after `G1-E / G2 / G3` have all closed as executed real-sample passes. + +Its only purpose is: + +`decide whether one boundary family may enter real-sample execution scope next` + +## Scope Guardrails + +1. do not reopen `G1-E / G2 / G3` +2. do not implement runtime-platform prerequisites under this roadmap +3. do not execute real samples for more than one boundary family +4. do not open `G4 / G5` +5. do not turn this work into a new family-asset expansion program + +## Candidate Boundary Families + +The only candidates under this roadmap are: + +1. `G6` +2. `G7` +3. `G8` + +## Workstreams + +1. `WS1` Freeze the Post-Mainline Starting State +2. `WS2` Evaluate Boundary-Family Entry Readiness +3. `WS3` Select One Next Candidate or Hold All +4. `WS4` Publish the Next Bounded Execution Slice + +## Phase 0: Freeze the Starting State + +### Objective + +Lock the roadmap start point so the decision does not drift back into old mainline work. + +### Tasks + +1. freeze `G1-E / G2 / G3` as closed executed passes +2. freeze `G6 / G7 / G8` as held boundary families +3. 
freeze `G4 / G5` as out of scope + +### Deliverables + +1. starting-state note +2. fixed candidate list + +### Acceptance Criteria + +1. no mainline or deferred family work is reopened under this roadmap + +## Phase 1: Evaluate Boundary-Family Entry Readiness + +### Objective + +Compare `G6 / G7 / G8` against explicit entry criteria instead of intuition. + +### Tasks + +1. restate the current entry condition for each boundary family +2. compare the required runtime gap for each family +3. estimate which family needs the smallest new capability to enter real-sample scope + +### Deliverables + +1. boundary-family comparison matrix +2. smallest-entry-cost summary + +### Acceptance Criteria + +1. the next candidate family can be justified with explicit criteria +2. the rejected families have explicit hold reasons + +## Phase 2: Select One Next Candidate or Hold All + +### Objective + +Reduce the next-step ambiguity to a single bounded decision. + +### Tasks + +1. select exactly one family as the next real-sample entry candidate +2. or explicitly conclude that all boundary families remain held +3. record why the non-selected families remain out of scope + +### Deliverables + +1. boundary-family entry decision +2. hold reasons for non-selected families + +### Acceptance Criteria + +1. no more than one next family is opened +2. the decision is bounded and defensible + +## Phase 3: Publish the Next Bounded Execution Slice + +### Objective + +Turn the decision into the next actionable bounded plan. + +### Tasks + +1. if one family is selected, write a bounded `design + plan` for its minimum real-sample entry slice +2. if none is selected, write a bounded prerequisites plan instead +3. update the decision report layer + +### Deliverables + +1. next-family bounded `design` +2. next-family bounded `plan` +3. roadmap closure report + +### Acceptance Criteria + +1. the next step is ready to execute without reopening roadmap scope +2. 
only one bounded direction is emitted + +## Completion Criteria + +This roadmap is complete when: + +1. the post-mainline next step is reduced to one bounded direction +2. `G6 / G7 / G8` no longer compete ambiguously for priority +3. a single follow-up `design + plan` exists for the selected direction + +## Next Step + +After this roadmap completes: + +1. execute the selected family-entry slice if one family is admitted +2. otherwise execute the bounded prerequisites slice before any boundary family enters real-sample scope diff --git a/docs/superpowers/plans/2026-04-19-boundary-residual-hold-decision-plan.md b/docs/superpowers/plans/2026-04-19-boundary-residual-hold-decision-plan.md new file mode 100644 index 0000000..f57ea1f --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-boundary-residual-hold-decision-plan.md @@ -0,0 +1,38 @@ +# Boundary Residual Hold Decision Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md` +> Parent Route: `Residual Route C` +> Parent Layer: `Layer D` + +## Plan Intent + +Decide whether the remaining `local_doc_pipeline` and `host_bridge_workflow` residual records should remain held or enter a future runtime roadmap. + +## Fixed Input Bucket + +1. five `local_doc_pipeline` residual records +2. one `host_bridge_workflow` residual record + +## Allowed Files + +1. decision JSON asset +2. decision report + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. classify each boundary residual as hold/defer/runtime-roadmap-input; +2. do not implement runtime support; +3. publish decision report. + +## Stop Statement + +Stop after decision assets are published. 
+ diff --git a/docs/superpowers/plans/2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md new file mode 100644 index 0000000..0c3895f --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md @@ -0,0 +1,123 @@ +# Boundary Runtime Prerequisites Roadmap Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-boundary-runtime-prerequisites-roadmap-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-boundary-runtime-prerequisites-roadmap-design.md) + +## Plan Intent + +This roadmap determines the next bounded prerequisites slice after the post-`G7` boundary decision concludes that direct `G6` or `G8` execution should not start yet. + +Its only purpose is: + +`select one bounded prerequisite direction before the next boundary-family real-sample attempt` + +## Scope Guardrails + +1. do not execute `G6` or `G8` +2. do not reopen `G7` +3. do not reopen `G1-E / G2 / G3` +4. do not implement host-runtime or local-doc runtime under this roadmap +5. do not open `G4 / G5` + +## Candidate Prerequisite Directions + +The only candidates under this roadmap are: + +1. `G6 host-bridge prerequisites` +2. `G8 local-doc prerequisites` + +## Workstreams + +1. `WS1` Freeze the Post-G7 Boundary Hold State +2. `WS2` Compare G6 and G8 Prerequisite Burden +3. `WS3` Select One Prerequisite Direction +4. `WS4` Publish the Next Bounded Prerequisites Slice + +## Phase 0: Freeze the Starting State + +### Objective + +Lock the roadmap start point so no closed family work is reopened. + +### Tasks + +1. freeze `G7` as closed +2. freeze `G6` and `G8` as held pending prerequisites +3. freeze `G1-E / G2 / G3` as closed +4. freeze `G4 / G5` as out of scope + +### Deliverables + +1. starting-state note +2. fixed prerequisite candidate list + +### Acceptance Criteria + +1. 
no family execution begins under this roadmap + +## Phase 1: Compare Prerequisite Burden + +### Objective + +Compare `G6` and `G8` at the prerequisite level instead of at the execution level. + +### Tasks + +1. restate the smallest blocked capability for `G6` +2. restate the smallest blocked capability for `G8` +3. compare which prerequisite can be isolated more cleanly + +### Deliverables + +1. prerequisite comparison matrix +2. smallest-prerequisite summary + +### Acceptance Criteria + +1. the selected prerequisite direction is justified explicitly + +## Phase 2: Select One Prerequisite Direction + +### Objective + +Reduce the post-`G7` prerequisite ambiguity to one bounded decision. + +### Tasks + +1. select exactly one direction: + - `G6 host-bridge prerequisites` + - or `G8 local-doc prerequisites` +2. record why the other direction remains held + +### Deliverables + +1. prerequisite direction decision +2. hold reason for the non-selected direction + +### Acceptance Criteria + +1. only one next direction is opened +2. the decision is bounded and defensible + +## Phase 3: Publish the Next Bounded Slice + +### Objective + +Turn the decision into the next executable bounded artifact. + +### Tasks + +1. write one bounded follow-up design and plan for the selected prerequisite direction +2. publish a roadmap closure report + +### Deliverables + +1. next bounded `design` +2. next bounded `plan` +3. roadmap closure report + +### Acceptance Criteria + +1. the next step is ready without extending this roadmap +2. 
only one bounded direction is emitted diff --git a/docs/superpowers/plans/2026-04-19-final-2-official-board-reconciliation-refresh-plan.md b/docs/superpowers/plans/2026-04-19-final-2-official-board-reconciliation-refresh-plan.md new file mode 100644 index 0000000..9d97a20 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-final-2-official-board-reconciliation-refresh-plan.md @@ -0,0 +1,54 @@ +# Final 2 Official Board Reconciliation Refresh Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Status: Draft + +## Plan Intent + +Refresh official board framework fields after one or both final-2 residual roadmaps publish reconciliation candidates. + +## Fixed Inputs + +At least one of: + +1. `tests/fixtures/generated_scene/bootstrap_target_normalization_reconciliation_candidates_2026-04-19.json` +2. `tests/fixtures/generated_scene/host_bridge_runtime_reconciliation_candidates_2026-04-19.json` + +Also required: + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` + +## Allowed Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/final_2_official_board_reconciliation_refresh_2026-04-19.json` +3. `docs/superpowers/reports/2026-04-19-final-2-official-board-reconciliation-refresh-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` + +## Tasks + +1. Load candidate assets that exist. +2. Verify each candidate belongs to `sweep-085-scene` or `sweep-091-scene`. +3. Match board rows by `sceneId`. +4. Update only framework-layer fields. +5. Recompute board framework summary. +6. Publish reconciliation refresh JSON and report. + +## Expected Delta + +Delta depends on candidate assets: + +1. 
one closed residual: `framework-auto-pass +1`, `framework-structured-fail-closed -1` +2. both closed residuals: `framework-auto-pass +2`, `framework-structured-fail-closed -2` +3. held residuals: no count delta, but narrower next action / hold reason + +## Stop Statement + +Stop after the final-2 board reconciliation refresh JSON and report are published. diff --git a/docs/superpowers/plans/2026-04-19-final-2-residual-child-plan-sequence-plan.md b/docs/superpowers/plans/2026-04-19-final-2-residual-child-plan-sequence-plan.md new file mode 100644 index 0000000..5dcac40 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-final-2-residual-child-plan-sequence-plan.md @@ -0,0 +1,47 @@ +# Final 2 Residual Child Plan Sequence Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer E / Route 5 + Route 6` +> Status: Draft + +## Plan Intent + +Create the remaining child-plan sequence for the last two framework structured fail-closed residuals. This plan only defines the sequence and child plan boundaries; it does not execute implementation. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/local_doc_official_board_reconciliation_refresh_2026-04-19.json` + +## Fixed Residual Bucket + +1. `sweep-085-scene`: `host_bridge_workflow`, `future-host-bridge-runtime-roadmap-input` +2. `sweep-091-scene`: `page_state_eval`, `future-bootstrap-target-normalization-roadmap-input` + +## Child Plans + +1. `2026-04-19-final-2-residual-roadmap-prioritization-plan.md` +2. `2026-04-19-bootstrap-target-normalization-roadmap-plan.md` +3. `2026-04-19-host-bridge-runtime-roadmap-plan.md` +4. `2026-04-19-final-2-official-board-reconciliation-refresh-plan.md` +5. `2026-04-19-102-framework-closure-rollup-plan.md` + +## Scope Guardrails + +1. Do not modify `analyzer.rs`. +2. Do not modify `generator.rs`. +3. 
Do not update the official board under this sequence-definition plan. +4. Do not run a full 102 sweep under this plan. +5. Do not reopen G1-E, G2, G3, or local-doc runtime work. +6. Do not continue the old G6 micro-plan chain. + +## Completion Criteria + +1. The final-2 residual child plan sequence exists. +2. Each child plan declares parent route, fixed input bucket, allowed files, forbidden files, expected delta, and stop statement. +3. The next executable child plan is the prioritization plan. + +## Stop Statement + +Stop after the final-2 child plan sequence is created. Do not execute any child plan under this sequence-definition plan. diff --git a/docs/superpowers/plans/2026-04-19-final-2-residual-roadmap-prioritization-plan.md b/docs/superpowers/plans/2026-04-19-final-2-residual-roadmap-prioritization-plan.md new file mode 100644 index 0000000..065463c --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-final-2-residual-roadmap-prioritization-plan.md @@ -0,0 +1,43 @@ +# Final 2 Residual Roadmap Prioritization Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Status: Draft + +## Plan Intent + +Select the next residual roadmap from the final two structured fail-closed records. + +## Fixed Input Bucket + +1. `sweep-085-scene`: host-bridge runtime residual +2. `sweep-091-scene`: bootstrap target normalization residual + +## Allowed Files + +1. `tests/fixtures/generated_scene/final_2_residual_roadmap_prioritization_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-final-2-residual-roadmap-prioritization-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. Load the current official board. +2. Extract the two residuals. +3. 
Score bootstrap normalization vs host-bridge runtime. +4. Select exactly one first roadmap. +5. Publish decision JSON. +6. Publish decision report. + +## Expected Delta + +No coverage delta. This is a decision-only plan. + +## Stop Statement + +Stop after the prioritization asset and report are published. Do not start the selected roadmap under this plan. diff --git a/docs/superpowers/plans/2026-04-19-g1e-remaining-fail-closed-closure-plan.md b/docs/superpowers/plans/2026-04-19-g1e-remaining-fail-closed-closure-plan.md new file mode 100644 index 0000000..69e5c65 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g1e-remaining-fail-closed-closure-plan.md @@ -0,0 +1,55 @@ +# G1-E Remaining Fail-Closed Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 4: G1-E / single_request_enrichment` +> Parent Layer: `Layer C + Layer D` +> Upstream Design: `docs/superpowers/specs/2026-04-19-g1e-remaining-fail-closed-closure-design.md` + +## Plan Intent + +Implement one bounded correction slice for the remaining Route 4 `G1-E` fail-closed records. + +## Fixed Input Bucket + +`single_request_enrichment = 2` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. Route 4 local inventory and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 2 and Route 3 assets +3. Route 5+ assets + +## Tasks + +1. freeze the two Route 4 records +2. confirm the repeated missing contract +3. implement one bounded `G1-E` correction slice +4. rerun bounded validation +5. publish Route 4 delta + +## Expected Coverage Delta + +1. reduce the `G1-E` fail-closed bucket +2. preserve current `G1-E` real-sample pass and canonical stability + +## Completion Criteria + +1. 
Route 4 bucket has measured before/after status +2. Route 4 is closed or deferred + +## Stop Statement + +Stop after Route 4 delta is measured. + +Do not begin Route 5 under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-g2-real-sample-contract-correction-plan.md b/docs/superpowers/plans/2026-04-19-g2-real-sample-contract-correction-plan.md new file mode 100644 index 0000000..6e6eb85 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g2-real-sample-contract-correction-plan.md @@ -0,0 +1,185 @@ +# G2 Real Sample Contract Correction Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g2-real-sample-contract-correction-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g2-real-sample-contract-correction-design.md) +> Trigger Record: `rsv-g2-001` + +## Plan Intent + +This plan implements one bounded mainline correction slice: + +`G2 real-sample contract correction` + +Its purpose is to reduce the current real-sample `G2` mismatch from the broad bundle: + +1. `bootstrap_mismatch` +2. `request_contract_missing` +3. `column_defs_missing` +4. `output correctness not closed` + +into either: + +1. a verified pass +2. or a smaller named contract mismatch + +## Scope Guardrails + +1. do not reopen completed `G2` family expansion work +2. do not add new `G2` fixtures or promote new `G2` candidates +3. do not reopen `G3`, `G1-E`, or boundary families +4. do not turn this work into login recovery or broader runtime-platform implementation +5. do not update validation assets until the real-sample outcome becomes narrower than the current broad mismatch bundle + +## Fixed Verification Anchor + +The only anchor under this plan is: + +1. `台区线损大数据-月_周累计线损率统计分析` + +Mapped real-sample record: + +1. `rsv-g2-001` + +## Workstreams + +1. `WS1` Real-Sample Contract Differential +2. `WS2` Bootstrap and Request Contract Narrowing +3. `WS3` Column and Output Contract Narrowing +4. 
`WS4` Regression, Rerun, and Validation Closure + +## Phase 0: Freeze the Correction Boundary + +### Objective + +Lock the scope to the fixed `G2` real sample and its remaining contract gaps. + +### Tasks + +1. freeze `rsv-g2-001` as the only real-sample correction target +2. freeze the current mismatch bundle from the validation layer +3. freeze `G2` family-expansion outputs as completed and out of scope + +### Deliverables + +1. correction-boundary note +2. fixed mismatch statement + +### Acceptance Criteria + +1. no new `G2` family-expansion task is opened +2. the correction target is explicitly limited to real-sample contract closure + +## Phase 1: Build the Real-Sample Contract Differential + +### Objective + +Make the smallest remaining real-sample contract mismatch explicit before code changes. + +### Tasks + +1. compare the current real generated `SceneIr` against the intended `tq-lineloss-report` contract +2. isolate whether the dominant remaining gap is: + - bootstrap target selection + - per-mode request template completeness + - output column semantics + - output artifact correctness +3. write a minimum contract-gap summary + +### Deliverables + +1. contract differential note +2. minimum gap summary + +### Acceptance Criteria + +1. the smallest remaining `G2` mismatch is explicit +2. the next implementation target is narrower than the current broad mismatch bundle + +## Phase 2: Narrow Bootstrap and Request Contract Gaps + +### Objective + +Correct only the bootstrap and request-side contract pieces that the real sample proves are still too coarse. + +### Tasks + +1. adjust `G2` bootstrap resolution only where the real sample proves it is still misaligned +2. adjust mode-specific request contract recovery only where the real sample proves it is still incomplete +3. preserve fail-closed behavior for unresolved `G2` variants + +### Deliverables + +1. bounded bootstrap correction +2. bounded request-contract correction + +### Acceptance Criteria + +1. 
the real sample no longer keeps the same broad bootstrap/request mismatch shape +2. unrelated `G2` family fixtures are not broadened or reclassified + +## Phase 3: Narrow Column and Output Contract Gaps + +### Objective + +Reduce the remaining output-side mismatch to a verified or smaller state. + +### Tasks + +1. adjust `G2` column-definition recovery only where the real sample proves it is still incomplete +2. adjust output-contract verification only where the real sample proves the generated artifact is too coarse +3. keep readiness and fail-closed behavior intact for still-unresolved samples + +### Deliverables + +1. bounded column-contract correction +2. bounded output-contract correction + +### Acceptance Criteria + +1. the real-sample mismatch becomes narrower than the current broad bundle +2. `G2` does not regress into false positives for unresolved variants + +## Phase 4: Regression, Rerun, and Validation Closure + +### Objective + +Use rerun and validation-layer updates to close the bounded `G2` correction loop. + +### Tasks + +1. add or update regression that names the corrected `G2` real-sample pattern +2. rerun the fixed real sample +3. record whether: + - the sample becomes `executed-pass` + - or the remaining mismatch is now smaller and named +4. update the validation-layer assets +5. write a formal closure report + +### Deliverables + +1. rerun output +2. updated validation assets +3. `G2` real-sample contract-correction closure report + +### Acceptance Criteria + +1. `rsv-g2-001` no longer remains unchanged as the same broad mismatch bundle +2. the narrowed outcome is covered by automated regression +3. validation assets record the narrower `G2` state + +## Completion Criteria + +This plan is complete when: + +1. the fixed `G2` real sample no longer remains at the same broad mismatch bundle +2. the narrower result is covered by automated regression +3. validation assets are updated with the narrowed outcome +4. 
completed `G2` family-expansion work remains untouched + +## Next Step + +After this plan completes: + +1. if `G2` becomes `executed-pass`, mainline real-sample pressure leaves both `G2` and `G3` +2. if `G2` still has a smaller named mismatch, move only to that narrower `G2` correction slice diff --git a/docs/superpowers/plans/2026-04-19-g2-remaining-fail-closed-closure-plan.md b/docs/superpowers/plans/2026-04-19-g2-remaining-fail-closed-closure-plan.md new file mode 100644 index 0000000..1722a66 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g2-remaining-fail-closed-closure-plan.md @@ -0,0 +1,55 @@ +# G2 Remaining Fail-Closed Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 3: G2 / multi_mode_request` +> Parent Layer: `Layer C + Layer D` +> Upstream Design: `docs/superpowers/specs/2026-04-19-g2-remaining-fail-closed-closure-design.md` + +## Plan Intent + +Implement one bounded correction slice for the remaining Route 3 `G2` fail-closed records. + +## Fixed Input Bucket + +`multi_mode_request = 4` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. Route 3 local inventory and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 2 assets +3. Route 4+ assets + +## Tasks + +1. freeze the four Route 3 records +2. confirm the repeated missing contract +3. implement one bounded `G2` correction slice +4. rerun bounded validation +5. publish Route 3 delta + +## Expected Coverage Delta + +1. reduce the `multi_mode_request` fail-closed bucket +2. protect current `G2` real-sample pass and canonical stability + +## Completion Criteria + +1. Route 3 bucket has measured before/after status +2. 
Route 3 is closed or explicitly deferred + +## Stop Statement + +Stop after Route 3 delta is measured. + +Do not begin Route 4 under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-g2-residual-2-readiness-closure-plan.md b/docs/superpowers/plans/2026-04-19-g2-residual-2-readiness-closure-plan.md new file mode 100644 index 0000000..f388e33 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g2-residual-2-readiness-closure-plan.md @@ -0,0 +1,48 @@ +# G2 Residual 2 Readiness Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md` +> Parent Route: `Residual Route B` +> Parent Layer: `Layer C` + +## Plan Intent + +Close the `2` remaining `G2 / multi_mode_request` structured fail-closed records by correcting bounded readiness or contract interpretation. + +## Fixed Input Bucket + +1. `sweep-018-scene` / `白银线损周报` +2. `sweep-071-scene` / `台区线损大数据-月_周累计线损率统计分析` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/scene_generator_test.rs` +4. route-local follow-up JSON/report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. G3/G6/G8 route code unless required to preserve regression tests +3. family baseline manifests + +## Tasks + +1. inspect the two fixed G2 residuals; +2. determine whether readiness labels `02` and `00` are report parsing artifacts or real contract gaps; +3. implement one bounded G2 correction if justified; +4. rerun only the two fixed scenes; +5. publish delta report. + +## Expected Coverage Delta + +Target: reduce the `2` G2 residual fail-closed records. + +## Stop Statement + +Stop after the two-scene route-local follow-up and report. + +Do not continue into G1-E, G3, or boundary work. 
+ diff --git a/docs/superpowers/plans/2026-04-19-g3-enrichment-request-closure-plan.md b/docs/superpowers/plans/2026-04-19-g3-enrichment-request-closure-plan.md new file mode 100644 index 0000000..25832a1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-enrichment-request-closure-plan.md @@ -0,0 +1,56 @@ +# G3 Enrichment Request Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` +> Parent Layer: `Layer C + Layer D` +> Upstream Design: `docs/superpowers/specs/2026-04-19-g3-enrichment-request-closure-design.md` + +## Plan Intent + +Implement the first bounded `G3` contract-recovery slice by recovering repeated enrichment-request and secondary-request evidence gaps inside the remaining `paginated_enrichment` fail-closed bucket. + +## Fixed Input Bucket + +`paginated_enrichment + g3_enrichment_contract + secondary_request` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. route-local follow-up JSON and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 3+ plan files +3. family promotion assets + +## Tasks + +1. freeze the targeted `G3` subgroup from the current follow-up asset +2. confirm the repeated enrichment-request missing pattern +3. implement one bounded contract-recovery slice +4. rerun only the bounded validation needed by this subgroup +5. publish subgroup delta and residual subgroup count + +## Expected Coverage Delta + +1. reduce the count of `paginated_enrichment` fail-closed records caused primarily by enrichment-request closure failure +2. do not reduce canonical or real-sample `G3` pass stability + +## Completion Criteria + +1. targeted subgroup has a measured before/after count +2. 
remaining unresolved Route 2 issues are explicitly handed to the next child plan +3. no route drift into `host_bridge_workflow` + +## Stop Statement + +Stop after the targeted enrichment-request subgroup has been corrected or explicitly bounded as residual. + +Do not continue into export-plan closure work under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-g3-export-plan-closure-plan.md b/docs/superpowers/plans/2026-04-19-g3-export-plan-closure-plan.md new file mode 100644 index 0000000..635a711 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-export-plan-closure-plan.md @@ -0,0 +1,55 @@ +# G3 Export Plan Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` +> Parent Layer: `Layer C + Layer D` +> Upstream Design: `docs/superpowers/specs/2026-04-19-g3-export-plan-closure-design.md` + +## Plan Intent + +Implement the second bounded `G3` contract-recovery slice by recovering repeated export-plan evidence gaps inside the remaining `paginated_enrichment` fail-closed bucket. + +## Fixed Input Bucket + +`paginated_enrichment + g3_export_plan + export_plan` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. route-local follow-up JSON and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 3+ plan files +3. promotion policy assets + +## Tasks + +1. freeze the targeted export-plan subgroup +2. confirm repeated `export_plan` and `g3_export_plan` missing pattern +3. implement one bounded export-plan recovery slice +4. rerun bounded validation only for this subgroup +5. publish delta and residual Route 2 inventory + +## Expected Coverage Delta + +1. 
reduce the count of `paginated_enrichment` records whose primary blocker is export-plan absence +2. preserve stable `G3` canonical and real-sample anchors + +## Completion Criteria + +1. export-plan subgroup count is lower or more narrowly classified +2. residual Route 2 bucket is explicitly measured + +## Stop Statement + +Stop after the export-plan subgroup has been rerun and measured. + +Do not continue into Route 2 residual closure under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-g3-real-sample-archetype-correction-plan.md b/docs/superpowers/plans/2026-04-19-g3-real-sample-archetype-correction-plan.md new file mode 100644 index 0000000..32d7b02 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-real-sample-archetype-correction-plan.md @@ -0,0 +1,171 @@ +# G3 Real Sample Archetype Correction Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g3-real-sample-archetype-correction-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g3-real-sample-archetype-correction-design.md) +> Trigger Report: [2026-04-19-g3-real-sample-execution-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md) + +## Plan Intent + +This plan implements the next bounded scope selected by the real-sample validation roadmap: + +`mainline G3 real-sample archetype correction` + +Its purpose is to correct the routing boundary that currently makes the real sample `95598工单明细表` collapse into `local_doc_pipeline`. + +## Scope Guardrails + +1. Do not reopen the completed `G3` repo-local family expansion program. +2. Do not broaden this work into `G8` runtime implementation. +3. Do not open `G4 / G5`. +4. Do not add new family-expansion fixtures unrelated to the real-sample mismatch. +5. Do not weaken fail-closed behavior in order to force a pass result. +6. Do not treat generic asset updates as progress unless they directly unblock the real-sample rerun. 
+ +## Workstreams + +1. `WS1` Real-Sample Evidence Differential +2. `WS2` G3-vs-G8 Routing Boundary Correction +3. `WS3` Regression and Fail-Closed Integrity +4. `WS4` Real-Sample Rerun and Closure + +## Phase 0: Freeze the Correction Boundary + +### Objective + +Lock the scope to one mismatch: the `G3` real sample being misrouted into `G8`. + +### Tasks + +1. freeze `95598工单明细表` as the only real-sample correction anchor +2. freeze the current observed mismatch: + - `archetype_mismatch` + - `evidence_not_closed` +3. freeze current `G8` behavior as a boundary-family constraint that must not regress + +### Deliverables + +1. correction-boundary note +2. fixed anchor and mismatch statement + +### Acceptance Criteria + +1. no additional family or runtime scope is added under this plan +2. the correction target is explicitly `G3 vs G8` routing + +## Phase 1: Build the Real-Sample Evidence Differential + +### Objective + +Understand why the real sample routes differently from the repo-local `G3` baseline. + +### WS1 Tasks + +1. compare repo-local `G3` canonical evidence against real-sample deterministic facts +2. isolate which evidence currently drives `local_doc_pipeline` +3. isolate which `G3` business-chain signals are present but losing in routing +4. write a differential summary that identifies the minimum routing fix + +### Deliverables + +1. evidence differential note +2. real-sample routing-pressure summary + +### Acceptance Criteria + +1. the team can point to the specific evidence classes causing `G8` to win +2. the minimum routing correction is explicit before code changes start + +## Phase 2: Correct the G3-vs-G8 Routing Boundary + +### Objective + +Change routing so recoverable `G3` business-chain evidence outranks `G8` local-pipeline evidence for this mismatch class. + +### WS2 Tasks + +1. tighten the `local_doc_pipeline` trigger threshold for mixed-evidence scenes +2. 
raise the priority of `G3` when: + - main request exists + - pagination contract is recoverable + - enrichment or detail chain exists +3. keep `G8` routing only when local pipeline evidence is still the dominant workflow backbone +4. preserve fail-closed behavior if the sample still does not satisfy the `G3` minimum contract after routing correction + +### Deliverables + +1. analyzer and generator routing update +2. explicit `G3 vs G8` routing rule in code comments or tests where needed + +### Acceptance Criteria + +1. the real-sample mismatch no longer defaults to `local_doc_pipeline` +2. `G8` representative classification remains intact +3. incomplete `G3` still fail-closes without pseudo-runnable output + +## Phase 3: Lock Regression and Fail-Closed Integrity + +### Objective + +Prove the correction does not trade one false-positive for another. + +### WS3 Tasks + +1. add deterministic regression for the mixed `G3/G8` evidence pattern +2. add generator regression showing the corrected route stays inside `G3` +3. retain or strengthen `G8` regression so the boundary family does not collapse +4. verify that unresolved `G3` cases still fail closed for `G3` reasons + +### Deliverables + +1. regression tests for `G3 vs G8` +2. updated validation fixtures or assertions as needed + +### Acceptance Criteria + +1. no regression causes `G8` to disappear as a boundary archetype +2. no regression reintroduces a false-positive runnable skill +3. test coverage explicitly names the corrected mismatch pattern + +## Phase 4: Rerun the Real Sample and Close the Loop + +### Objective + +Use the actual real sample to confirm the correction outcome and record the next state. + +### WS4 Tasks + +1. rerun `sg_scene_generate` on `95598工单明细表` +2. record whether the sample now: + - resolves as `paginated_enrichment` + - or fail-closes inside `G3` +3. update the real-sample validation record layer +4. write a formal correction closure report + +### Deliverables + +1. rerun output +2. 
updated real-sample validation assets +3. `G3` archetype-correction closure report + +### Acceptance Criteria + +1. the rerun no longer reports `local_doc_pipeline` as the controlling archetype +2. the validation layer records the corrected family outcome +3. the next scope recommendation can move from `G3 archetype correction` to the next remaining mainline gap + +## Completion Criteria + +This plan is complete when: + +1. the `G3` real sample no longer collapses into `local_doc_pipeline` +2. the corrected route is covered by automated regression +3. real-sample validation assets are updated with the new outcome +4. `G8` remains a valid boundary-family archetype with no unintended regression + +## Next Step + +After this plan completes: + +1. if `G3` real-sample routing is corrected and still shows a `G3` contract gap, move to `G3` real-sample contract correction +2. if `G3` stabilizes, return to the next mainline mismatch in priority order, which is `G2` real-sample contract correction diff --git a/docs/superpowers/plans/2026-04-19-g3-real-sample-output-contract-verification-plan.md b/docs/superpowers/plans/2026-04-19-g3-real-sample-output-contract-verification-plan.md new file mode 100644 index 0000000..339565b --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-real-sample-output-contract-verification-plan.md @@ -0,0 +1,173 @@ +# G3 Real Sample Output Contract Verification Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g3-real-sample-output-contract-verification-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g3-real-sample-output-contract-verification-design.md) +> Trigger Report: [2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md) + +## Plan Intent + +This plan implements the next bounded mainline scope after `G3` runtime-scope 
correction: + +`G3 real-sample output / contract verification` + +Its purpose is to reduce the remaining real-sample mismatch from a generic verification gap to either: + +1. a verified pass +2. or a smaller named output/contract mismatch + +## Scope Guardrails + +1. do not reopen the completed `G3` archetype-correction scope +2. do not reopen the completed `G3` runtime-scope correction scope +3. do not broaden this work into `G8` runtime implementation +4. do not reopen `G3` family expansion or add unrelated fixtures +5. do not open `G4 / G5` +6. do not update validation assets until output verification produces a narrower outcome + +## Workstreams + +1. `WS1` Real-Sample Output Contract Differential +2. `WS2` G3 Output Contract Narrowing +3. `WS3` Regression and Verification Integrity +4. `WS4` Real-Sample Verification Rerun and Closure + +## Phase 0: Freeze the Verification Boundary + +### Objective + +Lock the scope to one remaining mismatch: `output_contract_not_verified`. + +### Tasks + +1. freeze `95598工单明细表` as the only verification anchor +2. freeze current remaining mismatch: + - `output_contract_not_verified` +3. freeze current `G3` routing and runtime-scope behavior as completed constraints that must not regress + +### Deliverables + +1. verification-boundary note +2. fixed output-gap statement + +### Acceptance Criteria + +1. no additional family or runtime scope is added under this plan +2. the correction target is explicitly `G3 output / contract verification` + +## Phase 1: Build the Real-Sample Output Contract Differential + +### Objective + +Understand exactly what part of the generated real-sample contract is still unverified. + +### Tasks + +1. compare the real generated `SceneIr` against the intended `G3` business output contract +2. isolate which fields are structurally present but semantically too broad +3. 
isolate whether the dominant gap is: + - main request selection + - enrichment request partitioning + - join key correctness + - merge/dedupe correctness + - export contract correctness +4. write a minimum verification-gap summary before code changes begin + +### Deliverables + +1. output-contract differential note +2. minimum verification-gap summary + +### Acceptance Criteria + +1. the smallest remaining output mismatch is explicit +2. the next change target is narrower than the current generic verification label + +## Phase 2: Narrow the G3 Output Contract Gap + +### Objective + +Reduce the real-sample mismatch from generic non-verified output to a specific verified contract state. + +### Tasks + +1. adjust the minimum `G3` output-contract logic only where the real sample proves it is too coarse +2. keep routing and runtime-scope logic unchanged unless required by output verification +3. preserve fail-closed behavior for scenes whose output contract is still unresolved + +### Deliverables + +1. bounded output-contract update +2. explicit verification rule in code or tests where needed + +### Acceptance Criteria + +1. the real-sample mismatch is narrower than `output_contract_not_verified` +2. no unrelated family is reclassified or broadened +3. the corrected result stays inside `G3` + +## Phase 3: Lock Regression and Verification Integrity + +### Objective + +Prove the narrower contract logic does not create false positives. + +### Tasks + +1. add or update regression that names the corrected real-sample verification pattern +2. retain mixed-boundary, `G8`, and canonical regressions +3. verify unresolved `G3` cases still fail closed when the output contract is genuinely incomplete + +### Deliverables + +1. regression tests for `G3` output verification +2. updated assertions where needed + +### Acceptance Criteria + +1. no regression causes `G8` to disappear as a boundary archetype +2. 
no regression causes unrelated `single_request_table` or other families to drift +3. test coverage explicitly names the corrected output-verification pattern + +## Phase 4: Rerun the Real Sample and Close the Loop + +### Objective + +Use the actual real sample to confirm the narrowed output-verification outcome and record the next state. + +### Tasks + +1. rerun `sg_scene_generate` on `95598工单明细表` +2. record whether: + - the sample becomes `executed-pass` + - or the remaining mismatch is narrower than `output_contract_not_verified` +3. update the real-sample validation record layer +4. write a formal closure report + +### Deliverables + +1. rerun output +2. updated real-sample validation assets +3. `G3` output-contract-verification closure report + +### Acceptance Criteria + +1. the rerun no longer leaves the generic `output_contract_not_verified` label unchanged +2. the validation layer records a narrower family outcome +3. the next scope recommendation can move from `G3` to the next mainline gap when appropriate + +## Completion Criteria + +This plan is complete when: + +1. the `G3` real sample no longer ends at the generic `output_contract_not_verified` label +2. the narrowed result is covered by automated regression +3. real-sample validation assets are updated with the new outcome +4. `G8` and prior `G3` routing/runtime corrections remain intact + +## Next Step + +After this plan completes: + +1. if `G3` becomes `executed-pass`, return to the next mainline mismatch in priority order, which is `G2` real-sample contract correction +2. 
if `G3` still has a smaller output-specific mismatch, move only to that narrower `G3` verification slice diff --git a/docs/superpowers/plans/2026-04-19-g3-real-sample-runtime-contract-correction-plan.md b/docs/superpowers/plans/2026-04-19-g3-real-sample-runtime-contract-correction-plan.md new file mode 100644 index 0000000..8ca9ab6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-real-sample-runtime-contract-correction-plan.md @@ -0,0 +1,166 @@ +# G3 Real Sample Runtime Contract Correction Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g3-real-sample-runtime-contract-correction-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g3-real-sample-runtime-contract-correction-design.md) +> Trigger Report: [2026-04-19-g3-real-sample-archetype-correction-closure-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g3-real-sample-archetype-correction-closure-report.md) + +## Plan Intent + +This plan implements the next bounded scope selected after `G3` archetype correction: + +`mainline G3 real-sample runtime / contract correction` + +Its purpose is to narrow the remaining real-sample gap from a coarse runtime-scope failure to the smallest accurate contract state. + +## Scope Guardrails + +1. do not reopen the completed `G3` archetype-correction scope +2. do not broaden this work into `G8` runtime implementation +3. do not open `G4 / G5` +4. do not add new family-expansion fixtures unrelated to the real-sample mismatch +5. do not weaken fail-closed behavior for incomplete `G3` scenes +6. do not update validation assets until the rerun result changes + +## Workstreams + +1. `WS1` Runtime-Scope Differential +2. `WS2` G3 Runtime-Scope Gate Narrowing +3. `WS3` Regression and Fail-Closed Integrity +4. 
`WS4` Real-Sample Rerun and Closure + +## Phase 0: Freeze the Correction Boundary + +### Objective + +Lock the scope to one remaining mismatch: `G3` real-sample runtime scope compatibility. + +### Tasks + +1. freeze `95598工单明细表` as the only correction anchor +2. freeze current remaining mismatch: + - `runtime_scope_gap` + - `output_contract_not_verified` +3. freeze current `G8` behavior as a boundary-family constraint that must not regress + +### Deliverables + +1. correction-boundary note +2. fixed runtime-gap statement + +### Acceptance Criteria + +1. no additional family or runtime scope is added under this plan +2. the correction target is explicitly `G3 runtime scope`, not a broader runtime program + +## Phase 1: Build the Runtime-Scope Differential + +### Objective + +Understand why the current gate still marks the real sample as runtime-incompatible. + +### Tasks + +1. compare current `G3` runtime-scope gate logic against the corrected real-sample evidence +2. isolate which localhost evidence should remain subordinate +3. isolate what dominant-runtime pattern should still fail closed +4. write a minimum gate-narrowing summary before code changes begin + +### Deliverables + +1. runtime-scope differential note +2. gate-narrowing summary + +### Acceptance Criteria + +1. the minimum change to `g3_runtime_scope_compatible` is explicit +2. the team can distinguish subordinate host-runtime evidence from dominant runtime takeover + +## Phase 2: Narrow the G3 Runtime-Scope Gate + +### Objective + +Allow valid `G3` real samples with subordinate localhost evidence to stay runtime-compatible. + +### Tasks + +1. narrow `g3_runtime_scope_compatible` so it considers business-chain dominance, not only localhost evidence count +2. preserve fail-closed behavior for scenes whose business chain is still not dominant +3. keep `G8` representative behavior intact + +### Deliverables + +1. generator gate update +2. 
explicit regression rule for subordinate localhost evidence inside `G3` + +### Acceptance Criteria + +1. the corrected real sample no longer fails the runtime-scope gate for the old coarse reason +2. `G8` representative classification remains intact +3. incomplete `G3` scenes still fail closed for `G3` reasons + +## Phase 3: Lock Regression and Fail-Closed Integrity + +### Objective + +Prove the narrowed gate does not create a pseudo-runnable class of scenes. + +### Tasks + +1. add regression for real-sample-like `G3` with subordinate localhost evidence +2. retain `G8` regression and mixed-boundary regression +3. verify unresolved `G3` scenes still fail closed when business-chain dominance is absent + +### Deliverables + +1. regression tests for `G3 runtime scope` +2. updated assertions where needed + +### Acceptance Criteria + +1. no regression causes `G8` to disappear as a boundary archetype +2. no regression causes unrelated `single_request_table` or other families to drift +3. test coverage explicitly names the corrected runtime-scope pattern + +## Phase 4: Rerun the Real Sample and Close the Loop + +### Objective + +Use the actual real sample to confirm the narrowed runtime-scope outcome and record the next state. + +### Tasks + +1. rerun `sg_scene_generate` on `95598工单明细表` +2. record whether: + - `g3_runtime_scope_compatible` now passes + - remaining mismatch, if any, is narrower than runtime-scope failure +3. update the real-sample validation record layer +4. write a formal closure report + +### Deliverables + +1. rerun output +2. updated real-sample validation assets +3. `G3` runtime-contract-correction closure report + +### Acceptance Criteria + +1. the rerun no longer fails for `g3_runtime_scope` +2. the validation layer records the narrowed family outcome +3. the next scope recommendation can move from `G3 runtime correction` to the next remaining mainline gap + +## Completion Criteria + +This plan is complete when: + +1. 
the `G3` real sample no longer fails for the old coarse runtime-scope reason +2. the narrowed gate is covered by automated regression +3. real-sample validation assets are updated with the new outcome +4. `G8` remains a valid boundary-family archetype with no unintended regression + +## Next Step + +After this plan completes: + +1. if `G3` still has a narrower output or data-verification gap, move to `G3` real-sample output or contract verification +2. if `G3` stabilizes, return to the next mainline mismatch in priority order, which is `G2` real-sample contract correction diff --git a/docs/superpowers/plans/2026-04-19-g3-residual-4-workflow-evidence-closure-plan.md b/docs/superpowers/plans/2026-04-19-g3-residual-4-workflow-evidence-closure-plan.md new file mode 100644 index 0000000..49f78f6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-residual-4-workflow-evidence-closure-plan.md @@ -0,0 +1,50 @@ +# G3 Residual 4 Workflow Evidence Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md` +> Parent Route: `Residual Route A` +> Parent Layer: `Layer C` + +## Plan Intent + +Close the `4` remaining `G3 / paginated_enrichment` structured fail-closed scenes by recovering missing workflow evidence without relaxing gates. + +## Fixed Input Bucket + +1. `sweep-007-scene` / `95598供电服务月报` +2. `sweep-039-scene` / `故障报修工单信息统计表` +3. `sweep-068-scene` / `输变电设备运行分析报告` +4. `sweep-084-scene` / `巡视计划完成情况自动检索` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/scene_generator_test.rs` +4. route-local follow-up JSON/report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. family baseline manifests +3. G6/G8 runtime implementation files + +## Tasks + +1. inspect the four fixed reports and source scenes; +2. identify the repeated missing G3 evidence subtype; +3. 
implement one bounded G3 recovery slice; +4. rerun only the four fixed scenes; +5. publish delta report. + +## Expected Coverage Delta + +Target: reduce the `4` G3 residual fail-closed records. + +## Stop Statement + +Stop after the four-scene route-local follow-up and report. + +Do not continue into G2 or boundary residual work. + diff --git a/docs/superpowers/plans/2026-04-19-g3-residual-contract-closure-plan.md b/docs/superpowers/plans/2026-04-19-g3-residual-contract-closure-plan.md new file mode 100644 index 0000000..ff7010b --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g3-residual-contract-closure-plan.md @@ -0,0 +1,54 @@ +# G3 Residual Contract Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` +> Parent Layer: `Layer C + Layer D` +> Upstream Design: `docs/superpowers/specs/2026-04-19-g3-residual-contract-closure-design.md` + +## Plan Intent + +Implement the final bounded Route 2 slice for any `G3` residual contract blockers left after enrichment-request and export-plan closure work. + +## Fixed Input Bucket + +Residual `G3 / paginated_enrichment` bucket after the first two Route 2 child plans. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. route-local residual inventory and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 3+ implementation assets + +## Tasks + +1. freeze post-Route-2 residual inventory +2. group residual blockers +3. implement at most one bounded residual correction slice +4. rerun bounded validation +5. declare Route 2 complete or deferred + +## Expected Coverage Delta + +1. shrink or explicitly name the final residual `G3` bucket +2. 
produce a clean handoff into Route 3 + +## Completion Criteria + +1. Route 2 is no longer open-ended +2. remaining residual `G3` records are explicitly categorized + +## Stop Statement + +Stop after Route 2 is explicitly closed or deferred. + +Do not begin Route 3 work under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-semantics-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-semantics-plan.md new file mode 100644 index 0000000..c5807fd --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-semantics-plan.md @@ -0,0 +1,72 @@ +# G6 Host-Bridge Callback Semantics Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-callback-semantics-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-semantics-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge callback semantics` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly +3. do not open `G8` +4. do not reopen `G7` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze callback semantics scope +2. `WS2` Define completion-state semantics +3. `WS3` Publish one bounded callback-semantic result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to callback semantics only. + +### Tasks + +1. freeze `G6` as the only target +2. freeze transport/runtime implementation and real execution as out of scope + +### Acceptance Criteria + +1. no broader host-runtime work begins under this plan + +## Phase 1: Define Completion-State Semantics + +### Objective + +Turn callback completion into an explicit bounded semantic model. + +### Tasks + +1. define `ok` +2. define `partial` +3. define `blocked` +4. define `error` + +### Acceptance Criteria + +1. 
callback state logic is explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the callback-semantic model into one bounded next artifact. + +### Tasks + +1. publish the semantic result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step remains narrower than direct host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-state-verification-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-state-verification-plan.md new file mode 100644 index 0000000..5091b9f --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-state-verification-plan.md @@ -0,0 +1,72 @@ +# G6 Host-Bridge Callback State Verification Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-callback-state-verification-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-state-verification-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge callback state verification` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly +3. do not open `G8` +4. do not reopen `G7` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze callback-state verification scope +2. `WS2` Define verification targets for `ok/partial/blocked/error` +3. `WS3` Publish one bounded verification result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to callback-state verification only. + +### Tasks + +1. freeze `G6` as the only target +2. freeze implementation and real execution as out of scope + +### Acceptance Criteria + +1. no broader host-runtime work begins under this plan + +## Phase 1: Define Verification Targets + +### Objective + +Turn the explicit callback states into bounded verification targets. + +### Tasks + +1. 
define verification target for `ok` +2. define verification target for `partial` +3. define verification target for `blocked` +4. define verification target for `error` + +### Acceptance Criteria + +1. callback verification targets are explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the callback-state verification model into one bounded next artifact. + +### Tasks + +1. publish the verification result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step remains narrower than direct host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-plan.md new file mode 100644 index 0000000..785e77e --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-plan.md @@ -0,0 +1,71 @@ +# G6 Host-Bridge Entry Gate Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-entry-gate-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge entry gate` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly +3. do not open `G8` +4. do not reopen `G7` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze entry-gate scope +2. `WS2` Define bounded gate conditions +3. `WS3` Publish one bounded gate result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to entry-gate modeling only. + +### Tasks + +1. freeze `G6` as the only target +2. freeze implementation and real execution as out of scope + +### Acceptance Criteria + +1. no broader host-runtime work begins under this plan + +## Phase 1: Define Gate Conditions + +### Objective + +Turn the semantic readiness criteria into bounded gate conditions. + +### Tasks + +1. 
define hard gate conditions +2. define soft/optional later conditions +3. define fail-close gate reasons + +### Acceptance Criteria + +1. entry-gate conditions are explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the gate model into one bounded next artifact. + +### Tasks + +1. publish the gate result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step remains narrower than direct host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-verification-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-verification-plan.md new file mode 100644 index 0000000..fd8a882 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-verification-plan.md @@ -0,0 +1,70 @@ +# G6 Host-Bridge Entry Gate Verification Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-entry-gate-verification-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-verification-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge entry gate verification` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly +3. do not open `G8` +4. do not reopen `G7` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze gate-verification scope +2. `WS2` Define bounded verification targets for the hard gate +3. `WS3` Publish one bounded verification result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to gate verification only. + +### Tasks + +1. freeze `G6` as the only target +2. freeze implementation and real execution as out of scope + +### Acceptance Criteria + +1. no broader host-runtime work begins under this plan + +## Phase 1: Define Verification Targets + +### Objective + +Turn the hard gate into bounded verification targets. 
+ +### Tasks + +1. define verification target for each hard gate condition +2. define verification target for each fail-close reason + +### Acceptance Criteria + +1. gate verification targets are explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the gate-verification model into one bounded next artifact. + +### Tasks + +1. publish the verification result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step remains narrower than direct host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-readiness-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-readiness-plan.md new file mode 100644 index 0000000..e525080 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-readiness-plan.md @@ -0,0 +1,71 @@ +# G6 Host-Bridge Entry Readiness Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-entry-readiness-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-readiness-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge entry readiness` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly +3. do not open `G8` +4. do not reopen `G7` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze entry-readiness scope +2. `WS2` Define bounded readiness criteria +3. `WS3` Publish one bounded readiness result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to entry-readiness only. + +### Tasks + +1. freeze `G6` as the only target +2. freeze implementation and real execution as out of scope + +### Acceptance Criteria + +1. no broader host-runtime work begins under this plan + +## Phase 1: Define Readiness Criteria + +### Objective + +Turn the explicit callback verification model into bounded entry-readiness criteria. 
+ +### Tasks + +1. define which semantics are required before `G6` entry can open +2. define which semantics remain optional +3. define the minimal readiness threshold + +### Acceptance Criteria + +1. entry-readiness criteria are explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the readiness model into one bounded next artifact. + +### Tasks + +1. publish the readiness result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step remains narrower than direct host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-execution-semantics-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-execution-semantics-plan.md new file mode 100644 index 0000000..82904e9 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-execution-semantics-plan.md @@ -0,0 +1,71 @@ +# G6 Host-Bridge Execution Semantics Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-execution-semantics-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-execution-semantics-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge execution semantics` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly +3. do not open `G8` +4. do not reopen `G7` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze the semantic boundary +2. `WS2` Separate bridge invocation from callback completion +3. `WS3` Publish one bounded semantic result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to semantic scoping only. + +### Tasks + +1. freeze `G6` as the only target +2. freeze real execution and implementation as out of scope + +### Acceptance Criteria + +1. 
no host-runtime implementation begins under this plan + +## Phase 1: Separate the Minimum Semantics + +### Objective + +Turn the blocked capability into explicit bounded semantics. + +### Tasks + +1. isolate bridge action invocation semantics +2. isolate callback completion semantics +3. keep both separate from broader host-runtime work + +### Acceptance Criteria + +1. the semantic model is explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the semantic model into one bounded next artifact. + +### Tasks + +1. publish the semantic result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step remains narrower than direct host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-host-bridge-prerequisites-plan.md b/docs/superpowers/plans/2026-04-19-g6-host-bridge-prerequisites-plan.md new file mode 100644 index 0000000..4bbc28e --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-host-bridge-prerequisites-plan.md @@ -0,0 +1,71 @@ +# G6 Host-Bridge Prerequisites Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-host-bridge-prerequisites-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-host-bridge-prerequisites-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G6 host-bridge prerequisites` + +## Scope Guardrails + +1. do not execute a `G6` real sample +2. do not implement host-runtime directly under this plan +3. do not reopen `G7` +4. do not open `G8` +5. do not open `G4 / G5` + +## Workstreams + +1. `WS1` Freeze the G6 prerequisite boundary +2. `WS2` Isolate the minimum blocked host-bridge capability +3. `WS3` Publish one bounded prerequisite result + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to `G6` prerequisite scoping only. + +### Tasks + +1. freeze `G6` as the only target +2. 
freeze `G6` real-sample execution as out of scope + +### Acceptance Criteria + +1. no other boundary family is touched under this plan + +## Phase 1: Isolate the Minimum Blocked Capability + +### Objective + +Reduce `G6` prerequisite pressure to the smallest explicit capability gap. + +### Tasks + +1. restate the current `G6` hold condition +2. isolate the minimum host-bridge execution semantic still missing +3. keep that capability separate from broader runtime-platform work + +### Acceptance Criteria + +1. the blocked capability is explicit and bounded + +## Phase 2: Publish the Bounded Result + +### Objective + +Turn the isolated prerequisite into one bounded next artifact. + +### Tasks + +1. publish the prerequisite result +2. if needed, publish the next bounded follow-up plan + +### Acceptance Criteria + +1. the next step is narrower than broad host-runtime implementation diff --git a/docs/superpowers/plans/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-plan.md b/docs/superpowers/plans/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-plan.md new file mode 100644 index 0000000..e43aee3 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-plan.md @@ -0,0 +1,159 @@ +# G6 Real-Sample Entry Preparation And Bounded Execution Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-design.md) + +## Plan Intent + +This plan is the only surviving `G6` execution plan after the redesign. + +Its purpose is: + +`stop G6 planning recursion and move directly to one bounded implementation-plus-real-sample slice` + +## Scope Guardrails + +1. do not open any new `G6` semantic sub-plan +2. do not reopen `G7` +3. do not open `G8` +4. do not open `G4 / G5` +5. 
do not broaden into host-runtime platform redesign +6. do not add more than one fixed `G6` real sample + +## Preserved G6 Gate + +The final frozen `G6` gate under this plan is: + +### Hard Conditions + +1. `host-bridge-action-invocation-defined` +2. `callback-request-completion-defined` +3. `callback-state-verification-targets-defined` + +### Soft Later Conditions + +1. `host-runtime-transport-implementation` +2. `real-sample-execution-proof` + +### Fail-Close Reasons + +1. `g6_bridge_invocation_semantics_missing` +2. `g6_callback_completion_semantics_missing` +3. `g6_callback_state_targets_missing` + +## Workstreams + +1. `WS1` Freeze the Final G6 Entry Gate +2. `WS2` Implement the Minimum Host-Bridge Execution Seam +3. `WS3` Run the Fixed G6 Real Sample +4. `WS4` Write Back Validation And Close + +## Phase 0: Freeze The Final Gate + +### Objective + +Stop semantic drift and declare the gate final for this execution slice. + +### Tasks + +1. treat the hard `G6` gate as frozen +2. treat the fail-close reasons as frozen +3. explicitly forbid any further `G6` semantic micro-plan under this line + +### Deliverables + +1. final frozen `G6` gate note +2. final fixed-sample statement + +### Acceptance Criteria + +1. no further `G6` semantic clarification plan is produced + +## Phase 1: Implement The Minimum Execution Seam + +### Objective + +Add only the minimum implementation needed to let the fixed `G6` real sample enter one controlled execution attempt. + +### Tasks + +1. implement the minimum host-bridge invocation seam required by the fixed sample +2. implement the minimum callback completion handling required by the fixed sample +3. keep the change narrower than generic host-runtime redesign +4. preserve fail-close behavior when the frozen hard conditions are not met + +### Deliverables + +1. bounded `G6` code change +2. bounded regression tests + +### Acceptance Criteria + +1. `G6` execution support is improved only at the seam required by the fixed sample +2. 
unrelated families are untouched
+3. fail-close remains explicit
+
+## Phase 2: Execute The Fixed Real Sample
+
+### Objective
+
+Use one real `G6` sample to prove whether the bounded implementation slice is enough.
+
+### Tasks
+
+1. run the fixed `G6` real sample once
+2. classify the result only as:
+   - `executed-pass`
+   - `named mismatch`
+3. do not open a new semantic sub-plan regardless of result
+
+### Deliverables
+
+1. real execution result
+2. fixed-sample execution note
+
+### Acceptance Criteria
+
+1. the result is narrower than `not executed`
+2. the result is not deferred into another semantic-planning loop
+
+## Phase 3: Validation Closure
+
+### Objective
+
+Write the fixed result back and close the line.
+
+### Tasks
+
+1. update validation-layer assets
+2. if pass: close `G6`
+3. if mismatch: write one implementation correction plan only
+4. publish a closure report
+
+### Deliverables
+
+1. validation asset update
+2. closure report
+3. optional implementation correction plan if mismatch occurs
+
+### Acceptance Criteria
+
+1. `G6` ends in `executed-pass` or `named mismatch`
+2. no new semantic micro-plan is emitted
+
+## Completion Criteria
+
+This plan is complete when:
+
+1. one bounded implementation seam is landed
+2. one fixed `G6` real sample is executed
+3. the line closes with `executed-pass` or `named mismatch`
+
+## Non-Negotiable Stop Rule
+
+After this plan starts executing:
+
+1. do not create another `G6` semantic plan
+2. if the run fails, create only one implementation correction plan
+3. 
if the run passes, close the `G6` line immediately diff --git a/docs/superpowers/plans/2026-04-19-g7-real-sample-entry-plan.md b/docs/superpowers/plans/2026-04-19-g7-real-sample-entry-plan.md new file mode 100644 index 0000000..e72cfde --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-g7-real-sample-entry-plan.md @@ -0,0 +1,92 @@ +# G7 Real-Sample Entry Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-g7-real-sample-entry-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-g7-real-sample-entry-design.md) + +## Plan Intent + +This plan executes one bounded next slice: + +`G7 real-sample entry` + +## Scope Guardrails + +1. do not reopen mainline families +2. do not execute `G6` or `G8` +3. do not add new `G7` family fixtures +4. do not implement new runtime-platform prerequisites under this plan +5. do not open `G4 / G5` + +## Fixed Verification Anchor + +The only target under this plan is: + +1. `计量资产库存统计` + +## Workstreams + +1. `WS1` Freeze the G7 real-sample boundary +2. `WS2` Build the real-sample contract differential +3. `WS3` Rerun the fixed real sample against the existing G7 runtime contract +4. `WS4` Update validation assets and close the loop + +## Phase 0: Freeze the Boundary + +### Objective + +Lock the plan to one `G7` representative sample. + +### Tasks + +1. freeze `计量资产库存统计` as the only real-sample anchor +2. freeze existing `G7` repo-local runtime contract as the starting baseline + +### Acceptance Criteria + +1. no other boundary family is touched under this plan + +## Phase 1: Build the Differential + +### Objective + +Understand whether the existing `G7` runtime contract is already close enough for a real-sample rerun. + +### Tasks + +1. compare the representative `G7` fixture contract to the chosen real sample +2. isolate the smallest remaining contract risk + +### Acceptance Criteria + +1. 
the rerun target is explicit and bounded + +## Phase 2: Real-Sample Rerun + +### Objective + +Use the fixed real sample to test the current `G7` runtime contract. + +### Tasks + +1. run `sg_scene_generate` on the fixed `G7` real sample +2. record whether the result is a pass or a smaller mismatch + +### Acceptance Criteria + +1. the outcome is narrower than `not yet executed` + +## Phase 3: Validation Closure + +### Objective + +Write the result back into the validation layer and close the bounded slice. + +### Tasks + +1. update validation assets if the outcome narrows +2. write a closure report + +### Acceptance Criteria + +1. the next boundary-family ambiguity is reduced further without broadening roadmap scope diff --git a/docs/superpowers/plans/2026-04-19-host-bridge-runtime-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-host-bridge-runtime-roadmap-plan.md new file mode 100644 index 0000000..795cee9 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-host-bridge-runtime-roadmap-plan.md @@ -0,0 +1,44 @@ +# Host-Bridge Runtime Roadmap Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Fixed Scene: `sweep-085-scene` +> Status: Draft + +## Plan Intent + +Run a bounded host-bridge runtime slice for the single remaining `host_bridge_workflow` residual. + +## Fixed Input Bucket + +1. `sweep-085-scene` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/scene_generator_test.rs` +4. `tests/fixtures/generated_scene/host_bridge_runtime_followup_2026-04-19.json` +5. `tests/fixtures/generated_scene/host_bridge_runtime_reconciliation_candidates_2026-04-19.json` +6. `docs/superpowers/reports/2026-04-19-host-bridge-runtime-roadmap-report.md` + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. 
Freeze the current `sweep-085-scene` generation report. +2. Identify the exact host-bridge runtime missing piece. +3. Implement at most one bounded correction slice if it can be expressed in generated-scene contract or fail-closed reporting. +4. Rerun only `sweep-085-scene`. +5. Publish follow-up and reconciliation candidate assets. + +## Expected Delta + +Target delta is `+1 framework-auto-pass-candidate` if the host-bridge contract can be closed without full runtime transport. Otherwise the delta is `0`, with a narrower named runtime hold. + +## Stop Statement + +Stop after the single-scene follow-up and reconciliation candidates are published. Do not update the official board under this plan. diff --git a/docs/superpowers/plans/2026-04-19-local-doc-official-board-reconciliation-refresh-plan.md b/docs/superpowers/plans/2026-04-19-local-doc-official-board-reconciliation-refresh-plan.md new file mode 100644 index 0000000..7bd28b8 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-local-doc-official-board-reconciliation-refresh-plan.md @@ -0,0 +1,50 @@ +# Local-Doc Official Board Reconciliation Refresh Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Roadmap: `2026-04-19-local-doc-runtime-roadmap-plan.md` +> Status: Active + +## Plan Intent + +Refresh the official execution board using the five local-doc framework auto-pass candidates produced by the local-doc runtime roadmap. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/local_doc_runtime_reconciliation_candidates_2026-04-19.json` +2. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Allowed Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/local_doc_official_board_reconciliation_refresh_2026-04-19.json` +3. 
`docs/superpowers/reports/2026-04-19-local-doc-official-board-reconciliation-refresh-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` + +## Tasks + +1. Load the official execution board. +2. Load the local-doc reconciliation candidates. +3. Verify the candidate asset contains exactly the five fixed local-doc scene ids. +4. Match board rows by `sceneId`. +5. Update only framework-layer fields for the five matched rows. +6. Recompute board framework summary counts. +7. Publish reconciliation refresh JSON. +8. Publish reconciliation refresh report. + +## Acceptance Criteria + +1. Board scene count remains `102`. +2. The five fixed local-doc scene ids have `currentFrameworkStatus = framework-auto-pass`. +3. Board framework counts are `framework-auto-pass = 100` and `framework-structured-fail-closed = 2`. +4. Host-bridge and bootstrap residuals remain structured fail-closed. +5. Analyzer and generator are not modified by this plan. + +## Stop Statement + +Stop after the local-doc official board reconciliation refresh JSON and report are published. Do not start host-bridge runtime or bootstrap normalization under this plan. diff --git a/docs/superpowers/plans/2026-04-19-local-doc-runtime-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-local-doc-runtime-roadmap-plan.md new file mode 100644 index 0000000..8934745 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-local-doc-runtime-roadmap-plan.md @@ -0,0 +1,54 @@ +# Local-Doc Runtime Roadmap Plan + +> Date: 2026-04-19 +> Parent Decision: `2026-04-19-residual-runtime-roadmap-prioritization-plan.md` +> Parent Residual Bucket: `local_doc_pipeline` +> Status: Draft + +## Plan Intent + +Plan the bounded closure path for the five `local_doc_pipeline` residuals selected by the residual runtime roadmap prioritization decision. + +## Fixed Input Bucket + +Only these scenes are in scope: + +1. `sweep-033-scene` +2. `sweep-034-scene` +3. `sweep-042-scene` +4. 
`sweep-051-scene` +5. `sweep-074-scene` + +## Initial Phases + +### Phase 0: Freeze Local-Doc Residual Baseline + +Capture current generation reports and missing pieces for the five scenes. + +### Phase 1: Local-Doc Evidence Inventory + +Classify document source, attachment dependency, local service dependency, and output artifact expectation. + +### Phase 2: Minimal Local-Doc Contract Design + +Define the smallest contract that can distinguish runnable local-doc pipelines from policy-held local-doc pipelines. + +### Phase 3: Bounded Implementation Slice + +Implement only the contract recovery or fail-closed detail required by the five-scene bucket. + +### Phase 4: Follow-Up Sweep And Reconciliation + +Rerun only the five target scenes and publish candidates. Do not update the official board inside this phase. + +## Forbidden Scope + +1. host-bridge runtime roadmap; +2. bootstrap target normalization; +3. G4/G5; +4. full attachment runtime implementation unless explicitly required by the minimal contract; +5. official board update. + +## Stop Statement + +Stop after the local-doc five-scene follow-up and reconciliation candidates are published. A later official board reconciliation plan must consume the result. diff --git a/docs/superpowers/plans/2026-04-19-official-board-reconciliation-plan.md b/docs/superpowers/plans/2026-04-19-official-board-reconciliation-plan.md new file mode 100644 index 0000000..287a821 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-official-board-reconciliation-plan.md @@ -0,0 +1,50 @@ +# Official Board Reconciliation Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer E` +> Status: Active + +## Plan Intent + +Update the official execution board from the final coverage status rollup. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/final_coverage_status_rollup_2026-04-19.json` +2. 
`tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Allowed Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/official_board_reconciliation_2026-04-19.json` +3. `docs/superpowers/reports/2026-04-19-official-board-reconciliation-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` + +## Tasks + +1. Load the official execution board. +2. Load the final coverage rollup. +3. Match scenes by `sceneId` where present, falling back to ordered index only if necessary. +4. Preserve frozen workbook fields. +5. Add final framework status fields to each board scene. +6. Update board summary with framework status counts. +7. Publish reconciliation JSON. +8. Publish reconciliation report. + +## Completion Criteria + +1. Board scene count remains `102`. +2. Framework status counts are `95` framework auto-pass and `7` structured fail-closed. +3. No source-unreadable, unsupported-family, missing-source, or unresolved status remains. +4. Analyzer and generator are not modified by this plan. +5. Reconciliation report is published. + +## Stop Statement + +Stop after the official board reconciliation JSON and report are published. Do not start runtime-roadmap work under this plan. 
diff --git a/docs/superpowers/plans/2026-04-19-post-g7-boundary-decision-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-post-g7-boundary-decision-roadmap-plan.md new file mode 100644 index 0000000..f83dcf8 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-post-g7-boundary-decision-roadmap-plan.md @@ -0,0 +1,144 @@ +# Post-G7 Boundary Decision Roadmap Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: [2026-04-19-post-g7-boundary-decision-roadmap-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-19-post-g7-boundary-decision-roadmap-design.md) + +## Plan Intent + +This roadmap determines the next bounded step after `G7` has already closed as the first boundary-family executed real sample. + +Its only purpose is: + +`decide whether G6 or G8 may enter real-sample execution scope next, or whether both remain held pending prerequisites` + +## Scope Guardrails + +1. do not reopen `G7` +2. do not reopen `G1-E / G2 / G3` +3. do not implement runtime-platform prerequisites under this roadmap +4. do not execute real samples for more than one remaining boundary family +5. do not open `G4 / G5` + +## Candidate Directions + +The only remaining directions under this roadmap are: + +1. `G6` +2. `G8` +3. `prerequisites-only hold` + +## Workstreams + +1. `WS1` Freeze the Post-G7 Starting State +2. `WS2` Compare G6 and G8 Entry Cost +3. `WS3` Select One Next Direction +4. `WS4` Publish the Next Bounded Slice + +## Phase 0: Freeze the Starting State + +### Objective + +Lock the roadmap start point so the decision cannot drift back into closed work. + +### Tasks + +1. freeze `G7` as closed executed-pass +2. freeze `G6` and `G8` as the only remaining boundary candidates +3. freeze `G1-E / G2 / G3` as closed +4. freeze `G4 / G5` as out of scope + +### Deliverables + +1. starting-state note +2. fixed candidate list + +### Acceptance Criteria + +1. 
no closed family is reopened under this roadmap + +## Phase 1: Compare the Remaining Boundary Candidates + +### Objective + +Compare `G6` and `G8` using explicit entry cost and prerequisite pressure. + +### Tasks + +1. restate the current hold condition for `G6` +2. restate the current hold condition for `G8` +3. compare which one requires the smaller new capability to enter real-sample scope +4. compare whether either direction is still too expensive and should remain held + +### Deliverables + +1. `G6 vs G8` comparison matrix +2. smallest-next-step summary + +### Acceptance Criteria + +1. the preferred next direction is justified explicitly +2. the non-selected direction has an explicit hold reason + +## Phase 2: Select One Next Direction + +### Objective + +Reduce the post-`G7` ambiguity to one bounded decision. + +### Tasks + +1. select exactly one direction: + - `G6` + - `G8` + - or `prerequisites-only hold` +2. record why the other directions remain out of scope + +### Deliverables + +1. post-`G7` boundary decision +2. hold reasons for non-selected directions + +### Acceptance Criteria + +1. only one next direction is opened +2. the decision is bounded and defensible + +## Phase 3: Publish the Next Bounded Slice + +### Objective + +Turn the decision into the next executable bounded artifact. + +### Tasks + +1. if `G6` is selected, write a bounded `G6 real-sample entry` design and plan +2. if `G8` is selected, write a bounded `G8 real-sample entry` design and plan +3. if `prerequisites-only hold` is selected, write a bounded prerequisites roadmap +4. publish a roadmap closure report + +### Deliverables + +1. next bounded `design` +2. next bounded `plan` +3. roadmap closure report + +### Acceptance Criteria + +1. the next step is ready without extending this roadmap +2. only one bounded direction is emitted + +## Completion Criteria + +This roadmap is complete when: + +1. the post-`G7` next step is reduced to one bounded direction +2. 
`G6` and `G8` no longer compete ambiguously +3. a single follow-up `design + plan` exists for the selected direction + +## Next Step + +After this roadmap completes: + +1. execute the selected bounded slice +2. do not reopen this roadmap during execution diff --git a/docs/superpowers/plans/2026-04-19-promotion-and-board-reconciliation-policy-plan.md b/docs/superpowers/plans/2026-04-19-promotion-and-board-reconciliation-policy-plan.md new file mode 100644 index 0000000..af6a29b --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-promotion-and-board-reconciliation-policy-plan.md @@ -0,0 +1,60 @@ +# Promotion And Board Reconciliation Policy Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 6: promotion and board reconciliation` +> Parent Layer: `Layer E` +> Upstream Design: `docs/superpowers/specs/2026-04-19-promotion-and-board-reconciliation-policy-design.md` + +## Plan Intent + +Publish the promotion and reconciliation policy that governs how future stronger statuses may update official scene-state assets. + +## Fixed Input Bucket + +Policy inputs only: + +1. `auto-pass` +2. `fail-closed-known` +3. `adjudicated-valid-host-bridge` +4. hygiene-aware timeout interpretation + +## Allowed Files + +1. policy design and plan docs +2. policy JSON assets +3. policy reports + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. define promotion thresholds +2. define how timeout hygiene is represented +3. define how structured fail-closed progress is represented +4. define what evidence is sufficient for board reconciliation +5. publish policy assets + +## Expected Coverage Delta + +No direct scene-count delta is required. + +The expected result is policy readiness for later rule-driven reconciliation. 
+ +## Completion Criteria + +1. promotion thresholds are explicit +2. timeout hygiene representation is explicit +3. board update rules are explicit + +## Stop Statement + +Stop after the Route 6 policy is published. + +Do not update the execution board under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-remaining-route-conflict-correction-plan.md b/docs/superpowers/plans/2026-04-19-remaining-route-conflict-correction-plan.md new file mode 100644 index 0000000..dd6f9d9 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-remaining-route-conflict-correction-plan.md @@ -0,0 +1,168 @@ +# Remaining Route Conflict Correction Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: `docs/superpowers/specs/2026-04-19-remaining-route-conflict-correction-design.md` + +## Plan Intent + +Adjudicate and, where evidence supports it, correct the remaining `4` route conflicts from the follow-up full sweep. + +This is a bounded route-conflict plan, not a new full-sweep roadmap. + +## Fixed Input + +Use only the `4` `misclassified` records from: + +`tests/fixtures/generated_scene/full_sweep_improvement_followup_2026-04-19.json` + +The fixed scene set is: + +1. `95598报修工单日管控` +2. `95598重要服务事项报备统计表` +3. `台区线损台区月度高负损预测` +4. `配网支撑月报(95598抢修统计报表)` + +## Fixed Outputs + +1. `tests/fixtures/generated_scene/remaining_route_conflict_decisions_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-remaining-route-conflict-correction-report.md` + +## Scope Guardrails + +1. do not touch timeout handling +2. do not touch structured fail-closed reporting +3. do not add new families +4. do not update execution board +5. do not promote scenes +6. do not weaken current `G2/G3/G6` pass cases +7. do not force a scene into G2/G3 if host bridge is the only complete path + +## Phase 0: Freeze Conflict Set + +### Objective + +Freeze the `4` route conflicts as the only input. + +### Tasks + +1. read `full_sweep_improvement_followup_2026-04-19.json` +2. 
select only `dryRunStatus = misclassified` +3. verify count is `4` +4. freeze expected group and inferred archetype for each record + +### Deliverables + +1. frozen route conflict inventory + +### Acceptance Criteria + +1. exactly `4` records are in scope +2. no extra scene is added + +## Phase 1: Evidence Adjudication + +### Objective + +Decide whether each conflict should be corrected or retained as host bridge. + +### Tasks + +1. inspect existing generation reports for the `4` records +2. compare business-chain evidence against host-bridge evidence +3. apply the route decision model: + - `route-corrected-to-g3` + - `route-corrected-to-g2` + - `valid-host-bridge-workflow` + - `board-expectation-stale` + - `route-conflict-unresolved` +4. write preliminary decisions + +### Deliverables + +1. preliminary route conflict decision table + +### Acceptance Criteria + +1. all `4` records have a preliminary decision +2. no code is changed before evidence is adjudicated + +## Phase 2: Bounded Route Correction + +### Objective + +Apply only the route corrections justified by Phase 1. + +### Tasks + +1. update analyzer routing precedence only if evidence supports correction +2. keep valid host-bridge cases unchanged +3. add targeted regression tests for corrected cases +4. preserve existing `G2/G3/G6` real-sample and canonical tests + +### Deliverables + +1. bounded analyzer routing patch if needed +2. route conflict regression tests + +### Acceptance Criteria + +1. corrected records no longer misclassify +2. valid host-bridge records remain host bridge +3. no broad routing rewrite is introduced + +## Phase 3: Targeted Probe + +### Objective + +Verify only the fixed `4` records after correction. + +### Tasks + +1. rerun generation for the same `4` scenes +2. record resulting archetype and readiness +3. classify each final decision +4. write final decision JSON + +### Deliverables + +1. `remaining_route_conflict_decisions_2026-04-19.json` + +### Acceptance Criteria + +1. 
all `4` records have final probe results +2. no full `102` sweep is required by this plan + +## Phase 4: Report and Stop + +### Objective + +Publish the route conflict report and stop. + +### Tasks + +1. write the route conflict correction report +2. include final decisions for all `4` records +3. list verification commands +4. explicitly state that the execution board is not updated + +### Deliverables + +1. route conflict correction report + +### Acceptance Criteria + +1. all `4` conflicts are adjudicated +2. tests pass +3. no execution board update is made + +## Completion Criteria + +This plan is complete when: + +1. the fixed `4` route conflicts have final decisions +2. targeted probes have been run +3. relevant regressions pass +4. decision JSON and report are published +5. execution stops without opening another plan + diff --git a/docs/superpowers/plans/2026-04-19-residual-13-followup-sweep-and-reconciliation-plan.md b/docs/superpowers/plans/2026-04-19-residual-13-followup-sweep-and-reconciliation-plan.md new file mode 100644 index 0000000..9b144e3 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-residual-13-followup-sweep-and-reconciliation-plan.md @@ -0,0 +1,40 @@ +# Residual 13 Follow-Up Sweep And Reconciliation Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md` +> Parent Route: `Residual Route E` +> Parent Layer: `Layer E` + +## Plan Intent + +Measure the cumulative delta after residual Routes A through D complete. + +## Fixed Input Bucket + +The fixed input bucket is the same `13` residual scenes from the parent residual closure plan. + +## Allowed Files + +1. residual follow-up JSON asset +2. residual reconciliation candidate JSON asset +3. residual follow-up report +4. residual reconciliation report + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. 
`tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. rerun the fixed 13 residual scenes; +2. classify raw statuses; +3. apply promotion policy; +4. report remaining residual count. + +## Stop Statement + +Stop after residual follow-up and reconciliation reports. + diff --git a/docs/superpowers/plans/2026-04-19-residual-runtime-roadmap-prioritization-plan.md b/docs/superpowers/plans/2026-04-19-residual-runtime-roadmap-prioritization-plan.md new file mode 100644 index 0000000..85d51a5 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-residual-runtime-roadmap-prioritization-plan.md @@ -0,0 +1,49 @@ +# Residual Runtime Roadmap Prioritization Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer E` +> Status: Active + +## Plan Intent + +Select the next roadmap from the three residual inputs after official board reconciliation. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/official_board_reconciliation_2026-04-19.json` + +## Allowed Files + +1. `tests/fixtures/generated_scene/residual_runtime_roadmap_prioritization_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-residual-runtime-roadmap-prioritization-report.md` +3. selected next roadmap design +4. selected next roadmap plan + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Tasks + +1. Load official board residual records. +2. Group residuals by next action. +3. Score local-doc runtime, host-bridge runtime, and bootstrap target normalization. +4. Select exactly one next roadmap. +5. Publish prioritization JSON. +6. Publish prioritization report. +7. Create design/plan for the selected roadmap only. + +## Completion Criteria + +1. All `7` residual records are represented. +2. 
Exactly one selected roadmap exists. +3. Non-selected roadmaps are deferred with reasons. +4. No implementation file is modified. + +## Stop Statement + +Stop after prioritization assets and the selected next roadmap design/plan are published. Do not execute the selected roadmap under this plan. diff --git a/docs/superpowers/plans/2026-04-19-scene-skill-102-final-materialization-plan.md b/docs/superpowers/plans/2026-04-19-scene-skill-102-final-materialization-plan.md new file mode 100644 index 0000000..7530edf --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-scene-skill-102-final-materialization-plan.md @@ -0,0 +1,129 @@ +# Scene Skill 102 Final Materialization Plan + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: final asset materialization before validation +> Status: Draft + +## Plan Intent + +Generate and freeze a single canonical `102` skill package set for later static, mock, and production-like validation. + +This plan answers whether all 102 scenes have materialized skill assets, not just framework auto-pass status. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_skill_102_framework_closure_rollup_2026-04-19.json` +3. scene source root: `D:/desk/智能体资料/全量业务场景/一平台场景` + +## Output Root + +`examples/scene_skill_102_final_materialization_2026-04-19` + +## Allowed Files + +1. `examples/scene_skill_102_final_materialization_2026-04-19/**` +2. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +3. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` +4. `docs/superpowers/reports/2026-04-19-scene-skill-102-final-materialization-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. 
`tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +5. existing `examples/*` follow-up roots outside the output root + +## Phase 0: Freeze Materialization Boundary + +### Tasks + +1. Confirm framework rollup is `102 / 102`. +2. Confirm materialization does not delete existing `examples/*`. +3. Confirm this plan does not perform static/mock/production validation. + +### Acceptance Criteria + +1. Scope is materialization only. +2. Output root is isolated. + +## Phase 1: Build Materialization Input Manifest + +### Tasks + +1. Load official board or fallback source-list assets. +2. Produce exactly 102 materialization input rows. +3. Validate unique scene ids. +4. Resolve source directory for each scene. +5. Sanitize manifest-only string fields for control characters. + +### Acceptance Criteria + +1. Input manifest has 102 rows. +2. No missing source directory remains. +3. No duplicate scene id remains. + +## Phase 2: Generate 102 Skill Packages + +### Tasks + +For each manifest row, run: + +```powershell +cargo run --bin sg_scene_generate -- ` + --source-dir "<sceneSourceDir>" ` + --scene-id "<sceneId>" ` + --scene-name "<sceneName>" ` + --scene-kind report_collection ` + --output-root "D:/data/ideaSpace/rust/sgClaw/claw-new/examples/scene_skill_102_final_materialization_2026-04-19" +``` + +### Acceptance Criteria + +1. Every row is attempted. +2. No single scene failure stops the full batch. +3. stdout/stderr/result status are captured. + +## Phase 3: Verify Materialized Package Presence + +### Tasks + +For each scene, check: + +1. `SKILL.toml` +2. `SKILL.md` +3. `scene.toml` +4. `references/generation-report.json` +5. at least one script under `scripts/` + +### Acceptance Criteria + +1. All successful rows have required files. +2. Failures are explicit in the failures asset. + +## Phase 4: Publish Manifest And Report + +### Tasks + +1. Publish final materialization manifest. +2. Publish final materialization failures. +3. Publish superpowers report. + +### Acceptance Criteria + +1. 
Manifest row count is 102. +2. Report states generated count, failure count, readiness distribution, and next validation input. +3. The report explicitly states that old `examples/*` roots were not cleaned. + +## Expected Delta + +No framework coverage delta. Expected asset delta is: + +1. `102` canonical final skill package rows; +2. one stable manifest for later validation. + +## Stop Statement + +Stop after final materialization manifest, failures asset, and report are published. Do not start static, mock, or production validation under this plan. diff --git a/docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-plan.md b/docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-plan.md new file mode 100644 index 0000000..58a8a0b --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-plan.md @@ -0,0 +1,132 @@ +# Scene Skill 102 Full Coverage Child Plan Sequence Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Upstream Design: `docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-design.md` + +## Plan Intent + +Create the full bounded child-plan sequence for `Route 2` through `Route 6` under the `102` full-coverage parent framework. + +This plan only creates the downstream plan tree. It does not implement any bucket directly. + +## Scope Guardrails + +1. do not modify `analyzer.rs` +2. do not modify `generator.rs` +3. do not modify `ir.rs` +4. do not update `scene_execution_board_2026-04-18.json` +5. do not rerun `102` sweep +6. do not open new families +7. do not collapse multiple buckets into one child implementation plan + +## Workstreams + +1. `WS1` Route 2 child plans +2. `WS2` Route 3 child plans +3. `WS3` Route 4 child plans +4. `WS4` Route 5 child plans +5. 
`WS5` Route 6 child plans + +## Phase 0: Freeze Sequence Inputs + +### Objective + +Freeze the parent baseline and route order before generating child plans. + +### Tasks + +1. freeze parent framework references +2. freeze current bucket sizes +3. freeze route order from Route 2 through Route 6 + +### Deliverables + +1. child-plan sequence design +2. child-plan sequence plan + +### Acceptance Criteria + +1. all later child plans can reference the same parent baseline +2. route order is explicit and cannot drift + +## Phase 1: Route 2 Child Plans + +### Objective + +Create the first three bounded child plans under the largest remaining mainline bucket. + +### Tasks + +1. create `G3 enrichment-request closure` design and plan +2. create `G3 export-plan closure` design and plan +3. create `G3 residual contract closure` design and plan + +### Deliverables + +1. Route 2 child designs +2. Route 2 child plans + +### Acceptance Criteria + +1. each Route 2 child plan owns a narrower fixed bucket +2. Route 2 plans declare allowed and forbidden file sets +3. Route 2 plans declare expected deltas separately + +## Phase 2: Route 3 and Route 4 Child Plans + +### Objective + +Create the bounded plans for the smaller remaining mainline buckets. + +### Tasks + +1. create `G2 remaining fail-closed closure` design and plan +2. create `G1-E remaining fail-closed closure` design and plan + +### Deliverables + +1. Route 3 child design and plan +2. Route 4 child design and plan + +### Acceptance Criteria + +1. Route 3 and Route 4 remain downstream of Route 2 +2. neither plan absorbs Route 2 issues + +## Phase 3: Route 5 and Route 6 Child Plans + +### Objective + +Create the policy and decision plans that follow mainline contract-recovery work. + +### Tasks + +1. create `boundary fail-closed decision` design and plan +2. create `promotion and board reconciliation policy` design and plan + +### Deliverables + +1. Route 5 child design and plan +2. 
Route 6 child design and plan + +### Acceptance Criteria + +1. Route 5 is decision-first, not implementation-first +2. Route 6 is policy-only + +## Completion Criteria + +This plan is complete when: + +1. Route 2 through Route 6 all have bounded child designs and plans +2. every child plan declares parent route, parent layer, input bucket, allowed files, forbidden files, expected delta, and stop statement +3. later work can proceed without inventing new unanchored micro-plans + +## Stop Statement + +Stop after the bounded child-plan sequence for Route 2 through Route 6 has been created. + +Do not implement any route from this sequence under this plan. + diff --git a/docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md b/docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md new file mode 100644 index 0000000..15f79a6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md @@ -0,0 +1,298 @@ +# Scene Skill 102 Full Coverage Framework Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-framework-design.md` + +## Plan Intent + +Turn the current sgClaw post-roadmap work into a single controlled framework for driving the `102` scene set toward full bounded coverage. + +This plan is the parent roadmap for all later bounded plans. Future bounded plans must fit inside one of the routes defined here. 
+ +## Current Baseline + +Current integrated baseline: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 47 | +| `adjudicated-valid-host-bridge` | 4 | +| raw `source-unreadable` | 3 | +| Total | 102 | + +Timeout hygiene overlay: + +| Hygiene interpretation | Count | +| --- | ---: | +| `timeout-as-pass-candidate` | 2 | +| `timeout-as-fail-closed-candidate` | 1 | +| `timeout-still-unreadable` | 0 | +| `timeout-rerun-error` | 0 | + +## Overall Goal + +The overall goal is: + +`100% bounded framework coverage for the current 102 scene set` + +This means: + +1. every scene is covered by a supported framework path +2. every non-pass scene has a structured and named reason +3. no unresolved timeout, unsupported-family, or route-conflict bucket remains + +It does not require `100% auto-pass`. + +## Scope Guardrails + +1. do not start `G4/G5` +2. do not add new families unless this parent framework is updated first +3. do not treat diagnostics as promotions +4. do not update `scene_execution_board_2026-04-18.json` inside diagnostic or bounded recovery plans +5. do not mix timeout policy work with contract recovery work in the same bounded implementation plan +6. do not create semantics-only micro-plans that are not tied to one of the routes below + +## Workstreams + +1. `WS1` Coverage and Reporting Integrity +2. `WS2` Mainline Contract Closure +3. `WS3` Boundary Bucket Handling +4. `WS4` Promotion and Board Reconciliation + +## Phase 0: Freeze the Parent Framework + +### Objective + +Make this plan the single parent framework for the next improvement cycle. + +### Tasks + +1. freeze the current integrated baseline +2. freeze the five framework layers +3. freeze the route order +4. forbid out-of-framework micro-plan drift + +### Deliverables + +1. parent framework design +2. parent framework plan + +### Acceptance Criteria + +1. future bounded plans can be mapped to one framework layer +2. 
future bounded plans can be mapped to one route + +## Phase 1: Close Reporting Integrity + +### Objective + +Finish the reporting-side work so the `102` scene set is measured correctly before further implementation. + +### Route + +`Route 1: Layer E hygiene integration` + +### Tasks + +1. preserve raw timeout counts +2. preserve hygiene-aware timeout interpretation +3. preserve route adjudication +4. preserve structured fail-closed buckets +5. produce reconciliation-friendly current-state reporting + +### Deliverables + +1. timeout hygiene integration assets +2. reconciliation-friendly integrated reporting + +### Acceptance Criteria + +1. no unresolved timeout interpretation remains +2. no unresolved route conflict remains + +## Phase 2: Mainline G3 Contract Closure + +### Objective + +Reduce the largest remaining fail-closed bucket in a controlled way. + +### Route + +`Route 2: G3 / paginated_enrichment` + +### Tasks + +1. freeze the current `G3` fail-closed subgrouping +2. select the top repeated recoverable pattern +3. implement bounded contract recovery +4. rerun only the bounded validation needed by that slice +5. measure delta against the parent baseline + +### Deliverables + +1. bounded G3 implementation plan(s) +2. bounded G3 implementation report(s) +3. updated coverage delta assets + +### Acceptance Criteria + +1. no scene-name hardcoding +2. no gate relaxation +3. canonical `G3` and real-sample `G3` remain stable + +## Phase 3: Mainline G2 Closure + +### Objective + +Reduce the remaining `multi_mode_request` fail-closed bucket. + +### Route + +`Route 3: G2 / multi_mode_request` + +### Tasks + +1. freeze the current `4` G2 fail-closed records +2. identify the common missing contract +3. implement one bounded G2 correction slice +4. rerun bounded validation + +### Deliverables + +1. bounded G2 implementation plan(s) +2. bounded G2 implementation report(s) + +### Acceptance Criteria + +1. real-sample `G2` pass remains stable +2. 
no route drift into host-bridge or other families + +## Phase 4: Mainline G1-E Closure + +### Objective + +Reduce the remaining `single_request_enrichment` fail-closed bucket. + +### Route + +`Route 4: G1-E / single_request_enrichment` + +### Tasks + +1. freeze the current `2` G1-E fail-closed records +2. identify the common missing contract +3. implement one bounded G1-E correction slice +4. rerun bounded validation + +### Deliverables + +1. bounded G1-E implementation plan(s) +2. bounded G1-E implementation report(s) + +### Acceptance Criteria + +1. real-sample `G1-E` pass remains stable +2. no route drift into host-bridge or page-state families + +## Phase 5: Boundary Buckets After Mainline + +### Objective + +Touch boundary-family fail-closed buckets only after the mainline buckets have been reduced or explicitly deferred. + +### Route + +`Route 5: local_doc_pipeline and host_bridge_workflow remaining fail-closed` + +### Tasks + +1. inspect the `5` local-doc records +2. inspect the `1` host-bridge fail-closed record +3. decide whether to defer or open one bounded boundary correction slice + +### Deliverables + +1. boundary bucket decision report +2. optional bounded boundary plan + +### Acceptance Criteria + +1. no boundary slice starts before mainline routes are resolved or deferred + +## Phase 6: Promotion and Board Policy + +### Objective + +Define how stronger framework-resolved statuses may flow back into official scene status assets. + +### Route + +`Route 6: promotion and board reconciliation` + +### Tasks + +1. define promotion thresholds +2. define how hygiene-aware timeout results are represented +3. define how structured fail-closed progress is represented +4. define what can and cannot update the execution board + +### Deliverables + +1. promotion policy design +2. execution-board reconciliation plan + +### Acceptance Criteria + +1. diagnostics remain distinct from promotion +2. 
execution board updates become rule-driven instead of ad hoc + +## Route Order + +The route order is fixed: + +1. finish reporting integrity +2. reduce `G3` fail-closed bucket +3. reduce `G2` fail-closed bucket +4. reduce `G1-E` fail-closed bucket +5. inspect boundary fail-closed buckets +6. define promotion and board reconciliation policy + +No bounded plan may skip upward in this order unless this parent plan is revised. + +## Required Contents for Future Bounded Plans + +Every future bounded plan must include: + +1. parent route reference +2. parent framework layer +3. fixed input bucket +4. exact files allowed to change +5. files that must not change +6. expected coverage delta +7. stop statement + +If one of these is missing, the bounded plan is not valid under this framework. + +## Completion Criteria + +This parent framework remains active until all of the following are true: + +1. `unsupported-family = 0` +2. `missing-source = 0` +3. `misclassified-unresolved = 0` +4. `timeout-still-unreadable = 0` +5. every remaining non-pass scene is either: + - structured fail-closed + - adjudicated valid host-bridge + - policy-recognized timeout rerun hygiene result +6. board reconciliation policy exists + +## Stop Statement + +This is a parent framework plan. + +Do not implement code directly from this plan. + +All implementation must happen through later bounded plans that explicitly declare which route and which layer they belong to. 
diff --git a/docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md b/docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md new file mode 100644 index 0000000..77afbd2 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md @@ -0,0 +1,263 @@ +# Structured Fail-Closed Improvement Roadmap Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Spec: `docs/superpowers/specs/2026-04-19-structured-fail-closed-improvement-roadmap-design.md` +> Upstream Reconciliation: `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` + +## Plan Intent + +Coordinate the next improvement cycle for the `48` structured fail-closed records from the reconciled `102` sweep. + +This is a roadmap-level plan. It intentionally starts with inventory and gap taxonomy before any implementation correction. + +## Baseline + +Current reconciled `102` status: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 2 | + +Fail-closed distribution: + +| Inferred archetype | Count | +| --- | ---: | +| `paginated_enrichment` | 35 | +| `local_doc_pipeline` | 5 | +| `multi_mode_request` | 4 | +| `single_request_enrichment` | 2 | +| `host_bridge_workflow` | 1 | +| `page_state_eval` | 1 | + +## Scope Guardrails + +1. do not add new scene families +2. do not start `G4/G5` +3. do not implement login recovery +4. do not implement full host runtime transport +5. do not implement local document attachment runtime +6. do not update `scene_execution_board_2026-04-18.json` +7. do not promote scenes directly from dry-run or follow-up results +8. do not reopen `adjudicated-valid-host-bridge` records +9. do not handle the `2` timeout records in this roadmap +10. do not loosen readiness gates to increase pass count + +## Workstreams + +1. `WS1` Fail-Closed Inventory and Gap Taxonomy +2. 
`WS2` G3 Paginated Enrichment Recovery +3. `WS3` Small-Bucket Recovery +4. `WS4` Bootstrap Isolation +5. `WS5` Follow-Up Sweep and Reporting + +## Phase 0: Freeze Structured Fail-Closed Baseline + +### Objective + +Freeze the `48` fail-closed records as the only implementation-analysis input. + +### Tasks + +1. read `full_sweep_status_reconciliation_2026-04-19.json` +2. verify total scene count is `102` +3. verify `fail-closed-known = 48` +4. verify `adjudicated-valid-host-bridge = 4` +5. verify `source-unreadable = 2` +6. extract only records with `reconciledStatus = fail-closed-known` + +### Deliverables + +1. frozen fail-closed input list +2. baseline validation summary + +### Acceptance Criteria + +1. exactly `48` records enter this roadmap +2. route-adjudicated records are excluded +3. timeout records are excluded + +## Phase 1: Build Fail-Closed Inventory and Gap Taxonomy + +### Objective + +Split the `48` records into actionable missing-contract buckets. + +### Tasks + +1. inspect each fail-closed record +2. assign exactly one primary missing-contract label: + - `main_request_missing` + - `pagination_plan_missing` + - `enrichment_request_missing` + - `join_key_missing` + - `export_plan_missing` + - `mode_matrix_missing` + - `mode_request_contract_missing` + - `single_request_enrichment_contract_missing` + - `host_bridge_contract_missing` + - `local_doc_contract_missing` + - `bootstrap_target_unresolved` + - `mixed_or_ambiguous_contract_gap` +3. attach secondary labels when useful +4. group by inferred archetype and primary label +5. identify top repeated recoverable patterns + +### Deliverables + +1. `tests/fixtures/generated_scene/structured_fail_closed_inventory_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-structured-fail-closed-inventory-report.md` + +### Acceptance Criteria + +1. all `48` records have exactly one primary label +2. the `35` `paginated_enrichment` records are explicitly split +3. 
no implementation is performed in this phase + +## Phase 2: G3 Paginated Enrichment Recovery Slice + +### Objective + +Improve the largest bucket only when Phase 1 identifies repeated recoverable G3 patterns. + +### Tasks + +1. select only `paginated_enrichment` records from the inventory +2. prioritize repeated primary labels in this order: + - `main_request_missing` + - `pagination_plan_missing` + - `enrichment_request_missing` + - `join_key_missing` + - `export_plan_missing` +3. define bounded recovery rules for the top repeated pattern +4. implement only traceable evidence recovery +5. add regression tests for the recovered pattern +6. preserve canonical `G3` and real-sample `G3` pass + +### Deliverables + +1. G3 recovery implementation if evidence supports it +2. regression tests for the recovered pattern +3. G3 recovery report + +### Acceptance Criteria + +1. no scene-name hardcoding +2. no gate relaxation +3. recovered fields are traceable to source evidence +4. existing `G3` canonical and real-sample tests pass + +## Phase 3: Small-Bucket Recovery Slice + +### Objective + +Handle smaller buckets only after the G3 slice is complete or explicitly deferred. + +### Tasks + +1. inspect `local_doc_pipeline = 5` +2. inspect `multi_mode_request = 4` +3. inspect `single_request_enrichment = 2` +4. inspect `host_bridge_workflow = 1` +5. choose at most one bounded non-G3 recovery slice +6. preserve existing real-sample passes for `G1-E`, `G2`, `G6`, `G7` + +### Deliverables + +1. small-bucket recovery decision report +2. optional bounded implementation and tests + +### Acceptance Criteria + +1. only one small-bucket slice is implemented in this roadmap +2. no `G8` attachment/local document runtime is started +3. no full host runtime transport is started + +## Phase 4: Bootstrap Target Isolation + +### Objective + +Keep the single `page_state_eval + bootstrap_target` record separate. + +### Tasks + +1. identify the bootstrap target record +2. 
preserve it as a separate future input +3. do not implement login recovery +4. produce bootstrap isolation note + +### Deliverables + +1. bootstrap isolation note + +### Acceptance Criteria + +1. bootstrap target does not pollute G3 or small-bucket recovery +2. no login or bootstrap auto-recovery is implemented + +## Phase 5: Follow-Up Sweep and Coverage Delta + +### Objective + +Measure the impact of bounded recovery work. + +### Tasks + +1. rerun the fixed `102` scene sweep +2. produce a new follow-up result +3. compare against the reconciled baseline: + - auto-pass delta + - fail-closed-known delta + - actionable coverage delta + - timeout count + - adjudicated host-bridge count +4. publish coverage delta report + +### Deliverables + +1. `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-coverage-delta-report.md` +3. `docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-roadmap-closure-report.md` + +### Acceptance Criteria + +1. scene set remains exactly `102` +2. improvements are measured, not assumed +3. execution board remains unchanged +4. fail-closed count only drops when contracts close or become more specifically isolated + +## Milestone Order + +The order is fixed: + +1. Phase 0: freeze fail-closed baseline +2. Phase 1: build inventory and taxonomy +3. Phase 2: G3 recovery slice +4. Phase 3: small-bucket recovery slice +5. Phase 4: bootstrap target isolation +6. Phase 5: follow-up sweep and delta + +Do not start implementation before Phase 1 is complete. + +Do not start small-bucket recovery before the G3 slice is completed or explicitly deferred with reasons. + +## Completion Criteria + +This roadmap is complete when: + +1. all `48` structured fail-closed records are inventoried and labeled +2. the `35` G3 records are split into actionable contract-gap groups +3. 
at least the highest-value repeated recoverable pattern is either implemented or explicitly deferred +4. small buckets are inspected and at most one bounded slice is implemented +5. the bootstrap target remains isolated +6. a follow-up sweep quantifies coverage delta +7. no new family is introduced + +## Stop Statement + +Stop after the follow-up sweep, delta report, and closure report. + +Do not automatically update the execution board or start another roadmap inside this plan. diff --git a/docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md b/docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md new file mode 100644 index 0000000..2145a61 --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-structured-fail-closed-residual-13-closure-plan.md @@ -0,0 +1,150 @@ +# Structured Fail-Closed Residual 13 Closure Plan + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Upstream Design: `docs/superpowers/specs/2026-04-19-structured-fail-closed-residual-13-closure-design.md` +> Fixed Input: `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` + +## Plan Intent + +Turn the remaining `13` `framework-structured-fail-closed` scenes into a controlled residual closure sequence. + +This plan is a coordinator plan. It does not directly implement code. Implementation must happen only in bounded child plans declared below. + +## Fixed Input Bucket + +The fixed input bucket is the `13` scenes with: + +`reconciliationCandidateStatus = framework-structured-fail-closed` + +from: + +`tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` + +## Residual Routes + +### Residual Route A: G3 Residual Closure + +Fixed input: + +1. `sweep-007-scene` / `95598供电服务月报` +2. `sweep-039-scene` / `故障报修工单信息统计表` +3. `sweep-068-scene` / `输变电设备运行分析报告` +4. 
`sweep-084-scene` / `巡视计划完成情况自动检索` + +Expected child plan: + +`2026-04-19-g3-residual-4-workflow-evidence-closure-plan.md` + +Allowed implementation area: + +1. G3 workflow evidence recovery. +2. G3 contract assembly. +3. bounded G3 route-local validation. + +Forbidden: + +1. G8 runtime. +2. G6 host bridge runtime. +3. new family creation. + +### Residual Route B: G2 Residual Closure + +Fixed input: + +1. `sweep-018-scene` / `白银线损周报` +2. `sweep-071-scene` / `台区线损大数据-月_周累计线损率统计分析` + +Expected child plan: + +`2026-04-19-g2-residual-2-readiness-closure-plan.md` + +Allowed implementation area: + +1. G2 readiness interpretation. +2. G2 mode/request/response contract correction. +3. bounded G2 route-local validation. + +Forbidden: + +1. changing G2 real-sample pass semantics; +2. adding a new G2 variant family; +3. route drift into host bridge. + +### Residual Route C: Boundary Residual Decision + +Fixed input: + +1. `sweep-033-scene` / `供电可靠率指标统计表` +2. `sweep-034-scene` / `供电可靠性数据质量自查报告月报` +3. `sweep-042-scene` / `国网金昌供电公司营商环境周例会报告` +4. `sweep-051-scene` / `嘉峪关可靠性分析报告` +5. `sweep-074-scene` / `同兴智能安全督查日报` +6. `sweep-085-scene` / `业扩报装管理制度` + +Expected child plan: + +`2026-04-19-boundary-residual-hold-decision-plan.md` + +Allowed action: + +1. decision-only hold/defer classification. +2. no implementation. + +### Residual Route D: Bootstrap Residual Isolation + +Fixed input: + +1. `sweep-091-scene` / `用户停电频次分析监测` + +Expected child plan: + +`2026-04-19-bootstrap-target-residual-isolation-plan.md` + +Allowed action: + +1. bootstrap target isolation. +2. no login recovery implementation. + +### Residual Route E: Residual Follow-Up Reconciliation + +Expected child plan: + +`2026-04-19-residual-13-followup-sweep-and-reconciliation-plan.md` + +Allowed action: + +1. route-local or fixed 13-scene follow-up sweep. +2. reconciliation candidate refresh. +3. no official board update. + +## Phase Order + +1. Run Residual Route A. +2. Run Residual Route B. +3. Run Residual Route C. 
+4. Run Residual Route D. +5. Run Residual Route E. + +Do not skip to Route E before Routes A through D are complete. + +## Deliverables + +1. residual 13 design. +2. residual 13 coordinator plan. +3. child bounded plans for Routes A through E. + +## Completion Criteria + +1. the 13 residual scenes are fully assigned to residual routes; +2. every residual route has an expected child plan name; +3. mainline residuals are separated from boundary/bootstrap residuals; +4. no implementation is performed directly by this coordinator plan. + +## Stop Statement + +Stop after publishing this coordinator plan and its child plan skeletons. + +Do not modify implementation files under this coordinator plan. + diff --git a/docs/superpowers/plans/2026-04-19-timeout-budget-rerun-hygiene-plan.md b/docs/superpowers/plans/2026-04-19-timeout-budget-rerun-hygiene-plan.md new file mode 100644 index 0000000..1a5243f --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-timeout-budget-rerun-hygiene-plan.md @@ -0,0 +1,144 @@ +# Timeout Budget and Rerun Hygiene Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-19-timeout-budget-rerun-hygiene-design.md` +> Upstream Diagnostic: `docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md` + +## Plan Intent + +Create a bounded timeout-budget and rerun-hygiene layer so budget-sensitive scenes are not collapsed into a single `source-unreadable` bucket. + +This plan is classification and reporting only. It does not change analyzer or generator code. + +## Scope Guardrails + +1. do not modify `src/generated_scene/analyzer.rs` +2. do not modify `src/generated_scene/generator.rs` +3. do not update `scene_execution_board_2026-04-18.json` +4. do not promote scenes +5. do not rerun the full `102` sweep +6. do not treat rerun success as validated pass +7. 
do not start timeout implementation fixes + +## Fixed Input + +The fixed input is: + +`tests/fixtures/generated_scene/timeout_regression_diagnostic_2026-04-19.json` + +Only the three diagnosed timeout records enter this plan. + +## Phase 0: Freeze Timeout Diagnostic Input + +### Objective + +Freeze the timeout diagnostic records before hygiene mapping. + +### Tasks + +1. read the timeout diagnostic JSON +2. verify that the total number of timeout records is exactly `3` +3. verify the label set is: + - `timeout-rerun-pass = 2` + - `timeout-rerun-fail-closed = 1` + +### Deliverables + +1. frozen timeout diagnostic baseline + +### Acceptance Criteria + +1. exactly `3` records enter this hygiene plan +2. no non-timeout scene enters the plan + +## Phase 1: Define Hygiene Mapping + +### Objective + +Map timeout diagnostic results to explicit rerun hygiene statuses. + +### Tasks + +1. map `timeout-rerun-pass` to `rerun-resolved-pass` +2. map `timeout-rerun-fail-closed` to `rerun-resolved-fail-closed` +3. reserve `rerun-still-timeout` for any future persistent timeout +4. reserve `rerun-error` for any future unexpected exit + +### Deliverables + +1. explicit rerun hygiene mapping table + +### Acceptance Criteria + +1. each timeout diagnostic label maps to one hygiene status +2. pass-like rerun and fail-closed rerun remain distinct + +## Phase 2: Build Hygiene Output + +### Objective + +Publish a hygiene-layer view for the three timeout records. + +### Tasks + +1. write `tests/fixtures/generated_scene/timeout_budget_rerun_hygiene_2026-04-19.json` +2. include: + - original timeout status + - diagnostic label + - rerun hygiene status + - elapsed seconds + - report presence + - readiness if present +3. summarize how many records are: + - `rerun-resolved-pass` + - `rerun-resolved-fail-closed` + - `rerun-still-timeout` + - `rerun-error` + +### Deliverables + +1. timeout budget hygiene JSON + +### Acceptance Criteria + +1. all three timeout records appear in the hygiene JSON +2. 
each has exactly one hygiene status + +## Phase 3: Publish Report + +### Objective + +Publish the bounded timeout hygiene report without changing scene status. + +### Tasks + +1. write `docs/superpowers/reports/2026-04-19-timeout-budget-rerun-hygiene-report.md` +2. explain why `sweep-040-scene` should not be counted the same way as a hard unreadable source +3. explain why `sweep-015-scene` and `sweep-025-scene` are budget-sensitive pass candidates +4. state that this remains a hygiene layer, not a promotion layer + +### Deliverables + +1. timeout budget and rerun hygiene report + +### Acceptance Criteria + +1. report exists +2. no execution board update is made +3. no implementation change is made + +## Completion Criteria + +This plan is complete when: + +1. timeout diagnostic input is frozen +2. rerun hygiene mapping is defined +3. hygiene JSON is published +4. hygiene report is published + +## Stop Statement + +Stop after publishing the timeout hygiene JSON and report. + +Do not start timeout implementation or scene promotion inside this plan. diff --git a/docs/superpowers/plans/2026-04-19-timeout-regression-diagnostic-plan.md b/docs/superpowers/plans/2026-04-19-timeout-regression-diagnostic-plan.md new file mode 100644 index 0000000..5276d7d --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-timeout-regression-diagnostic-plan.md @@ -0,0 +1,178 @@ +# Timeout Regression Diagnostic Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-19-timeout-regression-diagnostic-design.md` +> Upstream Follow-up: `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +## Plan Intent + +Run a bounded diagnostic for the three timeout records after the structured fail-closed improvement follow-up sweep. + +This plan only diagnoses timeout behavior. It does not implement fixes. + +## Scope Guardrails + +1. do not modify `src/generated_scene/analyzer.rs` +2. 
do not modify `src/generated_scene/generator.rs` +3. do not update `scene_execution_board_2026-04-18.json` +4. do not promote scenes +5. do not add family baselines +6. do not handle the remaining structured fail-closed records +7. do not handle adjudicated host-bridge records +8. do not treat diagnostic rerun success as validated scene pass + +## Fixed Input + +The fixed input is: + +`tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +Only records with `followupStatus = source-unreadable` and reason `generator timeout after 45s` enter this plan. + +Expected fixed set: + +| Scene id | Scene | Type | +| --- | --- | --- | +| `sweep-015-scene` | `任务报表` | persistent timeout | +| `sweep-025-scene` | `力禾动环系统巡视记录` | persistent timeout | +| `sweep-040-scene` | `嘉峪关日报` | regression timeout | + +## Phase 0: Freeze Timeout Inputs + +### Objective + +Freeze the exact timeout set before diagnostics. + +### Tasks + +1. read the follow-up sweep JSON +2. filter `source-unreadable` timeout records +3. verify the count is exactly `3` +4. identify `sweep-040-scene` as the regression timeout + +### Deliverables + +1. frozen timeout input list + +### Acceptance Criteria + +1. exactly `3` timeout records enter diagnostics +2. no non-timeout record enters diagnostics + +## Phase 1: Source Directory Diagnostics + +### Objective + +Determine whether timeout records are likely caused by source scale or source structure. + +### Tasks + +1. inspect each source directory +2. count all files +3. count HTML files +4. count JavaScript files +5. compute total source bytes +6. record the largest files + +### Deliverables + +1. per-scene source diagnostics in JSON + +### Acceptance Criteria + +1. all `3` timeout records have source diagnostics +2. missing directories are reported explicitly + +## Phase 2: Bounded Diagnostic Rerun + +### Objective + +Check whether each timeout completes under a longer diagnostic budget. + +### Tasks + +1. 
rerun each timeout scene with a diagnostic timeout budget +2. write output under `examples/timeout_regression_diagnostic_2026-04-19` +3. capture exit code +4. capture elapsed seconds +5. record whether a `generation-report.json` is produced +6. do not update any execution status based on the result + +### Deliverables + +1. diagnostic rerun result per timeout scene + +### Acceptance Criteria + +1. each timeout has exactly one diagnostic rerun result +2. rerun success is marked only as diagnostic evidence +3. rerun failure is categorized, not fixed + +## Phase 3: Timeout Labeling + +### Objective + +Assign each timeout one final diagnostic label. + +### Tasks + +1. assign one primary diagnostic label: + - `timeout-rerun-pass` + - `timeout-rerun-fail-closed` + - `timeout-large-source` + - `timeout-command-hang` + - `timeout-nondeterministic` + - `timeout-source-scan-heavy` + - `timeout-unknown` +2. attach secondary labels when useful +3. distinguish persistent timeouts from regression timeout + +### Deliverables + +1. labeled timeout diagnostic JSON + +### Acceptance Criteria + +1. all `3` records have exactly one primary diagnostic label +2. `sweep-040-scene` remains clearly identified as the regression timeout + +## Phase 4: Diagnostic Report + +### Objective + +Publish diagnostic results without starting implementation. + +### Tasks + +1. write `tests/fixtures/generated_scene/timeout_regression_diagnostic_2026-04-19.json` +2. write `docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md` +3. summarize whether the next step should be timeout implementation, rerun hygiene, or no action + +### Deliverables + +1. `tests/fixtures/generated_scene/timeout_regression_diagnostic_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md` + +### Acceptance Criteria + +1. diagnostic output exists +2. report exists +3. no implementation changes are made +4. 
no execution board update is made + +## Completion Criteria + +This plan is complete when: + +1. the three timeout records are frozen +2. each has source diagnostics +3. each has one diagnostic rerun result +4. each has one final diagnostic label +5. JSON and report are published + +## Stop Statement + +Stop after publishing the timeout diagnostic JSON and report. + +Do not start timeout implementation or status promotion inside this plan. diff --git a/docs/superpowers/plans/2026-04-19-timeout-rerun-hygiene-integration-plan.md b/docs/superpowers/plans/2026-04-19-timeout-rerun-hygiene-integration-plan.md new file mode 100644 index 0000000..7f94fac --- /dev/null +++ b/docs/superpowers/plans/2026-04-19-timeout-rerun-hygiene-integration-plan.md @@ -0,0 +1,140 @@ +# Timeout Rerun Hygiene Integration Plan + +> Date: 2026-04-19 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-19-timeout-rerun-hygiene-integration-design.md` +> Upstream Hygiene: `tests/fixtures/generated_scene/timeout_budget_rerun_hygiene_2026-04-19.json` + +## Plan Intent + +Integrate timeout rerun hygiene into sweep and reconciliation reporting. + +This plan only changes the reporting layer. It does not change scene generation behavior. + +## Scope Guardrails + +1. do not modify `src/generated_scene/analyzer.rs` +2. do not modify `src/generated_scene/generator.rs` +3. do not update `scene_execution_board_2026-04-18.json` +4. do not promote scenes +5. do not rerun the `102` sweep +6. do not start timeout implementation fixes + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` +2. `tests/fixtures/generated_scene/timeout_budget_rerun_hygiene_2026-04-19.json` + +## Phase 0: Freeze Inputs + +### Objective + +Freeze the sweep follow-up and timeout hygiene inputs. + +### Tasks + +1. verify follow-up sweep status counts +2. 
verify timeout hygiene summary: + - `rerun-resolved-pass = 2` + - `rerun-resolved-fail-closed = 1` + - `rerun-still-timeout = 0` + - `rerun-error = 0` + +### Deliverables + +1. frozen integration input set + +### Acceptance Criteria + +1. only the fixed follow-up and hygiene inputs are used + +## Phase 1: Build Hygiene Overlay + +### Objective + +Attach timeout hygiene results onto raw timeout scenes. + +### Tasks + +1. match timeout hygiene records to the follow-up sweep by `sceneId` +2. preserve raw `source-unreadable` +3. add: + - `hygieneStatus` + - `hygieneInterpretation` +4. map: + - `rerun-resolved-pass -> timeout-as-pass-candidate` + - `rerun-resolved-fail-closed -> timeout-as-fail-closed-candidate` + - `rerun-still-timeout -> timeout-still-unreadable` + - `rerun-error -> timeout-rerun-error` + +### Deliverables + +1. timeout hygiene overlay records + +### Acceptance Criteria + +1. all three timeout scenes receive one overlay status +2. raw status is preserved + +## Phase 2: Build Integrated Summary + +### Objective + +Publish a hygiene-aware timeout summary alongside the raw sweep summary. + +### Tasks + +1. preserve raw follow-up status counts +2. add hygiene-aware timeout interpretation counts +3. summarize: + - `timeout-as-pass-candidate` + - `timeout-as-fail-closed-candidate` + - `timeout-still-unreadable` + - `timeout-rerun-error` + +### Deliverables + +1. integrated summary block + +### Acceptance Criteria + +1. raw and hygiene-aware summaries both exist +2. timeout bucket is no longer lossy in the integrated output + +## Phase 3: Publish Integrated Output + +### Objective + +Publish the bounded reconciliation-friendly hygiene integration output. + +### Tasks + +1. write `tests/fixtures/generated_scene/timeout_rerun_hygiene_integration_2026-04-19.json` +2. write `docs/superpowers/reports/2026-04-19-timeout-rerun-hygiene-integration-report.md` +3. state that this is an interpretation/reporting layer only + +### Deliverables + +1. 
timeout hygiene integration JSON +2. timeout hygiene integration report + +### Acceptance Criteria + +1. both files exist +2. no execution board update is made +3. no implementation change is made + +## Completion Criteria + +This plan is complete when: + +1. inputs are frozen +2. timeout hygiene overlay is attached +3. integrated raw and hygiene-aware summaries are published +4. JSON and report are written + +## Stop Statement + +Stop after publishing the integration JSON and report. + +Do not start implementation or board updates inside this plan. diff --git a/docs/superpowers/plans/2026-04-20-deterministic-keyword-scoring-refinement-plan.md b/docs/superpowers/plans/2026-04-20-deterministic-keyword-scoring-refinement-plan.md new file mode 100644 index 0000000..de4afb8 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-deterministic-keyword-scoring-refinement-plan.md @@ -0,0 +1,86 @@ +# Deterministic Keyword Scoring Refinement Plan + +> Date: 2026-04-20 +> Design: `2026-04-20-deterministic-keyword-scoring-refinement-design.md` + +## Plan Intent + +Close the 9 deterministic dispatch ambiguity gaps by bounded manifest keyword refinement and dry-run verification. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_2026-04-20.json` +2. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +3. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*/scene.toml` + +## Allowed Files + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*/scene.toml` +2. `tests/fixtures/generated_scene/deterministic_keyword_scoring_refinement_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +4. `docs/superpowers/reports/2026-04-20-deterministic-keyword-scoring-refinement-report.md` + +## Forbidden Files + +1. `src/compat/scene_platform/dispatch.rs` +2. 
`src/compat/scene_platform/resolvers.rs` +3. `src/generated_scene/analyzer.rs` +4. `src/generated_scene/generator.rs` +5. generated `scripts/*` +6. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Gap Set + +### Tasks + +1. Load readiness gaps from the parent readiness asset. +2. Confirm the fixed gap set is exactly 9 ambiguous dispatch entries. + +### Acceptance Criteria + +1. No additional gap categories are pulled into scope. +2. `sweep-012-scene` remains excluded. + +## Phase 1: Refine Manifest Keywords + +### Tasks + +1. For each fixed gap, identify direct collision partner. +2. Narrow include keywords to distinctive full phrases. +3. Remove broad standalone collision tokens where they create ties. +4. Add explicit exclude keywords only when a pair is mutually exclusive. + +### Acceptance Criteria + +1. The fixed 9 scenes retain non-empty include keywords. +2. No generated script is changed. + +## Phase 2: Dispatch Dry-Run Verification + +### Tasks + +1. Re-run dispatch dry-run for all 101 complete packages. +2. Verify the fixed 9 gaps uniquely select their expected scene by full-name sample. +3. Check that no previously-ready scene regresses into ambiguity or no-match. + +### Acceptance Criteria + +1. `dispatchReady = 101` or all residual gaps are explicitly justified. +2. `ambiguous = 0` unless escalated to a separate runtime scoring plan. + +## Phase 3: Publish Report + +### Tasks + +1. Publish refinement JSON. +2. Publish post-refinement readiness JSON. +3. Publish report. + +### Acceptance Criteria + +1. Report states before/after ready and ambiguous counts. +2. Report states whether runtime scoring changes are needed. + +## Stop Statement + +Stop after refinement assets and report are published. Do not start browser execution, runtime dispatch implementation, or `sweep-012-scene` recovery under this plan. 
diff --git a/docs/superpowers/plans/2026-04-20-final-skill-human-readable-index-plan.md b/docs/superpowers/plans/2026-04-20-final-skill-human-readable-index-plan.md new file mode 100644 index 0000000..abde513 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-final-skill-human-readable-index-plan.md @@ -0,0 +1,101 @@ +# Final Skill Human-Readable Index Plan + +> Date: 2026-04-20 +> Parent Plan: `2026-04-19-scene-skill-102-final-materialization-plan.md` +> Design: `2026-04-20-final-skill-human-readable-index-design.md` + +## Plan Intent + +Add human-readable lookup and metadata to the final materialized skill set so reviewers can identify which `sweep-xxx-scene` skill maps to which business scene. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +3. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` +4. `examples/scene_skill_102_final_materialization_2026-04-19` + +## Allowed Files + +1. `examples/scene_skill_102_final_materialization_2026-04-19/SCENE_INDEX.md` +2. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +3. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*/SKILL.toml` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*/SKILL.md` +5. `docs/superpowers/reports/2026-04-20-final-skill-human-readable-index-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +5. generated `scripts/*` +6. existing materialization manifest and failures assets + +## Phase 0: Freeze Metadata Boundary + +### Tasks + +1. Confirm final materialization root exists. +2. Confirm official board has 102 scene mappings. +3. 
Confirm this plan does not repair failed packages. + +### Acceptance Criteria + +1. Scope is metadata/index only. +2. Stable `sweep-xxx-scene` ids are preserved. + +## Phase 1: Build Human-Readable Mapping + +### Tasks + +1. Load scene id and scene name from official board. +2. Load materialization status from final materialization manifest and failures asset. +3. Produce 102 mapping rows. + +### Acceptance Criteria + +1. Row count is 102. +2. `sweep-012-scene` is included and marked failed. + +## Phase 2: Publish Index Assets + +### Tasks + +1. Write `SCENE_INDEX.md`. +2. Write `scene_skill_102_index.json`. + +### Acceptance Criteria + +1. Index files are present. +2. Index files include scene id, scene name, archetype, readiness, status, and skill directory. + +## Phase 3: Normalize Skill Metadata + +### Tasks + +1. For each complete package, update `SKILL.toml` readable fields while preserving `[skill].name`. +2. For each complete package, update `SKILL.md` readable summary. +3. Skip failed packages that lack required files. + +### Acceptance Criteria + +1. Complete packages expose readable scene names. +2. Failed packages remain explicit failures. +3. Generated scripts are not modified. + +## Phase 4: Publish Report + +### Tasks + +1. Publish human-readable index report. +2. State materialized package count and skipped failed package count. + +### Acceptance Criteria + +1. Report explains how to find scene-to-skill mapping. +2. Report states that no generation or recovery was performed. + +## Stop Statement + +Stop after index assets, metadata normalization, and report are published. Do not start static/mock validation or `sweep-012-scene` recovery under this plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-embedded-dictionary-extraction-hardening-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-embedded-dictionary-extraction-hardening-plan.md new file mode 100644 index 0000000..d82f3a6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-embedded-dictionary-extraction-hardening-plan.md @@ -0,0 +1,47 @@ +# Generated Scene Embedded Dictionary Extraction Hardening Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent route: +> - `embedded_dictionary_extraction_hardening` +> Parent ledger: +> - `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +## Plan Intent + +Implement the first reusable slice for source-side dictionary/tree extraction. + +## Fixed Input Bucket + +Use the bounded bucket: + +1. scenes with declared `org` parameters +2. scenes with source-side dictionary evidence (`city.js`, `dict.js`, `enum.js`, tree/options files) +3. scenes whose current generated `org-dictionary.json` is absent or starter-sized + +This first slice should center on the 10 parameterized scenes that most resemble `sweep-030-scene`. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. route-local generator tests + +## Forbidden Files + +1. no edits to already materialized dictionaries under `examples/` +2. no runtime resolver implementation outside generation output needs +3. no board assets +4. no pseudo-production handoff assets + +## Expected Coverage Delta + +1. generated dictionaries move beyond starter subsets for the bucketed scenes +2. dictionary recovery becomes source-driven rather than hand-seeded + +## Stop Statement + +Stop after the first reusable dictionary-extraction slice is implemented and route-local follow-up assets are published. + +Do not attempt complete organization-tree closure for every scene inside this route plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-invocation-alias-generation-hardening-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-invocation-alias-generation-hardening-plan.md new file mode 100644 index 0000000..28b6583 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-invocation-alias-generation-hardening-plan.md @@ -0,0 +1,47 @@ +# Generated Scene Invocation Alias Generation Hardening Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent route: +> - `alias_generation_hardening` +> Parent ledger: +> - `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +## Plan Intent + +Implement the first reusable slice for natural-language alias generation. + +## Fixed Input Bucket + +Use the bounded bucket: + +1. scenes with source-side alias evidence +2. scenes whose current generated deterministic manifests still expose only narrow keyword coverage +3. high-risk browser-script report scenes where operator wording is likely to diverge from canonical scene names + +This first slice should prefer the densest high-risk alias bucket rather than the full 84-scene route at once. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. route-local generator tests + +## Forbidden Files + +1. no runtime scoring changes in sgClaw dispatch +2. no service-console changes +3. no direct edits to final materialized `scene.toml` +4. no board assets + +## Expected Coverage Delta + +1. generated `include_keywords` become less brittle for the bucketed scenes +2. deterministic invocation becomes less dependent on exact canonical wording + +## Stop Statement + +Stop after the first reusable alias-generation slice is implemented and route-local follow-up assets are published. + +Do not attempt one-shot full alias closure for every scene inside this route plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-parameter-default-semantics-hardening-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-parameter-default-semantics-hardening-plan.md new file mode 100644 index 0000000..33c5419 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-parameter-default-semantics-hardening-plan.md @@ -0,0 +1,47 @@ +# Generated Scene Parameter Default Semantics Hardening Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent route: +> - `parameter_default_semantics_recovery_hardening` +> Parent ledger: +> - `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +## Plan Intent + +Implement the first reusable slice for page-native default period/date/mode recovery. + +## Fixed Input Bucket + +Use the bounded bucket: + +1. scenes with explicit `period` parameters +2. scenes whose source evidence shows implicit month/week/date initialization +3. scenes whose current generated manifests do not encode a reusable default strategy + +This first slice should center on the parameterized monthly/weekly scenes highlighted by the ledger. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. route-local generator tests + +## Forbidden Files + +1. no runtime resolver patching outside generation metadata needs +2. no edits to generated skill bundle under `examples/` +3. no board assets +4. no pseudo-production assets + +## Expected Coverage Delta + +1. generated parameter metadata can preserve source-side default semantics for the bucketed scenes +2. callers are no longer forced to supply values that the source page itself normally supplies + +## Stop Statement + +Stop after the first reusable default-semantics slice is implemented and route-local follow-up assets are published. + +Do not expand to all possible date semantics inside this route plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-resolver-request-mapping-hardening-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-resolver-request-mapping-hardening-plan.md new file mode 100644 index 0000000..9557e68 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-resolver-request-mapping-hardening-plan.md @@ -0,0 +1,47 @@ +# Generated Scene Resolver Request Mapping Hardening Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent route: +> - `resolver_request_mapping_hardening` +> Parent ledger: +> - `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +## Plan Intent + +Implement the first reusable mapping slice for request-field recovery. + +## Fixed Input Bucket + +Use the bounded bucket: + +1. scenes with explicit `org` and/or `period` params +2. scenes whose source evidence shows request-field tokens like `orgno`, `fdate`, `weekSfdate`, `weekEfdate` +3. scenes currently lacking explicit generated request-mapping metadata + +This first slice is expected to center on the parameterized `multi_mode_request` family and adjacent structured-request scenes. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. route-local generator tests + +## Forbidden Files + +1. no edits to final materialized skill bundle +2. no execution-board assets +3. no runtime / browser callback host +4. no service console assets + +## Expected Coverage Delta + +1. introduce reusable request-field mapping metadata rather than scene-name patches +2. reduce `resolver_to_request_mapping_gap` in the highest-signal parameterized bucket + +## Stop Statement + +Stop after the first reusable mapping slice is implemented and route-local follow-up assets are published. + +Do not yet attempt full 102-scene closure inside this route plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-plan.md new file mode 100644 index 0000000..e82944b --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-plan.md @@ -0,0 +1,143 @@ +# Generated Scene Rule Hardening Route Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent roadmap: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` +> Parent design: +> - `docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-design.md` +> Upstream ledger: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-plan.md` + +## Plan Intent + +Convert the completed runtime-semantics ledger into a bounded hardening-route sequence. + +This stage decides execution order and the next child implementation plans. It does not change code yet. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md` + +## Scope Guardrails + +Allowed: + +1. cluster scenes by reusable route +2. freeze route order +3. define bounded child implementation plans +4. define rematerialization dependency +5. define validation refresh dependency + +Forbidden: + +1. no implementation changes in `src/` +2. no skill manifest changes +3. no rematerialization execution +4. no validation reruns +5. no inner-network execution + +## Phase 0: Freeze Route Order + +### Objective + +Turn the ledger into one fixed route order for downstream implementation. + +### Ordered Routes + +1. `resolver_request_mapping_hardening` +2. `runtime_url_classification_hardening` +3. `embedded_dictionary_extraction_hardening` +4. `parameter_default_semantics_recovery_hardening` +5. 
`alias_generation_hardening` + +### Acceptance + +1. the order is explicit and no longer derived ad hoc during later implementation + +## Phase 1: Build Route Clusters + +### Objective + +Cluster scenes from the ledger into reusable route buckets. + +### Tasks + +1. count all scenes covered by each route +2. identify the densest scene families per route +3. identify route-local anchor scenes + +### Acceptance + +1. each route has a stable implementation bucket definition + +## Phase 2: Define Bounded Child Implementation Plans + +### Objective + +Create one bounded implementation child plan for each top route. + +### Required child plans + +1. `2026-04-20-generated-scene-resolver-request-mapping-hardening-plan.md` +2. `2026-04-20-generated-scene-runtime-url-classification-hardening-plan.md` +3. `2026-04-20-generated-scene-embedded-dictionary-extraction-hardening-plan.md` +4. `2026-04-20-generated-scene-parameter-default-semantics-hardening-plan.md` +5. `2026-04-20-generated-scene-invocation-alias-generation-hardening-plan.md` + +### Acceptance + +1. each child plan has a fixed scope and stop rule +2. no child plan is scene-name hardcoded as its whole purpose + +## Phase 3: Declare Rematerialization Dependency + +### Objective + +Make full 102-scene rematerialization a mandatory downstream stage after route execution. + +### Tasks + +1. define `generated-scene-runtime-semantics-rematerialization-refresh-plan` +2. freeze it as required after implementation + +### Acceptance + +1. no route may be considered complete without rematerialization + +## Phase 4: Declare Validation Refresh Dependency + +### Objective + +Make validation refresh mandatory after rematerialization. + +### Tasks + +1. define `generated-scene-runtime-semantics-validation-refresh-plan` +2. require refresh of: + - deterministic invocation readiness + - natural-language parameter readiness + - static validation + - direct mock execution + - pseudo-production handoff + +### Acceptance + +1. 
no route may be considered fully closed until validation assets are refreshed + +## Deliverables + +1. route design / sequencing report +2. route cluster JSON +3. bounded child-plan list for the five routes + +## Stop Statement + +Stop after: + +1. publishing the route design / sequencing assets +2. publishing the five child implementation plans +3. publishing rematerialization and validation-refresh dependency plans + +Do not execute route implementation inside this plan. diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-sequence-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-sequence-plan.md new file mode 100644 index 0000000..4fe87eb --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-sequence-plan.md @@ -0,0 +1,34 @@ +# Generated Scene Rule Hardening Route Sequence Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent design: +> - `docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-sequence-design.md` + +## Plan Intent + +Publish the bounded child-plan tree that follows the completed runtime-semantics ledger. + +## Fixed Sequence + +1. `generated-scene-resolver-request-mapping-hardening` +2. `generated-scene-runtime-url-classification-hardening` +3. `generated-scene-embedded-dictionary-extraction-hardening` +4. `generated-scene-parameter-default-semantics-hardening` +5. `generated-scene-invocation-alias-generation-hardening` +6. `generated-scene-runtime-semantics-rematerialization-refresh` +7. `generated-scene-runtime-semantics-validation-refresh` + +## Deliverables + +1. route cluster JSON +2. route sequence report +3. five bounded child implementation plans +4. one rematerialization refresh dependency plan +5. one validation refresh dependency plan + +## Stop Statement + +Stop after publishing the child-plan tree. + +Do not implement any route in this plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-gap-analysis-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-gap-analysis-plan.md new file mode 100644 index 0000000..af283b6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-gap-analysis-plan.md @@ -0,0 +1,122 @@ +# Generated Scene Runtime Semantics Gap Analysis Plan + +> Status: Superseded by `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` + +## Parent + +- Parent design: [2026-04-20-generated-scene-runtime-semantics-gap-analysis-design.md](../specs/2026-04-20-generated-scene-runtime-semantics-gap-analysis-design.md) + +## Goal + +Analyze the 102 final generated scene skills for runtime-semantics divergence, using `sweep-030-scene` as the anchor case and systematizing the five gap classes exposed during inner-network validation. + +This plan is analysis-only. + +## Fixed Inputs + +- `examples/scene_skill_102_final_materialization_2026-04-19/skills` +- `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` +- Anchor source: + - `D:/desk/智能体资料/全量业务场景/一平台场景/台区线损大数据-月_周累计线损率统计分析` + +## Boundaries + +Allowed: + +- Read skill manifests, reports, references, and selected source-scene evidence +- Produce JSON inventory and report + +Forbidden: + +- No edits in `src/` +- No edits to generated skills +- No rerun materialization +- No execution board updates +- No pseudo-production execution +- No implementation patch for any scene + +## Phase 0: Freeze Gap Taxonomy + +Tasks: + +1. 
Fix the five runtime-semantics gap classes from the anchor case +2. Define high / medium / low risk buckets +3. Lock analysis outputs and stop rule + +Acceptance: + +1. The five gap classes are explicit and stable +2. The plan remains analysis-only + +## Phase 1: Anchor-Case Evidence Extraction + +Tasks: + +1. Read `sweep-030-scene` generated assets: + - `scene.toml` + - `references/generation-report.json` + - `references/org-dictionary.json` + - generated script +2. Read source-scene evidence from the original `台区线损大数据-月_周累计线损率统计分析` +3. Record direct evidence for: + - alias gap + - dictionary recovery gap + - parameter default semantics gap + - resolver-to-request mapping gap + - runtime URL semantics gap + +Acceptance: + +1. `sweep-030-scene` has explicit evidence for each applicable gap class + +## Phase 2: 102-Scene Inventory Scan + +Tasks: + +1. Scan all 102 final skills +2. Extract: + - deterministic keywords + - params presence + - dictionary reference presence + - bootstrap target presence + - generation-report URL evidence +3. Tag scenes with likely gap classes using bounded heuristics + +Acceptance: + +1. Every scene gets a runtime-semantics record +2. Every scene has `riskLevel` and `gaps` + +## Phase 3: Family / Archetype Grouping + +Tasks: + +1. Group findings by archetype / family +2. Count gap incidence by bucket +3. Separate: + - generator-level fix candidates + - runtime-only residuals + +Acceptance: + +1. Summary counts exist per gap type and per archetype +2. Report can distinguish generator vs runtime responsibilities + +## Phase 4: Publish Analysis Assets + +Deliverables: + +1. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_gap_analysis_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-generated-scene-runtime-semantics-gap-analysis-report.md` + +Acceptance: + +1. All 102 scenes are represented +2. `sweep-030-scene` is explicitly called out as anchor evidence +3. 
The report recommends next implementation routes, but does not execute them + +## Stop Statement + +Stop after publishing the JSON inventory and report. diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-rematerialization-refresh-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-rematerialization-refresh-plan.md new file mode 100644 index 0000000..ab7fba6 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-rematerialization-refresh-plan.md @@ -0,0 +1,34 @@ +# Generated Scene Runtime Semantics Rematerialization Refresh Plan + +> Date: 2026-04-20 +> Status: Draft +> Dependency stage: +> - post route implementation + +## Plan Intent + +Make full 102-scene rematerialization mandatory after runtime-semantics hardening routes land. + +## Fixed Inputs + +1. completed route-local hardening reports +2. current canonical final skill root +3. current final materialization manifest/failure assets + +## Required Outputs + +1. refreshed final 102-skill materialization directory +2. refreshed materialization manifest +3. refreshed materialization failures asset +4. refreshed scene index / metadata layer + +## Guardrails + +1. no route may be considered complete without this refresh +2. rematerialization must use hardened generator rules, not manual skill edits + +## Stop Statement + +Stop after publishing the rematerialization refresh plan. + +Do not execute rematerialization inside this dependency plan. 
diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-validation-refresh-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-validation-refresh-plan.md new file mode 100644 index 0000000..2b528c9 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-validation-refresh-plan.md @@ -0,0 +1,29 @@ +# Generated Scene Runtime Semantics Validation Refresh Plan + +> Date: 2026-04-20 +> Status: Draft +> Dependency stage: +> - post rematerialization refresh + +## Plan Intent + +Make validation refresh mandatory after runtime-semantics rematerialization. + +## Required Refresh Layers + +1. deterministic invocation readiness +2. natural-language parameter readiness +3. static validation +4. direct mock execution +5. pseudo-production handoff assets + +## Guardrails + +1. validation must consume the refreshed canonical 102-skill bundle +2. old validation assets may not be reused as proof of the hardened bundle + +## Stop Statement + +Stop after publishing the validation refresh plan. + +Do not execute validation refresh inside this dependency plan. diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-runtime-url-classification-hardening-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-url-classification-hardening-plan.md new file mode 100644 index 0000000..a3b900f --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-runtime-url-classification-hardening-plan.md @@ -0,0 +1,47 @@ +# Generated Scene Runtime URL Classification Hardening Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent route: +> - `runtime_url_classification_hardening` +> Parent ledger: +> - `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +## Plan Intent + +Implement the first reusable slice that separates runtime URL roles during generation. + +## Fixed Input Bucket + +Use the bounded bucket: + +1. 
scenes with strong source evidence for multiple URL roles +2. scenes whose current generated manifest only exposes `target_url` +3. high-signal browser-script scenes where runtime context URL and module-route URL are likely to diverge + +This first slice should focus on the highest-risk parameterized browser families before broader expansion. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. route-local generator tests + +## Forbidden Files + +1. no callback-host/runtime implementation +2. no service-console changes +3. no direct edits to generated skills +4. no board or validation assets + +## Expected Coverage Delta + +1. generated metadata can distinguish app-entry/runtime-context/module-route roles +2. callers are no longer forced to guess `page_url` semantics for the bucketed scenes + +## Stop Statement + +Stop after the first reusable URL-classification slice is implemented and route-local follow-up assets are published. + +Do not expand to every scene in this route plan. diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-source-evidence-cross-scan-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-source-evidence-cross-scan-plan.md new file mode 100644 index 0000000..2c41178 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-source-evidence-cross-scan-plan.md @@ -0,0 +1,94 @@ +# Generated Scene Source Evidence Cross-Scan Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent roadmap: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` +> Parent design: +> - `docs/superpowers/specs/2026-04-20-generated-scene-source-evidence-cross-scan-design.md` + +## Goal + +Perform a bounded source-first cross-scan over the original 102 scene directories so the project can identify which scenes share the same runtime-semantics risk family as `sweep-030-scene`. + +This plan is analysis-only. 
+ +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. source root: + - `D:/desk/智能体资料/全量业务场景/一平台场景` + +## Boundaries + +Allowed: + +1. map the current 102 scenes to original source directories +2. scan bounded source evidence +3. publish JSON inventory and report + +Forbidden: + +1. no edits in `src/` +2. no edits to generated skills +3. no rematerialization +4. no validation reruns +5. no execution board updates + +## Phase 0: Freeze Scene Mapping + +Tasks: + +1. derive the exact 102-scene source directory mapping +2. validate that each scene maps to one source directory or an explicit missing record + +Acceptance: + +1. all 102 scenes have a source mapping status + +## Phase 1: Run Bounded Source Evidence Scan + +Tasks: + +1. scan for alias evidence +2. scan for dictionary evidence +3. scan for default parameter evidence +4. scan for request mapping evidence +5. scan for runtime URL evidence + +Acceptance: + +1. each scene has evidence flags +2. representative evidence files are recorded where found + +## Phase 2: Build Cross-Scan Ledger + +Tasks: + +1. write one record per scene +2. tag scenes with source-side risk hints +3. explicitly identify scenes that look similar to `sweep-030-scene` + +Acceptance: + +1. all 102 scenes appear in the ledger +2. the anchor case is clearly represented + +## Phase 3: Publish Assets + +Deliverables: + +1. `tests/fixtures/generated_scene/generated_scene_source_evidence_cross_scan_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-generated-scene-source-evidence-cross-scan-report.md` + +Acceptance: + +1. the JSON can be used as the next input to the runtime-semantics ledger stage +2. the report summarizes the five evidence families across the 102-scene set + +## Stop Statement + +Stop after publishing the JSON inventory and report. 
+ +Do not start rule-hardening or rematerialization in this plan. diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md new file mode 100644 index 0000000..35eb0d9 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md @@ -0,0 +1,214 @@ +# Generated Scene Source-First Runtime Semantics Hardening Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent design: `docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-design.md` + +## Plan Intent + +Replace the weaker generated-skill-first analysis path with a stronger source-first roadmap: + +1. scan all 102 original source scenes +2. detect scenes that can reproduce the same runtime-semantics defect classes exposed by `sweep-030-scene` +3. convert those findings into rule-level hardening routes +4. require full 102-scene rematerialization after rule changes +5. refresh the full validation stack after rematerialization + +## Why This Plan Exists + +The project goal is not to describe already-surfaced gaps after they break in inner-network testing. + +The goal is to prevent the same class of defect from reappearing across the remaining source scenes. + +Therefore this plan is driven by original source-scene evidence, not generated skill artifacts alone. + +## Fixed Inputs + +1. Original source root: + - `D:/desk/智能体资料/全量业务场景/一平台场景` +2. Current final generated skills: + - `examples/scene_skill_102_final_materialization_2026-04-19/skills` +3. Current 102-skill materialization manifest +4. Current invocation / parameter readiness assets +5. `sweep-030-scene` inner-network runtime findings + +## Scope Guardrails + +Allowed: + +1. scan all 102 original source-scene directories +2. compare source evidence against current generated skills +3. 
produce risk ledgers, reports, and downstream bounded plans + +Forbidden in this parent plan: + +1. no implementation changes in `src/` +2. no skill manifest edits +3. no rematerialization execution yet +4. no validation reruns yet +5. no inner-network patching as a substitute for source-first analysis + +## Workstreams + +1. `WS1` Source Evidence Scan +2. `WS2` Runtime-Semantics Risk Ledger +3. `WS3` Rule Hardening Route Design +4. `WS4` Full Rematerialization and Validation Refresh Planning + +## Phase 0: Freeze Parent Scope + +### Objective + +Make this the new parent roadmap for generated-scene runtime semantics hardening. + +### Tasks + +1. freeze the five gap classes +2. freeze the source-first principle +3. freeze rematerialization as a required downstream step + +### Acceptance + +1. future work must start from source-scene evidence +2. future fixes must be rule-level before scene-level + +## Phase 1: Full 102 Source Cross-Scan + +### Objective + +Systematically scan the original 102 source scenes for high-signal evidence related to the five runtime-semantics gap classes. + +### Required scan targets + +1. dictionary / enum / tree files +2. default parameter logic +3. request payload field names +4. runtime URL candidates +5. operator-facing wording and alias sources + +### Tasks + +1. map each scene id to its original source directory +2. run a bounded evidence scan over all 102 source directories +3. tag source-side evidence flags per scene + +### Deliverables + +1. source evidence scan JSON +2. source evidence scan report + +### Acceptance + +1. all 102 scenes have source evidence flags +2. `sweep-030-scene` is validated as anchor evidence + +## Phase 2: Build the Source-First Runtime Semantics Ledger + +### Objective + +Merge source-side evidence with generated-skill evidence into a full runtime-semantics risk ledger. + +### Tasks + +1. compare source evidence with generated manifests and references +2. assign gap classes per scene +3. 
assign risk level per scene +4. distinguish: + - generator-level rule gap + - runtime-only residual + +### Deliverables + +1. `generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` +2. source-first runtime semantics report + +### Acceptance + +1. all 102 scenes are represented +2. each scene has `gaps`, `riskLevel`, and `recommendedFixRoutes` + +## Phase 3: Convert Ledger into Rule-Hardening Routes + +### Objective + +Turn the source-first ledger into bounded implementation routes that modify reusable generation rules rather than scene-specific patches. + +### Candidate hardening routes + +1. alias generation hardening +2. embedded dictionary extraction hardening +3. parameter default semantics recovery hardening +4. resolver-to-request mapping hardening +5. runtime URL classification hardening + +### Tasks + +1. count scenes affected by each route +2. prioritize routes by coverage gain and reuse +3. define bounded implementation slices for the top routes + +### Deliverables + +1. child-plan sequence for runtime semantics hardening +2. bounded route plans for top reusable fixes + +### Acceptance + +1. no route is scene-name hardcoded +2. route priority is based on 102-scene reuse, not anecdotal debugging order + +## Phase 4: Require Full 102 Rematerialization + +### Objective + +Ensure that hardened rules are propagated into the final generated skill inventory. + +### Tasks + +1. define full 102 rematerialization as mandatory after route implementation +2. define materialization outputs that must be refreshed +3. define how canonical final skill bundle is replaced + +### Deliverables + +1. full rematerialization refresh plan + +### Acceptance + +1. no runtime-semantics hardening route may be considered complete without rematerialization + +## Phase 5: Require Validation Refresh + +### Objective + +Refresh downstream validation after rematerialization so improved rules are measured end-to-end. + +### Required refresh layers + +1. 
deterministic invocation readiness +2. natural-language parameter readiness +3. static validation +4. direct mock execution +5. pseudo-production handoff refresh + +### Deliverables + +1. validation refresh plan + +### Acceptance + +1. the new final 102-skill bundle is revalidated before more inner-network testing + +## Immediate Next Output + +This parent plan should immediately lead to a new bounded child plan: + +- `2026-04-20-generated-scene-source-evidence-cross-scan-plan.md` + +That child plan should perform the actual source cross-scan over the 102 original scenes. + +## Stop Statement + +Stop after publishing this parent plan and its design. + +Do not execute the source cross-scan or implementation inside this plan. diff --git a/docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-plan.md b/docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-plan.md new file mode 100644 index 0000000..7b09ee3 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-plan.md @@ -0,0 +1,143 @@ +# Generated Scene Source-First Runtime Semantics Ledger Plan + +> Date: 2026-04-20 +> Status: Draft +> Parent roadmap: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` +> Parent design: +> - `docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-design.md` +> Upstream completed step: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-evidence-cross-scan-plan.md` + +## Plan Intent + +Build the full source-first runtime-semantics ledger for the current 102-scene set. + +This stage exists to convert the completed source cross-scan into a reusable comparison ledger before any analyzer/generator hardening route is defined. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_source_evidence_cross_scan_2026-04-20.json` +2. 
`examples/scene_skill_102_final_materialization_2026-04-19/skills` +3. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +5. `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` +6. `sweep-030-scene` inner-network findings already established in prior discussion and analysis assets + +## Scope Guardrails + +Allowed: + +1. read source cross-scan outputs +2. read current generated skills and references +3. compare source evidence with generated evidence +4. assign gap classes, risk levels, and route hints +5. publish ledger JSON and report + +Forbidden: + +1. no implementation changes in `src/` +2. no manifest or script edits +3. no rematerialization +4. no validation reruns +5. no execution-board update +6. no inner-network testing + +## Phase 0: Freeze Ledger Inputs + +### Objective + +Make the cross-scan asset and current generated-skill assets the only valid inputs for this ledger stage. + +### Tasks + +1. verify the cross-scan JSON parses +2. verify all 102 scenes are represented +3. verify the current generated skill root is readable + +### Acceptance + +1. the ledger stage starts from a stable 102-scene evidence base + +## Phase 1: Build Per-Scene Comparison Records + +### Objective + +For each scene, merge source evidence with generated-skill evidence into one comparison record. + +### Tasks + +1. load source evidence flags, evidence files, alias samples, request tokens, and runtime URL samples +2. read current scene-level generated manifests/references as needed +3. summarize generated-side evidence for: + - invocation aliases + - dictionaries + - parameter defaults + - request mapping + - runtime URL roles +4. write one comparison record per scene + +### Acceptance + +1. 
all 102 scenes have both source-side and generated-side summaries + +## Phase 2: Assign Gap Classes and Risk Levels + +### Objective + +Convert comparison records into a stable runtime-semantics risk ledger. + +### Tasks + +1. assign `gaps` from the fixed five-class taxonomy +2. assign `riskLevel = high|medium|low` +3. assign: + - `generatorLevelGap` + - `runtimeOnlyResidual` +4. record `comparisonNotes` + +### Acceptance + +1. every scene has `gaps` +2. every scene has `riskLevel` +3. every scene has `recommendedFixRoutes` + +## Phase 3: Aggregate Route-Level Signals + +### Objective + +Produce route-level reuse signals from the scene ledger so the next stage can design bounded hardening routes. + +### Tasks + +1. count scenes carrying each gap class +2. count scenes marked `generatorLevelGap` +3. count scenes marked `runtimeOnlyResidual` +4. identify the highest-density reusable route clusters + +### Acceptance + +1. the ledger can drive downstream route prioritization without returning to anecdotal scene debugging + +## Deliverables + +1. `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md` + +## Expected Coverage + +The ledger should represent: + +1. all 102 scenes +2. all five canonical gap classes +3. source-first route hints derived from the completed cross-scan + +## Stop Statement + +Stop after: + +1. publishing the ledger JSON +2. publishing the ledger report +3. summarizing the highest-reuse hardening routes + +Do not yet create implementation route plans inside this ledger plan. 
diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-deterministic-invocation-readiness-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-deterministic-invocation-readiness-plan.md new file mode 100644 index 0000000..e6695d1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-deterministic-invocation-readiness-plan.md @@ -0,0 +1,110 @@ +# Scene Skill 102 Deterministic Invocation Readiness Plan + +> Date: 2026-04-20 +> Design: `2026-04-20-scene-skill-102-deterministic-invocation-readiness-design.md` + +## Plan Intent + +Make the materialized scene skills ready for sgClaw deterministic invocation using natural-language instructions ending with `。。。`. + +This plan does not prove production execution. It only prepares and verifies registry/dispatch readiness. + +## Fixed Inputs + +1. `examples/scene_skill_102_final_materialization_2026-04-19` +2. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Allowed Files + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*/scene.toml` +2. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_samples_2026-04-20.json` +4. `docs/superpowers/reports/2026-04-20-scene-skill-102-deterministic-invocation-readiness-report.md` + +## Forbidden Files + +1. `src/compat/scene_platform/dispatch.rs` +2. `src/compat/scene_platform/resolvers.rs` +3. `src/generated_scene/analyzer.rs` +4. `src/generated_scene/generator.rs` +5. generated `scripts/*` +6. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Invocation Readiness Boundary + +### Tasks + +1. Confirm final materialization root exists. +2. Confirm human-readable index exists. +3. Confirm this plan excludes browser execution and runtime changes. 
+### Acceptance Criteria
+
+1. Scope is deterministic invocation readiness only.
+2. `sweep-012-scene` remains outside complete-package normalization.
+
+## Phase 1: Normalize Deterministic Manifest Metadata
+
+### Tasks
+
+1. For each complete package, set `[deterministic].suffix = "。。。"`.
+2. Preserve scene id, skill, tool, bootstrap, params, artifact, and postprocess sections.
+3. Generate include keywords from:
+   - full scene name;
+   - meaningful scene-name tokens;
+   - archetype/family hints when available.
+4. Keep exclude keywords.
+
+### Acceptance Criteria
+
+1. All complete packages use suffix `。。。`.
+2. Every complete package has non-empty include keywords.
+3. Skill directories and scripts are unchanged.
+
+## Phase 2: Build Invocation Samples
+
+### Tasks
+
+For each complete package, generate at least:
+
+1. full-name sample: `<场景全名>。。。`
+2. keyword sample: `<关键词>。。。`
+3. parameterized sample when params exist.
+
+### Acceptance Criteria
+
+1. Sample asset contains all complete packages.
+2. Failed package is listed as excluded.
+
+## Phase 3: Dispatch Dry-Run
+
+### Tasks
+
+1. Run registry-backed dispatch checks without browser execution.
+2. Verify full-name sample selects the expected scene.
+3. Record ambiguous or unsupported dispatch results.
+4. Record required-param prompts separately from dispatch misses.
+
+### Acceptance Criteria
+
+1. Every complete package has a dispatch result.
+2. Results distinguish selected, prompt, ambiguous, and no-match.
+
+## Phase 4: Publish Readiness Report
+
+### Tasks
+
+1. Publish readiness JSON.
+2. Publish invocation sample JSON.
+3. Publish superpowers report.
+
+### Acceptance Criteria
+
+1. Report states deterministic-ready count.
+2. Report states gap count and gap categories.
+3. Report states whether runtime dispatch changes are needed.
+
+## Stop Statement
+
+Stop after readiness assets and report are published.
Do not start browser execution, static validation, production validation, or runtime dispatch implementation under this plan. diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-full-direct-mock-execution-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-full-direct-mock-execution-plan.md new file mode 100644 index 0000000..6640160 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-full-direct-mock-execution-plan.md @@ -0,0 +1,112 @@ +# Scene Skill 102 Full Direct Mock Execution Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-scene-skill-102-full-direct-mock-execution-design.md` + +## Plan Intent + +Run all `102` final materialized scene skill scripts through a local direct mock runtime. + +This plan expands beyond representative harness execution, but remains fully mock-only and local. + +## Fixed Inputs + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills` +2. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +3. `tests/fixtures/generated_scene/scene_skill_102_static_validation_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_harness_results_2026-04-20.json` + +## Planned Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md` + +## Allowed Files + +1. new direct mock runner under `tests/` +2. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +3. `docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/**` +5. 
`tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Direct Mock Boundary + +### Tasks + +1. Confirm representative mock harness is complete. +2. Confirm this plan does not mutate generated skill packages. +3. Confirm this plan does not use real network, browser, or credentials. + +### Acceptance Criteria + +1. direct mock starts from final materialized skills +2. generated skills remain unchanged + +## Phase 1: Build Direct Mock Runner + +### Tasks + +1. load the `102` scene index +2. locate each generated script +3. reuse fake runtime dependencies by archetype +4. call `buildBrowserEntrypointResult` +5. capture artifact status, row count, failure reason, and mock request log + +### Acceptance Criteria + +1. every scene is attempted +2. no single scene failure aborts the full run +3. no real request is sent + +## Phase 2: Execute Direct Mock For 102 + +### Tasks + +1. run the direct mock runner +2. write per-scene direct mock result +3. classify each scene as: + - `direct-mock-pass` + - `direct-mock-partial` + - `direct-mock-fail` + +### Acceptance Criteria + +1. output record count is `102` +2. each failure has a named reason + +## Phase 3: Publish Report + +### Tasks + +1. summarize direct mock pass/fail +2. summarize results by archetype +3. identify remaining mock-only blockers +4. recommend whether pseudo-production batch selection should start + +### Acceptance Criteria + +1. report does not claim production execution +2. report separates mock pass from production pass + +## Completion Criteria + +This plan is complete when: + +1. all `102` scenes have direct mock results +2. JSON asset is published +3. report is published +4. generated skill packages remain unchanged + +## Stop Statement + +Stop after publishing direct mock execution results and report. + +Do not start pseudo-production batch selection under this plan. 
+ diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-plan.md new file mode 100644 index 0000000..2e21973 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-plan.md @@ -0,0 +1,279 @@ +# Scene Skill 102 Mock Runtime Harness Implementation Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-design.md` +> Input Matrix: `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json` + +## Plan Intent + +Implement and execute bounded mock runtime harnesses for representative generated scene skills. + +This plan validates generated script control flow under fake dependencies. It does not validate production access, real data correctness, or browser-integrated host behavior. + +## Fixed Inputs + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills` +2. `tests/fixtures/generated_scene/scene_skill_102_static_validation_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_dispatch_dry_run_validation_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json` +5. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_readiness_2026-04-20.json` + +## Planned Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_harness_results_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md` + +## Allowed Files + +1. new mock harness files under `tests/` or `tests/fixtures/generated_scene/` +2. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_harness_results_2026-04-20.json` +3. `docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md` + +## Forbidden Files + +1. 
`src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/**` +5. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Workstreams + +1. `WS1` Mock harness foundation +2. `WS2` Mainline fetch archetype harnesses +3. `WS3` Small bucket harnesses +4. `WS4` Boundary/runtime harnesses +5. `WS5` Integrated result reporting + +## Phase 0: Freeze Mock Runtime Boundary + +### Objective + +Freeze mock validation as a non-production, non-browser, non-network stage. + +### Tasks + +1. Confirm static validation is `102 / 102`. +2. Confirm deterministic dispatch dry-run is `102 / 102`. +3. Confirm this plan does not mutate generated skill packages. +4. Confirm this plan does not require production credentials or network access. + +### Deliverables + +1. baseline section in final mock runtime harness report + +### Acceptance Criteria + +1. no production environment is accessed +2. no generated skill is modified +3. no official board status is changed + +## Phase 1: Mock Harness Foundation + +### Objective + +Create the shared fake runtime primitives used by all representative harnesses. + +### Tasks + +1. define fake `fetch` +2. define fake browser DOM surface +3. define fake artifact writer +4. define fake host bridge callback surface +5. define fake local-doc service surface +6. define common result schema: + - `script-load-pass` + - `mock-runtime-pass` + - `mock-runtime-partial` + - `mock-runtime-fail` + +### Deliverables + +1. shared mock harness implementation + +### Acceptance Criteria + +1. harness foundation does not call real network +2. harness foundation can run without browser or credentials +3. harness foundation can load a generated script from the final materialization root + +## Phase 2: Route 1 - Paginated Enrichment Harness + +### Objective + +Validate the largest archetype bucket first. + +### Fixed Representatives + +1. 
`sweep-001-scene` +2. `sweep-002-scene` +3. `sweep-003-scene` + +### Tasks + +1. load each representative script +2. provide fake primary page response +3. provide fake enrichment response +4. verify expected request order where observable +5. verify artifact metadata or structured result is produced + +### Deliverables + +1. paginated enrichment mock result records + +### Acceptance Criteria + +1. each representative receives a `mock-runtime-*` status +2. no real request is sent +3. failures include named failure reason + +## Phase 3: Route 2 - G2 And G1-E Fetch Harnesses + +### Objective + +Validate fetch-based mainline small buckets. + +### Fixed Representatives + +`multi_mode_request`: + +1. `sweep-020-scene` +2. `sweep-023-scene` +3. `sweep-030-scene` + +`single_request_enrichment`: + +1. `sweep-013-scene` +2. `sweep-016-scene` +3. `sweep-068-scene` + +### Tasks + +1. run representative scripts with fake fetch +2. verify mode/request paths for multi-mode scenes +3. verify enrichment path for single-request enrichment scenes +4. record pass/fail reason + +### Deliverables + +1. multi-mode request mock result records +2. single-request enrichment mock result records + +### Acceptance Criteria + +1. each representative receives a `mock-runtime-*` status +2. real-sample or production execution is not started + +## Phase 4: Route 3 - Inventory And Page-State Harnesses + +### Objective + +Validate the small specialized buckets. + +### Fixed Representatives + +`multi_endpoint_inventory`: + +1. `sweep-084-scene` +2. `sweep-085-scene` + +`page_state_eval`: + +1. `sweep-066-scene` +2. `sweep-094-scene` + +### Tasks + +1. run multi-endpoint representatives with fake endpoint responses +2. run page-state representatives with fake DOM state +3. record pass/fail reason + +### Deliverables + +1. inventory mock result records +2. page-state mock result records + +### Acceptance Criteria + +1. each representative receives a `mock-runtime-*` status +2. 
no host browser is required + +## Phase 5: Route 4 - Local-Doc And Host-Bridge Harnesses + +### Objective + +Validate boundary runtime families with fake local-doc and fake host-bridge surfaces. + +### Fixed Representatives + +`local_doc_pipeline`: + +1. `sweep-012-scene` +2. `sweep-017-scene` +3. `sweep-019-scene` + +`host_bridge_workflow`: + +1. `sweep-007-scene` +2. `sweep-009-scene` +3. `sweep-010-scene` + +### Tasks + +1. run local-doc representatives with fake local document query and export responses +2. run host-bridge representatives with fake action and callback completion responses +3. classify boundary failures as mock harness gaps or script contract gaps + +### Deliverables + +1. local-doc mock result records +2. host-bridge mock result records + +### Acceptance Criteria + +1. no real host bridge is invoked +2. no local document service is invoked +3. failures are explicitly categorized + +## Phase 6: Integrated Mock Runtime Report + +### Objective + +Publish representative execution results and propagated matrix interpretation. + +### Tasks + +1. write `scene_skill_102_mock_runtime_harness_results_2026-04-20.json` +2. summarize representative pass/fail by archetype +3. summarize which non-representative scenes are covered only by representative inference +4. identify which archetypes still require direct mock expansion +5. recommend whether to proceed to pseudo-production batch planning + +### Deliverables + +1. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_harness_results_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md` + +### Acceptance Criteria + +1. report distinguishes representative execution from propagated coverage +2. report does not claim production execution +3. report does not update official board + +## Completion Criteria + +This plan is complete when: + +1. every fixed representative has a mock runtime result record +2. 
integrated mock runtime results JSON is published +3. mock runtime report is published +4. generated skill packages remain unchanged +5. no real browser or production environment was used + +## Stop Statement + +Stop after publishing mock runtime harness results and report. + +Do not start pseudo-production or real-environment validation under this plan. + diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-natural-language-parameter-readiness-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-natural-language-parameter-readiness-plan.md new file mode 100644 index 0000000..4630048 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-natural-language-parameter-readiness-plan.md @@ -0,0 +1,104 @@ +# Scene Skill 102 Natural-Language Parameter Readiness Plan + +> Date: 2026-04-20 +> Design: `2026-04-20-scene-skill-102-natural-language-parameter-readiness-design.md` + +## Plan Intent + +Build a 102-scene natural-language invocation parameter readiness view before pseudo-production testing. + +This plan answers which skills should be invoked with query conditions such as organization and period, which skills currently only support scene-keyword deterministic selection, and which required-param skills have resolver gaps. + +## Fixed Inputs + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills` +2. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` + +## Allowed Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +2. `tests/fixtures/generated_scene/scene_skill_102_natural_language_invocation_samples_2026-04-20.json` +3. `docs/superpowers/reports/2026-04-20-scene-skill-102-natural-language-parameter-readiness-report.md` + +## Forbidden Files + +1. `src/compat/scene_platform/dispatch.rs` +2. 
`src/compat/scene_platform/resolvers.rs` +3. `src/generated_scene/analyzer.rs` +4. `src/generated_scene/generator.rs` +5. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*` +6. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Boundary + +### Tasks + +1. Confirm final skill count is `102`. +2. Confirm this plan is analysis-only. +3. Confirm no browser, network, host bridge, or production execution is performed. + +### Acceptance Criteria + +1. No generated skill files are modified. +2. No runtime source files are modified. + +## Phase 1: Parameter Manifest Scan + +### Tasks + +1. Read each `SKILL.toml` for scene name and archetype. +2. Read each `scene.toml` for deterministic suffix, params, and resolver declarations. +3. Record required params and resolver types. +4. Check resolver resources such as dictionary files. + +### Acceptance Criteria + +1. All `102` scenes have one parameter scan record. +2. Required param scenes are explicitly identified. + +## Phase 2: Readiness Classification + +### Tasks + +1. Mark scenes with supported, populated resolver resources as `parameter-ready`. +2. Mark scenes with empty or missing resolver resources as `parameter-gap`. +3. Mark no-param scenes as `parameter-not-required`. +4. Mark no-param scenes with likely filter words as `parameter-implicit-risk`. + +### Acceptance Criteria + +1. Every scene has exactly one primary readiness class. +2. Resolver gaps list concrete file or config reasons. + +## Phase 3: Invocation Sample Generation + +### Tasks + +1. Generate minimal invocation samples for every scene. +2. Generate parameterized samples for scenes with required params. +3. Generate cautionary samples for implicit-risk scenes. +4. Make clear when organization or period wording is not currently parsed. + +### Acceptance Criteria + +1. Sample JSON covers all `102` scenes. +2. Parameterized samples are not generated as if resolver gaps are resolved. 
+ +## Phase 4: Publish Report + +### Tasks + +1. Write readiness JSON. +2. Write invocation sample JSON. +3. Write superpowers report with counts, gaps, and next route. + +### Acceptance Criteria + +1. Report explains why `场景名。。。` is insufficient for parameterized scenes. +2. Report states whether pseudo-production batch input should be regenerated. +3. Stop after report; do not start implementation. + +## Stop Statement + +Stop after readiness assets and report are published. Do not edit runtime, generated skills, board assets, or pseudo-production execution records under this plan. diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-plan.md new file mode 100644 index 0000000..30de66d --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-plan.md @@ -0,0 +1,124 @@ +# Scene Skill 102 Parameter Dictionary And Invocation Template Normalization Plan + +> Date: 2026-04-20 +> Design: `2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-design.md` + +## Plan Intent + +Close the natural-language parameter readiness gap for the fixed `10` required-param scene skills and refresh pseudo-production invocation templates. + +## Fixed Input Bucket + +The only input bucket is the `10` scenes marked `parameter-gap` in: + +`tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` + +## Allowed Files + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills/{fixed-10}/references/org-dictionary.json` +2. `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_natural_language_invocation_samples_2026-04-20.json` +4. 
`tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_handoff_2026-04-20.json` +5. `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` +6. `docs/superpowers/reports/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-report.md` + +## Forbidden Files + +1. `src/compat/scene_platform/dispatch.rs` +2. `src/compat/scene_platform/resolvers.rs` +3. `src/generated_scene/analyzer.rs` +4. `src/generated_scene/generator.rs` +5. `examples/scene_skill_102_final_materialization_2026-04-19/skills/*/scripts/*` +6. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Scope + +### Tasks + +1. Confirm the fixed `10` required-param scenes. +2. Confirm all current gaps are empty org dictionaries. +3. Confirm no runtime code changes are needed. + +### Acceptance Criteria + +1. No non-param scene is changed. +2. No browser or production execution is started. + +## Phase 1: Populate Starter Organization Dictionaries + +### Tasks + +1. Write the pseudo-production starter dictionary into each fixed `10` skill. +2. Use the already-tested aliases: + - `兰州公司` + - `兰州供电公司` + - `国网兰州供电公司` + - `城关供电分公司` + - `城关分公司` + - `天水公司` + - `天水供电公司` + - `国网天水供电公司` +3. Mark dictionary provenance as starter, not full production. + +### Acceptance Criteria + +1. All fixed `10` dictionaries are non-empty arrays. +2. Each dictionary contains alias coverage for `兰州公司`. + +## Phase 2: Refresh Parameter Readiness + +### Tasks + +1. Re-scan all `102` skills. +2. Recompute parameter readiness. +3. Verify the fixed `10` move to `parameter-ready`. +4. Keep implicit-risk classification for no-param scenes. + +### Acceptance Criteria + +1. `parameter-gap = 0`. +2. `parameter-ready = 10`. +3. `total scenes = 102`. + +## Phase 3: Refresh Invocation Templates + +### Tasks + +1. Generate parameterized samples for the fixed `10`. +2. Ensure samples include concrete period, e.g. `月累计 2026-03`. +3. 
Ensure samples keep `。。。` suffix. + +### Acceptance Criteria + +1. All fixed `10` have parameterized sample input. +2. No-param scenes keep minimal invocation samples. + +## Phase 4: Refresh Pseudo-Production Handoff + +### Tasks + +1. Update the selected pseudo-production handoff entries that are in the fixed `10`. +2. Replace bare scene-name inputs with parameterized inputs. +3. Preserve credential policy and evidence collection fields. + +### Acceptance Criteria + +1. Selected required-param scenes no longer use bare `场景名。。。` in handoff. +2. No credentials are written to the repository. + +## Phase 5: Publish Normalization Report + +### Tasks + +1. Publish normalization JSON. +2. Publish superpowers report. +3. State remaining limits explicitly. + +### Acceptance Criteria + +1. Report states that dictionaries are starter dictionaries, not complete production unit trees. +2. Report states next step for pseudo-production execution preparation refresh. + +## Stop Statement + +Stop after dictionaries, readiness assets, invocation samples, handoff, and report are refreshed. Do not run browser, production, or runtime implementation work under this plan. diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-execution-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-execution-plan.md new file mode 100644 index 0000000..6ce853c --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-execution-plan.md @@ -0,0 +1,111 @@ +# Scene Skill 102 Pseudo-Production Batch Execution Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-design.md` + +## Plan Intent + +Run the prepared 10-scene pseudo-production batch in an operator-provided environment and record structured results. + +This plan is bounded to execution and evidence collection for the selected 10 scenes. + +## Fixed Inputs + +1. 
`tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_handoff_2026-04-20.json` +2. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_evidence_checklist_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_record_template_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_batch_selection_2026-04-20.json` + +## Planned Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_batch_execution_results_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-execution-report.md` + +## Allowed Files + +1. planned execution result JSON +2. planned execution report +3. redacted evidence summaries if explicitly generated + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/**` +5. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +6. any credential, token, cookie, or secret file + +## Phase 0: Confirm Environment Readiness + +### Tasks + +1. Confirm operator-provided browser/runtime environment exists. +2. Confirm network/session access is provided outside repository. +3. Confirm evidence output location. +4. Confirm redaction rules. + +### Acceptance Criteria + +1. No credentials are stored in repository. +2. Execution does not start unless environment readiness is confirmed externally. + +## Phase 1: Execute Selected Scenes + +### Tasks + +For each selected scene: + +1. use the deterministic invocation input ending with `。。。` +2. execute through sgClaw runtime or agreed quasi-production host +3. collect console log +4. collect network summary +5. capture screenshot if target page is required +6. capture exported artifact if produced +7. record final result state + +### Acceptance Criteria + +1. every selected scene has one execution record +2. 
every record has exactly one result state +3. failures use the allowed taxonomy + +## Phase 2: Redact And Normalize Evidence + +### Tasks + +1. redact credentials, cookies, tokens, Authorization headers, and private data +2. normalize evidence paths +3. confirm each evidence checklist item is present or explicitly unavailable + +### Acceptance Criteria + +1. no secret material enters repository output +2. missing evidence has a reason + +## Phase 3: Publish Execution Results + +### Tasks + +1. write execution results JSON +2. write execution report +3. summarize pass/blocker/mismatch/runtime-error counts +4. list follow-up blockers + +### Acceptance Criteria + +1. selected scene count remains 10 +2. report does not claim full production certification +3. official board is not updated under this plan + +## Completion Criteria + +This plan is complete when all 10 selected scenes have structured execution records and a redacted execution report is published. + +## Stop Statement + +Stop after publishing execution results and report. + +Do not update official board status under this plan. + diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-plan.md new file mode 100644 index 0000000..9923aad --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-plan.md @@ -0,0 +1,114 @@ +# Scene Skill 102 Pseudo-Production Batch Execution Preparation Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-design.md` + +## Plan Intent + +Prepare the first pseudo-production batch for execution without executing it. + +This plan creates handoff and evidence templates for the 10 selected scenes. + +## Fixed Inputs + +1. 
`tests/fixtures/generated_scene/scene_skill_102_pseudoprod_batch_selection_2026-04-20.json` +2. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +3. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` + +## Planned Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_handoff_2026-04-20.json` +2. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_record_template_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_evidence_checklist_2026-04-20.json` +4. `docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-report.md` + +## Allowed Files + +1. planned JSON assets +2. planned report + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/**` +5. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Preparation Boundary + +### Tasks + +1. Confirm selected batch is exactly 10 scenes. +2. Confirm all 10 are direct-mock-pass. +3. Confirm no browser/network execution happens in this plan. + +### Acceptance Criteria + +1. No production or quasi-production target is invoked. +2. No credentials are requested or stored. + +## Phase 1: Build Environment Handoff + +### Tasks + +1. List required environment inputs for the operator. +2. Map required dependencies per selected scene. +3. Define credential handling rule: outside repository only. + +### Acceptance Criteria + +1. Every selected scene has environment prerequisites. +2. The handoff asset contains no credential values. + +## Phase 2: Build Evidence Checklist + +### Tasks + +1. Define evidence required for each scene. +2. Define evidence file names. +3. Define redaction requirements for logs and screenshots. + +### Acceptance Criteria + +1. 
Every selected scene has an evidence checklist. +2. Every checklist includes final execution classification. + +## Phase 3: Build Execution Record Template + +### Tasks + +1. Define common execution record fields. +2. Include per-scene placeholders for operator output. +3. Include allowed result states and failure taxonomy. + +### Acceptance Criteria + +1. The template can record pass, blocker, mismatch, and runtime error. +2. The template stores references to evidence files, not credentials. + +## Phase 4: Publish Preparation Report + +### Tasks + +1. Summarize selected batch. +2. Summarize environment handoff. +3. Summarize evidence package structure. +4. Identify next bounded execution plan. + +### Acceptance Criteria + +1. Report states this is preparation-only. +2. Report does not claim pseudo-production execution. + +## Completion Criteria + +This plan is complete when handoff, evidence checklist, record template, and report are published. + +## Stop Statement + +Stop after publishing preparation assets. + +Do not run pseudo-production execution under this plan. + diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-selection-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-selection-plan.md new file mode 100644 index 0000000..349130f --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-pseudoprod-batch-selection-plan.md @@ -0,0 +1,127 @@ +# Scene Skill 102 Pseudo-Production Batch Selection Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-selection-design.md` + +## Plan Intent + +Select the first pseudo-production validation batch from the 102 final materialized skills. + +This plan is selection-only. It does not run pseudo-production execution. + +## Fixed Inputs + +1. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +2. 
`tests/fixtures/generated_scene/scene_skill_102_static_validation_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_dispatch_dry_run_validation_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +5. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_readiness_2026-04-20.json` + +## Planned Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_batch_selection_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-selection-report.md` + +## Allowed Files + +1. the planned output JSON +2. the planned report + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/**` +5. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Freeze Selection Boundary + +### Tasks + +1. Confirm all 102 scenes are direct-mock-pass. +2. Confirm this plan does not execute browser automation or real network access. +3. Confirm this plan does not update official board status. + +### Acceptance Criteria + +1. Selection starts from a clean `102 / 102` local mock baseline. +2. Selection does not mutate generated skills or runtime code. + +## Phase 1: Build Eligible Candidate Set + +### Tasks + +1. Read pseudo-production readiness records. +2. Keep only `pseudo-prod-ready` scenes. +3. Exclude `real-env-required` scenes from first batch. +4. Join with direct mock results and static/dispatch readiness. + +### Acceptance Criteria + +1. Every selected candidate is static validated. +2. Every selected candidate is dispatch ready. +3. Every selected candidate is direct-mock-pass. + +## Phase 2: Select Balanced First Batch + +### Tasks + +Select `10` scenes with archetype balance: + +1. `paginated_enrichment`: 4 +2. `multi_mode_request`: 2 +3. `single_request_enrichment`: 2 +4. 
`multi_endpoint_inventory`: 1 +5. `page_state_eval`: 1 + +### Acceptance Criteria + +1. The selected batch contains exactly `10` scenes. +2. The batch excludes host-bridge and local-doc runtime-dependent scenes. +3. Every selected scene has a deterministic invocation input. + +## Phase 3: Define Evidence Checklist + +### Tasks + +For each selected scene, define required evidence: + +1. deterministic invocation input +2. console log +3. network log or request summary +4. screenshot if browser target page is required +5. exported file if produced +6. generation report path +7. failure taxonomy slot + +### Acceptance Criteria + +1. Every selected scene has a complete checklist. +2. Checklist does not require production credentials to be stored in the repository. + +## Phase 4: Publish Selection Report + +### Tasks + +1. Write selection JSON. +2. Write selection report. +3. Summarize selected and deferred scenes. + +### Acceptance Criteria + +1. The report states this is selection-only. +2. The report does not claim pseudo-production execution. +3. The report identifies the next bounded execution plan. + +## Completion Criteria + +This plan is complete when the first pseudo-production batch selection JSON and report are published. + +## Stop Statement + +Stop after publishing selection assets. + +Do not execute pseudo-production validation under this plan. 
+ diff --git a/docs/superpowers/plans/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-plan.md b/docs/superpowers/plans/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-plan.md new file mode 100644 index 0000000..c653251 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-plan.md @@ -0,0 +1,252 @@ +# Scene Skill 102 Static, Mock, And Pseudo-Production Validation Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-design.md` +> Parent Stage: validation after final materialization and deterministic invocation readiness + +## Plan Intent + +Define the next validation stage for the fully materialized `102` scene skill set. + +This plan validates package health, deterministic dispatch readiness, mock runtime feasibility, and pseudo-production readiness. It does not perform real production execution. + +## Fixed Inputs + +1. `examples/scene_skill_102_final_materialization_2026-04-19/skills` +2. `examples/scene_skill_102_final_materialization_2026-04-19/SCENE_INDEX.md` +3. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +4. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +5. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` +6. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` + +## Planned Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_static_validation_2026-04-20.json` +2. `tests/fixtures/generated_scene/scene_skill_102_dispatch_dry_run_validation_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_readiness_2026-04-20.json` +5. 
`docs/superpowers/reports/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-report.md` + +## Scope Guardrails + +Allowed: + +1. read final materialized skill packages +2. parse `SKILL.toml`, `scene.toml`, and generation reports +3. run deterministic dispatch dry-run without executing selected skills +4. build mock runtime validation matrix +5. publish validation JSON and report assets + +Forbidden: + +1. do not modify `src/generated_scene/analyzer.rs` +2. do not modify `src/generated_scene/generator.rs` +3. do not modify generated skill scripts during this validation plan +4. do not rematerialize the `102` skills +5. do not update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +6. do not start real browser or production environment execution +7. do not require production credentials, SSO, VPN, or real system access + +## Workstreams + +1. `WS1` Static package validation +2. `WS2` Deterministic invocation dry-run validation +3. `WS3` Mock runtime validation matrix +4. `WS4` Pseudo-production readiness checklist +5. `WS5` Validation report and next-stage decision + +## Phase 0: Freeze Validation Baseline + +### Objective + +Freeze the `102` final skill set as the input to validation. + +### Tasks + +1. Confirm final materialization count is `102 / 102`. +2. Confirm materialization failure count is `0`. +3. Confirm deterministic invocation readiness is `102 / 102`. +4. Confirm this plan does not rematerialize skills. + +### Deliverables + +1. validation baseline section in final report + +### Acceptance Criteria + +1. validation begins from the final materialization root +2. no source scene directories are rescanned +3. no generated scene logic is changed + +## Phase 1: Static Package Validation + +### Objective + +Validate that all `102` skill packages are structurally complete and parseable. + +### Tasks + +For each skill: + +1. check `SKILL.toml` +2. check `SKILL.md` +3. check `scene.toml` +4. 
check `references/generation-report.json` +5. check at least one script under `scripts/` +6. parse TOML and JSON files +7. compare `sceneId`, display name, archetype, readiness, suffix, and keyword fields against index and manifest assets + +### Deliverables + +1. `scene_skill_102_static_validation_2026-04-20.json` + +### Acceptance Criteria + +1. every scene has exactly one static validation record +2. every static failure has a named reason +3. total records equal `102` + +## Phase 2: Deterministic Invocation Dry-Run Validation + +### Objective + +Validate `U+3002 x3` deterministic suffix dispatch selection without executing selected skills. + +### Tasks + +For each skill: + +1. construct one canonical utterance from the scene display name plus the `U+3002 x3` suffix +2. optionally construct one keyword-based utterance when safe +3. dry-run deterministic selection against the skill registry +4. record selected skill id, ambiguity count, and no-match status + +### Deliverables + +1. `scene_skill_102_dispatch_dry_run_validation_2026-04-20.json` + +### Acceptance Criteria + +1. every complete skill has a dispatch dry-run result +2. ambiguous and no-match outcomes are explicit +3. no selected skill is executed + +## Phase 3: Mock Runtime Validation Matrix + +### Objective + +Define and, where safe, prepare mock runtime validation by archetype. + +### Tasks + +1. group `102` skills by workflow archetype +2. identify one to three representatives per archetype +3. define mock dependencies required by each archetype: + - fake fetch + - fake browser DOM + - fake host bridge + - fake local-doc service + - fake artifact writer +4. classify each skill as: + - `mock-covered-by-representative` + - `mock-needs-harness` + - `mock-not-safe-yet` + +### Deliverables + +1. `scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json` + +### Acceptance Criteria + +1. every scene is assigned a mock-runtime coverage status +2. every archetype has a named harness requirement +3. 
this phase does not require real network or browser credentials + +## Phase 4: Pseudo-Production Readiness Checklist + +### Objective + +Define what evidence is required before real-environment validation can start. + +### Tasks + +For each scene or archetype: + +1. record required runtime dependencies +2. record expected artifact type +3. record whether host bridge, browser, localhost service, or document pipeline is required +4. define required execution evidence: + - console logs + - network logs + - screenshots + - exported files + - generated artifact metadata +5. define failure taxonomy: + - `login-blocked` + - `network-blocked` + - `host-bridge-blocked` + - `data-mismatch` + - `artifact-mismatch` + - `environment-unavailable` + - `runtime-error` + +### Deliverables + +1. `scene_skill_102_pseudoprod_readiness_2026-04-20.json` + +### Acceptance Criteria + +1. every scene has a pseudo-production readiness record +2. every real-environment blocker has a named category +3. no production credentials are required by this phase + +## Phase 5: Publish Validation Report + +### Objective + +Publish one report that separates static readiness, dispatch readiness, mock readiness, and pseudo-production readiness. + +### Tasks + +1. summarize static validation results +2. summarize dispatch dry-run results +3. summarize mock runtime coverage matrix +4. summarize pseudo-production readiness categories +5. recommend whether to start real-environment validation and at what batch size + +### Deliverables + +1. `docs/superpowers/reports/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-report.md` + +### Acceptance Criteria + +1. report explains that `102 / 102` materialization is not the same as `102 / 102` production execution +2. report lists remaining validation blockers, if any +3. report does not promote any scene to real executed-pass + +## Expected Status Outputs + +This plan should produce these independent status counts: + +1. `staticValidationStatus` +2. 
`dispatchDryRunStatus` +3. `mockRuntimeCoverageStatus` +4. `pseudoProductionReadinessStatus` + +## Completion Criteria + +This plan is complete when: + +1. all planned validation assets are published +2. all `102` scenes have static validation records +3. all `102` scenes have dispatch dry-run records +4. all `102` scenes have mock runtime matrix records +5. all `102` scenes have pseudo-production readiness records +6. the validation report is published + +## Stop Statement + +Stop after publishing static, dispatch, mock-runtime matrix, pseudo-production readiness assets, and the report. + +Do not execute real production validation under this plan. diff --git a/docs/superpowers/plans/2026-04-20-sweep-012-materialization-recovery-plan.md b/docs/superpowers/plans/2026-04-20-sweep-012-materialization-recovery-plan.md new file mode 100644 index 0000000..86b6288 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-sweep-012-materialization-recovery-plan.md @@ -0,0 +1,96 @@ +# Sweep 012 Materialization Recovery Plan + +> Date: 2026-04-20 +> Design: `2026-04-20-sweep-012-materialization-recovery-design.md` + +## Plan Intent + +Recover the only failed final materialization scene package and refresh final materialization/readiness assets to reach `102 / 102` materialized packages. + +## Fixed Input + +`sweep-012-scene / 业扩报装管理制度` + +Source directory: + +`D:/desk/智能体资料/全量业务场景/一平台场景/业扩报装管理制度` + +Final output root: + +`examples/scene_skill_102_final_materialization_2026-04-19` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/scene_generator_test.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-012-scene/**` +5. `examples/scene_skill_102_final_materialization_2026-04-19/SCENE_INDEX.md` +6. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +7. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +8. 
`tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` +9. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +10. `docs/superpowers/reports/2026-04-20-sweep-012-materialization-recovery-report.md` + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. other final skill package scripts outside `sweep-012-scene` +3. production/static/mock validation assets + +## Phase 0: Diagnose Single-Scene Failure + +### Tasks + +1. Reproduce current failure for `sweep-012-scene`. +2. Compare analyzer output with official framework expectation. +3. Identify whether the failure is route selection, missing bootstrap, or workflow contract recovery. + +### Acceptance Criteria + +1. Root cause is stated before implementation. + +## Phase 1: Apply Minimal Recovery + +### Tasks + +1. Make the smallest code or fixture correction needed for `sweep-012-scene`. +2. Add or update a focused regression test. + +### Acceptance Criteria + +1. Existing G3/G6 route behavior remains protected. +2. The correction is not generalized beyond evidence-supported behavior. + +## Phase 2: Re-materialize Sweep 012 Only + +### Tasks + +1. Run `sg_scene_generate` only for `sweep-012-scene`. +2. Verify required files exist. +3. Normalize human-readable metadata and deterministic invocation metadata for this package. + +### Acceptance Criteria + +1. `sweep-012-scene` package is complete. +2. Its `scene.toml` uses suffix `。。。`. + +## Phase 3: Refresh Assets + +### Tasks + +1. Patch final materialization manifest for `sweep-012-scene`. +2. Clear final materialization failures. +3. Refresh `SCENE_INDEX.md` and `scene_skill_102_index.json`. +4. Refresh deterministic readiness after keyword refinement. +5. Publish recovery report. + +### Acceptance Criteria + +1. Materialized count is `102`. +2. Failure count is `0`. +3. 
Deterministic dispatch ready count is `102`. + +## Stop Statement + +Stop after `sweep-012-scene` recovery assets and report are published. Do not start static/mock/production validation under this plan. diff --git a/docs/superpowers/plans/2026-04-20-sweep-015-direct-mock-partial-closure-plan.md b/docs/superpowers/plans/2026-04-20-sweep-015-direct-mock-partial-closure-plan.md new file mode 100644 index 0000000..a2855b0 --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-sweep-015-direct-mock-partial-closure-plan.md @@ -0,0 +1,103 @@ +# Sweep 015 Direct Mock Partial Closure Plan + +> Date: 2026-04-20 +> Status: Draft +> Upstream Design: `docs/superpowers/specs/2026-04-20-sweep-015-direct-mock-partial-closure-design.md` + +## Plan Intent + +Close the only remaining `direct-mock-partial` before pseudo-production batch selection. + +This plan is mock-only and targets one fixed scene: + +- `sweep-015-scene / 任务报表` + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +2. `tests/generated_scene_full_direct_mock_runner.js` +3. `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-015-scene/scripts/collect_sweep_015_scene.js` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-015-scene/references/generation-report.json` + +## Planned Outputs + +1. refreshed `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +2. refreshed `docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md` +3. `docs/superpowers/reports/2026-04-20-sweep-015-direct-mock-partial-closure-report.md` + +## Allowed Files + +1. `tests/generated_scene_full_direct_mock_runner.js` +2. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +3. `docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md` +4. 
`docs/superpowers/reports/2026-04-20-sweep-015-direct-mock-partial-closure-report.md` + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `examples/scene_skill_102_final_materialization_2026-04-19/skills/**` +5. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Phase 0: Confirm Partial Cause + +### Tasks + +1. Confirm `sweep-015-scene` is the only partial. +2. Confirm the generated script uses `row.status == 5`. +3. Confirm the mock row does not currently satisfy that filter. + +### Acceptance Criteria + +1. The closure target remains exactly one scene. +2. The cause is classified as mock fixture contract mismatch. + +## Phase 1: Patch Mock Fixture Contract + +### Tasks + +1. Add the minimal filter-satisfying field to direct mock fake row data. +2. Keep the change generic enough to support filter-aware mock execution but narrow enough not to mask runtime failures. + +### Acceptance Criteria + +1. No generated skill package is modified. +2. No generator/analyzer code is modified. + +## Phase 2: Rerun Full Direct Mock + +### Tasks + +1. Run the full direct mock runner. +2. Confirm `sweep-015-scene` becomes `direct-mock-pass`. +3. Confirm total output record count is `102`. + +### Acceptance Criteria + +1. `direct-mock-pass = 102`. +2. `direct-mock-partial = 0`. +3. `direct-mock-fail = 0`. + +## Phase 3: Publish Closure + +### Tasks + +1. Refresh the full direct mock report. +2. Publish a sweep-015 closure report. + +### Acceptance Criteria + +1. Closure report states this is mock-only. +2. Closure report does not claim pseudo-production or production pass. + +## Completion Criteria + +This plan is complete when the full direct mock result has no partial/fail records and the closure report is published. + +## Stop Statement + +Stop after publishing the closure report. + +Do not start pseudo-production batch selection under this plan. 
+ diff --git a/docs/superpowers/plans/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-plan.md b/docs/superpowers/plans/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-plan.md new file mode 100644 index 0000000..d35e59d --- /dev/null +++ b/docs/superpowers/plans/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-plan.md @@ -0,0 +1,93 @@ +# Sweep-030 Deterministic Keyword / Alias Normalization Plan + +## Parent + +- Framework phase: post-materialization deterministic invocation hardening +- Parent route: line-loss service-console readiness correction +- Parent design: [2026-04-20-sweep-030-deterministic-keyword-alias-normalization-design.md](/D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-design.md) + +## Objective + +Make `sweep-030-scene` selectable from the natural service-console phrasing used by operators, without touching sgClaw runtime. + +## Fixed Input + +- `sweep-030-scene` +- Current failing operator phrasing: + - `兰州公司 台区线损大数据 月累计线损率统计分析。。。` +- Current parameter-complete phrasing: + - `兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。` + +## Phase 0: Freeze Boundary + +### Tasks + +1. Confirm the current `scene.toml` deterministic keyword set for `sweep-030-scene` +2. Confirm the natural operator phrasing to support +3. Freeze allowed / forbidden files + +### Deliverable + +- frozen route-local scope + +### Acceptance + +- scope stays limited to `sweep-030-scene` + +## Phase 1: Normalize Deterministic Aliases + +### Tasks + +1. Update `include_keywords` in `sweep-030-scene/scene.toml` +2. Preserve current suffix `。。。` +3. Keep aliases specific to line-loss monthly/weekly cumulative phrasing + +### Deliverable + +- normalized deterministic manifest + +### Acceptance + +- `台区线损大数据 月累计线损率统计分析` can satisfy at least one `include_keywords` match +- no runtime files are changed + +## Phase 2: Route-Local Verification Record + +### Tasks + +1. 
Record the normalized alias set +2. Record the operator-ready sample input +3. Record the next-layer note that helper/requesturl validation is still pending + +### Deliverables + +- `tests/fixtures/generated_scene/sweep_030_deterministic_keyword_alias_normalization_2026-04-20.json` +- `docs/superpowers/reports/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-report.md` + +### Acceptance + +- verification asset exists +- report states that this plan only fixes dispatch matchability + +## Allowed Files + +- `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-030-scene/scene.toml` +- `tests/fixtures/generated_scene/sweep_030_deterministic_keyword_alias_normalization_2026-04-20.json` +- `docs/superpowers/reports/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-report.md` + +## Forbidden Files + +- `src/compat/scene_platform/dispatch.rs` +- `src/browser/callback_host.rs` +- `src/service/server.rs` +- `src/generated_scene/*` +- `resources/rules.json` + +## Expected Delta + +- `sweep-030-scene` becomes dispatch-matchable from the natural operator wording used in the service console +- no claim about helper bootstrap success within this plan + +## Stop Statement + +Stop after manifest alias normalization and route-local report publication. Do not continue into callback-host or bootstrap-target fixes. 
diff --git a/docs/superpowers/plans/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-plan.md b/docs/superpowers/plans/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-plan.md new file mode 100644 index 0000000..13309cd --- /dev/null +++ b/docs/superpowers/plans/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-plan.md @@ -0,0 +1,76 @@ +# Generated Scene Local-Doc Pipeline Residual Closure Plan + +Date: 2026-04-21 + +Parent status source: + +- `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` + +Parent design: + +- `docs/superpowers/specs/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-design.md` + +## Intent + +Close the remaining bounded `local_doc_pipeline` residual bucket that still blocks full rematerialization completeness after runtime-semantics hardening. + +## Fixed Residual Bucket + +Only these six scenes are in scope: + +1. `sweep-025-scene` +2. `sweep-047-scene` +3. `sweep-050-scene` +4. `sweep-052-scene` +5. `sweep-062-scene` +6. `sweep-087-scene` + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_failures_2026-04-21.json` +2. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +3. `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` +4. corresponding original scene source directories under `D:/desk/智能体资料/全量业务场景/一平台场景` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. route-local generator tests under `tests/scene_generator*.rs` +5. route-local assets under `tests/fixtures/generated_scene/` +6. route-local report under `docs/superpowers/reports/` + +## Forbidden Files + +1. no rematerialization rerun inside this plan +2. no validation refresh rerun inside this plan +3. 
no edits to already materialized skill bundles under `examples/` +4. no runtime/service-console/browser changes +5. no official board updates +6. no pseudo-production execution + +## Required Work + +1. inspect the six source scenes and classify the exact missing workflow-evidence shapes +2. implement only reusable `local_doc_pipeline` evidence recovery needed for this bounded bucket +3. add route-local regression coverage for the recovered evidence shapes +4. publish route-local followup JSON +5. publish route-local report + +## Required Outputs + +1. `tests/fixtures/generated_scene/generated_scene_local_doc_pipeline_residual_closure_followup_2026-04-21.json` +2. `docs/superpowers/reports/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-report.md` + +## Validation + +1. prove the bounded six-scene evidence shapes are now recognized at analyzer/generator level +2. keep validation route-local to this plan +3. do not claim bundle-level closure until downstream rematerialization rerun + +## Stop Statement + +Stop after the bounded `local_doc_pipeline` residual closure slice and route-local assets are published. + +Do not rerun rematerialization or validation refresh inside this plan. 
diff --git a/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-plan.md b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-plan.md new file mode 100644 index 0000000..c26c4f1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-plan.md @@ -0,0 +1,130 @@ +# Generated Scene Runtime Semantics Offline Validation Bundle Refresh Plan + +Date: 2026-04-21 + +Design: + +- `docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-design.md` + +Upstream plans: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md` +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md` + +## Intent + +Build a refreshed offline validation bundle from the 2026-04-21 runtime-semantics rematerialized 102-skill set and refreshed validation handoff assets. + +This plan only prepares the portable bundle. It does not run pseudo-production validation. + +## Fixed Inputs + +1. `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` +2. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +3. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_failures_2026-04-21.json` +4. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_deterministic_invocation_readiness_2026-04-21.json` +5. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_natural_language_parameter_readiness_2026-04-21.json` +6. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_natural_language_invocation_samples_2026-04-21.json` +7. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_execution_handoff_2026-04-21.json` +8. 
`tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_evidence_checklist_2026-04-21.json` +9. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_execution_record_template_2026-04-21.json` +10. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_full_direct_mock_execution_2026-04-21.json` +11. `target/debug/sg_claw.exe` or an equivalent current local `sg_claw.exe` +12. Optional rule assets: + - `resources/rules-102-business-targets-candidate.json` + - `resources/rules-102-business-targets-merged.json` + - `resources/rules-102-business-targets.patch` + +## Output Directory + +Create: + +- `dist/sgclaw_102_runtime_semantics_validation_bundle_2026-04-21` + +Do not delete or mutate: + +- `dist/sgclaw_102_pseudoprod_validation_bundle_2026-04-20` + +## Phases + +### Phase 0: Input Verification + +1. Confirm the 2026-04-21 rematerialization directory exists. +2. Confirm it contains 102 skill directories. +3. Confirm the 2026-04-21 rematerialization failures asset reports zero failures. +4. Confirm the 2026-04-21 validation refresh assets exist. +5. Confirm the pseudo-production handoff contains the refreshed selected batch. + +### Phase 1: Bundle Directory Preparation + +1. Create the target `dist/` bundle directory. +2. Create subdirectories: + - `skills/` + - `docs/` + - `handoff/` + - `resources/` + - `results/` + - `evidence/` +3. Copy `sg_claw.exe` into the bundle root. +4. Copy the 2026-04-21 rematerialized `skills/` content into the bundle. + +### Phase 2: Documentation And Handoff Copy + +1. Copy `SCENE_INDEX.md` and `scene_skill_102_index.json` from the 2026-04-21 rematerialization output into `docs/`. +2. Copy refreshed validation/handoff assets into `handoff/`. +3. Copy rule candidate assets into `resources/` when present. +4. 
Generate `README.md` with: + - config sample using `skillsDir` + - browser WebSocket reminder + - service console execution notes + - credential safety warning + - page URL guidance +5. Generate `BATCH_001.md` from the refreshed pseudo-production handoff selected batch. +6. Generate `BUNDLE_MANIFEST.json`. + +### Phase 3: Bundle Validation + +1. Confirm `skills/` contains 102 scene directories. +2. Confirm every skill directory contains: + - `SKILL.toml` + - `SKILL.md` + - `scene.toml` + - `scripts/` +3. Confirm critical JSON files parse. +4. Confirm `BATCH_001.md` includes the same selected batch count as the handoff. +5. Confirm no credentials or tokens are introduced by the generated bundle docs. + +### Phase 4: Report + +Publish: + +- `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-report.md` + +The report must include: + +1. bundle path +2. copied skill count +3. selected batch count +4. copied handoff assets +5. validation result +6. explicit statement that no pseudo-production execution was run + +## Allowed Files + +1. `dist/sgclaw_102_runtime_semantics_validation_bundle_2026-04-21/**` +2. `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-report.md` + +## Forbidden Files + +1. No `src/` changes +2. No `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21/**` edits +3. No validation asset edits under `tests/fixtures/generated_scene/` +4. No official board updates +5. No old bundle deletion +6. No credentials or secret material + +## Stop Statement + +Stop after the refreshed offline validation bundle and report are generated and validated. + +Do not run browser, inner-network, or pseudo-production validation inside this plan. 
diff --git a/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-plan.md b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-plan.md new file mode 100644 index 0000000..1333d75 --- /dev/null +++ b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-plan.md @@ -0,0 +1,60 @@ +# Generated Scene Runtime Semantics Post-Refresh Residual Closure Plan + +Date: 2026-04-21 + +Parent execution: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md` + +## Intent + +Fix the bounded post-refresh residuals that currently block the 2026-04-21 hardened bundle from becoming the canonical validation-ready bundle. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +2. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_deterministic_invocation_readiness_2026-04-21.json` +3. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_static_validation_2026-04-21.json` +4. `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` + +## Allowed Files + +1. `src/generated_scene/` +2. `tests/scene_generator*.rs` +3. route-local fixture assets under `tests/fixtures/generated_scene/` +4. route-local report under `docs/superpowers/reports/` + +## Forbidden Files + +1. no rematerialization execution inside this plan +2. no validation refresh execution inside this plan +3. no official board updates +4. no runtime/service-console changes +5. no pseudo-production execution + +## Required Outputs + +1. route-local residual followup JSON +2. 
route-local residual report + +## Closure Targets + +### Residual 1: deterministic suffix regression + +Close the generator path that caused rematerialized `scene.toml` files to emit scene-name deterministic suffixes instead of `。。。`. + +### Residual 2: `sweep-078-scene` TOML corruption + +Close the serialization path that emitted malformed TOML in the rematerialized `scene.toml`. + +## Validation + +1. prove the generator now writes `suffix = "。。。"` for the bounded target coverage slice +2. prove the `sweep-078-scene` generated `scene.toml` is parseable after generation +3. keep all validation route-local to this residual plan + +## Stop Statement + +Stop after residual closure assets and report are published. + +Do not rerun rematerialization or validation refresh inside this plan. diff --git a/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md new file mode 100644 index 0000000..8355510 --- /dev/null +++ b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md @@ -0,0 +1,63 @@ +# Generated Scene Runtime Semantics Rematerialization Execution Plan + +Date: 2026-04-21 + +Parent dependency plan: + +- `docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-rematerialization-refresh-plan.md` + +## Intent + +Run the actual 102-scene rematerialization refresh required after runtime-semantics hardening routes. + +## Scope + +This plan may generate a new final materialization directory from source scenes using the current hardened generator. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_source_evidence_cross_scan_2026-04-20.json` +2. Current generated scene source roots under: + - `D:/desk/智能体资料/全量业务场景/一平台场景` +3. Current hardened generator code. +4. 
Current final human-readable index as a reference only: + - `examples/scene_skill_102_final_materialization_2026-04-19/SCENE_INDEX.md` + +## Allowed Files + +1. New output directory under `examples/`. +2. New fixture assets under `tests/fixtures/generated_scene/`. +3. New report under `docs/superpowers/reports/`. + +## Forbidden Files + +1. No `src/` code changes. +2. No manual edits to generated skills after generation. +3. No official board updates. +4. No service-console or runtime changes. +5. No validation refresh. + +## Required Outputs + +1. `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` +2. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +3. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_failures_2026-04-21.json` +4. `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-report.md` + +## Validation + +1. Confirm 102 scenes were attempted. +2. Confirm generated skill directory count. +3. Confirm failures are explicitly recorded. +4. Confirm key hardened facts are present for `sweep-030-scene`: + - source-derived org dictionary + - `default_strategy = "lineloss_page_semantics"` + - expanded invocation aliases + - runtime URL role metadata + - request mappings + +## Stop Statement + +Stop after rematerialization assets and execution report are published. + +Do not execute validation refresh inside this plan. 
diff --git a/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md new file mode 100644 index 0000000..4e9accf --- /dev/null +++ b/docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md @@ -0,0 +1,64 @@ +# Generated Scene Runtime Semantics Validation Refresh Execution Plan + +Date: 2026-04-21 + +Parent dependency plan: + +- `docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-validation-refresh-plan.md` + +Parent rematerialization execution: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md` + +## Intent + +Run the actual validation refresh required after the hardened 102-scene rematerialization. + +## Fixed Inputs + +1. `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` +2. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +3. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_failures_2026-04-21.json` + +## Required Layers + +1. deterministic invocation readiness refresh +2. natural-language parameter readiness refresh +3. static validation refresh +4. direct mock execution refresh +5. pseudo-production handoff refresh + +## Allowed Files + +1. New or refreshed validation assets under `tests/fixtures/generated_scene/` +2. New or refreshed validation reports under `docs/superpowers/reports/` +3. New or refreshed validation support outputs under `examples/` + +## Forbidden Files + +1. No `src/` changes +2. No skill package edits inside the refreshed bundle +3. No rerun of rematerialization inside this plan +4. No official board updates +5. No pseudo-production execution + +## Required Outputs + +1. refreshed deterministic invocation readiness asset +2. 
refreshed natural-language parameter readiness asset +3. refreshed static validation asset +4. refreshed direct mock execution asset +5. refreshed pseudo-production handoff asset set +6. `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` + +## Validation + +1. confirm every refreshed validation asset points to the 2026-04-21 bundle +2. confirm the residual 6 rematerialization failures are represented as residual validation blockers, not silently skipped +3. confirm no stale pre-hardening validation asset is reused as the new proof set + +## Stop Statement + +Stop after refreshed validation assets and execution report are published. + +Do not execute pseudo-production runs inside this plan. diff --git a/docs/superpowers/references/tq-lineloss-lessons-learned.md b/docs/superpowers/references/tq-lineloss-lessons-learned.md new file mode 100644 index 0000000..14d1cb9 --- /dev/null +++ b/docs/superpowers/references/tq-lineloss-lessons-learned.md @@ -0,0 +1,35 @@ +# tq-lineloss Report Lessons Learned + +These lessons capture the non-negotiable rules that the generated scene skill platform must preserve when turning the current `tq-lineloss-report` scenario into future staged browser-script skills. + +## Routing + +- Deterministic intranet routing must require the exact `。。。` suffix. +- Unsupported scenes must fail closed with a prompt instead of falling back to hidden page defaults or legacy one-off logic. +- Ambiguous scene matches must fail closed and ask the operator to clarify. + +## Canonical parameters + +- Organization resolution must come from an explicit dictionary entity resolver. +- Period resolution must require an explicit month or week expression from the operator instruction. +- Generated browser scripts must receive canonical arguments and must not re-parse raw natural language. +- Page defaults are not allowed to silently fill missing organization or period values. 
+ +## Bootstrap + +- Every generated browser-script scene must declare `expected_domain` and `target_url` in `scene.toml`. +- The runtime owns target URL/domain validation and bootstrap; generated skills should not hard-code Rust-side bootstrap behavior. +- When page context is present, routing can use it for scoring, but the manifest remains the registration contract. + +## Artifact contract + +- Report collection skills must return a generic `report-artifact` payload. +- Exportable artifacts must include stable `columns`, human-readable `column_defs`, and row data. +- When `postprocess.exporter = "xlsx_report"`, Rust should perform the XLSX export generically from the artifact fields. + +## Validation + +- A generated scene is not complete until deterministic routing, registry loading, browser-script execution, and postprocess tests cover it. +- Pipe and browser-ws service paths must stay covered because both remain supported runtime entrypoints. +- Manual service-console smoke testing must verify the real callback-host bootstrap path. +- Timeout diagnostics must preserve enough callback-host stage information to identify whether failure happened before helper load, before helper ready, or during command execution. 
diff --git a/docs/superpowers/references/tq-lineloss-lessons-learned.toml b/docs/superpowers/references/tq-lineloss-lessons-learned.toml new file mode 100644 index 0000000..949f9e0 --- /dev/null +++ b/docs/superpowers/references/tq-lineloss-lessons-learned.toml @@ -0,0 +1,24 @@ +[routing] +require_exact_suffix = true +unsupported_scene_fail_closed = true +ambiguity_fail_closed = true + +[canonical_params] +require_dictionary_entity_for_org = true +require_explicit_period = true +forbid_hidden_page_defaults = true + +[bootstrap] +require_expected_domain = true +require_target_url = true +prefer_page_context_when_present = true + +[artifact] +require_report_artifact = true +require_column_defs_for_export = true +rust_side_xlsx_export_when_postprocess_xlsx = true + +[validation] +require_pipe_and_ws_checks = true +require_manual_service_console_smoke = true +require_callback_host_timeout_notes = true diff --git a/docs/superpowers/reports/2026-04-18-102-scene-ledger-alignment-report.md b/docs/superpowers/reports/2026-04-18-102-scene-ledger-alignment-report.md new file mode 100644 index 0000000..3a23889 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-102-scene-ledger-alignment-report.md @@ -0,0 +1,66 @@ +# 102 Scene Ledger Alignment Report + +## Scope + +This round does not attempt full `102`-scene migration. + +It freezes the workbook snapshot `docs/2026-04-18-102-scenes-validation-overview.xlsx` into repo-local assets so the current code-backed family baseline can be compared against a concrete ledger snapshot. + +## Delivered + +Added: + +1. `tests/fixtures/generated_scene/scene_ledger_snapshot_2026-04-18.json` +2. `tests/scene_ledger_snapshot_test.rs` + +## Snapshot Facts + +From the workbook snapshot generated at `2026-04-18 16:48:05`: + +1. total scenes: `102` +2. validated scenes: `9` +3. not yet validated scenes: `93` +4. passed real sample: `1` +5. failed real samples: `3` +6. fail-closed scenes: `2` +7. 
reassigned boundary scenes: `3` + +## Alignment With Current Family Assets + +The frozen ledger snapshot now explicitly aligns with the repo-local family baseline on these points: + +1. `G1-E` + - `高低压新增报装容量月度统计表` is the only passed real sample in the snapshot +2. `G2` + - three validated line-loss samples remain recorded as first-round failed runs +3. `G6` + - `电能表现场检验完成率指标报表` is recorded as a reassigned boundary scene +4. `G7` + - `计量资产库存统计` is recorded as a reassigned boundary scene +5. `G8` + - `95598供电服务月报` is recorded as a reassigned boundary scene + +## Interpretation + +The workbook remains a mid-run ledger snapshot, not the final latest state of same-day markdown execution. + +That distinction is now preserved in repo assets: + +1. ledger snapshot = frozen workbook state +2. family assets = current code-backed baseline +3. markdown reports = later execution progress on the same day + +## Validation + +Passed: + +- `cargo test --test scene_ledger_snapshot_test -- --nocapture` + +## Outcome + +Track D / Track E now have a repo-local bridge between: + +1. family-level code-backed baselines +2. the original `102`-scene workbook snapshot + +This gives the next expansion round a stable ledger anchor without forcing full Excel-driven workflow into the compiler path. diff --git a/docs/superpowers/reports/2026-04-18-first-real-scene-migration-execution-sheet.md b/docs/superpowers/reports/2026-04-18-first-real-scene-migration-execution-sheet.md new file mode 100644 index 0000000..bcbf1d6 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-first-real-scene-migration-execution-sheet.md @@ -0,0 +1,120 @@ +# 第一轮真实样本迁移与验证执行表 + +日期:2026-04-18 + +上游设计:`docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md` + +上游计划:`docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md` + +## 1. 文档目的 + +本表用于承接 `60 -> 90` 路线图进入真实样本阶段后的首轮落地动作。 + +这一轮不直接要求业务人员先去内网验证,而是先完成以下顺序: + +1. 从真实场景中收敛首轮样本 +2. 按当前 scene -> skill 转化链生成首轮 skill +3. 
将生成结果与 `tq` 级基线或当前 canonical/family 基线对比 +4. 只把通过结构、语义、readiness 初筛的候选物,送入后续内网验证 + +## 2. 选样原则 + +首轮样本选择遵循以下边界: + +1. 严格服从 plan 的主线范围,只覆盖 `G1 + G2 + G3` +2. 优先选择业务结构清晰、家族代表性强、后续容易形成复用模板的场景 +3. `G2` 优先级最高,因为它直接决定是否能逼近 `tq-lineloss-report` 级别的业务语义恢复 +4. `G1` 首轮优先选择通用报表类,不优先选择强上传型、强附件型、强人工补录型 +5. `G3` 首轮优先选择分页明细补数链清晰的工单类 +6. 对发现“标题与真实脚本结构不一致”或“页面壳与业务链条错位”的场景,先降级为观察样本,不进入首轮主样本 + +## 3. 当前收敛结果 + +### 3.1 主样本家族 + +- `G2` 多模式报表家族:先打透 +- `G1` 通用单页报表家族:先选干净样本验证可复制性 +- `G3` 分页明细补数家族:验证复杂 workflow 拆解与 fail-closed + +### 3.2 暂不纳入首轮主样本的发现 + +- `高低压新增报装容量月度统计表` + 当前页面标题、工作信息描述、脚本主体出现明显不一致,疑似存在页面壳复用或内容错配,不适合作为首轮基准样本。 + +## 4. 第一轮执行样本表 + +| 序号 | 场景名称 | 家族 | 首轮角色 | 初判 archetype | 选入原因 | 主要风险 | 对标基线 | 当前状态 | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | +| 1 | 台区线损大数据-月_周累计线损率统计分析 | G2 | P0 主锚点 | `multi_mode_request.month_week_table` | 已知原始场景,可直接对标 `tq-lineloss-report`,也是当前 60->90 提升的核心样板 | mode matrix、动态日期字段、localhost 与宿主桥接区分 | `tq-lineloss-report` + P0 canonical | 待生成 | +| 2 | 白银线损周报 | G2 | 家族扩展样本 | `multi_mode_request` 或同家族线损周报变体 | 与线损家族强相关,能验证 `tq` 样板是否可迁移到同家族其他周报 | 双系统登录态、周报聚合逻辑、内网 token 依赖 | `tq` 家族语义基线 + P1 family | 待生成 | +| 3 | 线损同期差异报表 | G2 | 复杂扩展样本 | `multi_mode_request` / `secondary_request` 复合链 | 能暴露“多系统对比、主请求 + 二次补链”的真实缺口,是 G2 向 90 分跨越的关键压力样本 | secondary request、跨系统上下文、workflow 拆解 | `tq` 家族语义基线 + P1 family | 待生成 | +| 4 | 售电收入日统计 | G1 | 主样本 | `single_request_table` 倾向,但可能带明细遍历 | 属于典型报表生成场景,页面参数较集中,适合作为 `G1` 首轮切入 | 可能存在按用户逐条查询与汇总,不一定是纯单请求 | P0/P1 canonical/family | 待生成 | +| 5 | 供电所线路电量统计 | G1 | 补充样本 | `single_request_table` 到轻量 workflow 之间 | 具备日期范围、关键词、结果表格,能验证单页查询统计类的可抽象程度 | 可能依赖跨页执行与页面内表格汇总,而非纯 API 报表导出 | P0/P1 canonical/family | 待生成 | +| 6 | 用户日电量监测 | G1 | 观察补样 | `single_request_table` 外围变体 | 可覆盖上传/处理/导出型报表边界,用于验证 G1 主模板的外延边界 | 上传驱动、处理链较重,不适合作为 G1 唯一主样本 | P0/P1 canonical/family | 待生成 | +| 7 | 95598工单明细表 | G3 | 主样本 | `paginated_enrichment` | 典型分页工单明细场景,存在主查询、分页、补数、导出链路,适合作为 G3 首个标准样本 | 分页补数、日区间滚动、SQL/去重/导出链 | P0-3 canonical + family 基线 | 待生成 | +| 8 | 
95598、12398、流程超期风险工单明细 | G3 | 扩展样本 | `paginated_enrichment` 复合变体 | 同时覆盖多来源工单与超期风险链,能验证 G3 是否只会处理单一分页表 | 多入口 workflow、复合条件、主链与补链拆分 | P0-3 canonical + family 基线 | 待生成 | + +## 5. 首轮执行顺序 + +首轮执行顺序固定如下,不跨序扩散: + +1. 先生成 `台区线损大数据-月_周累计线损率统计分析` +2. 再生成 `白银线损周报` +3. 再生成 `线损同期差异报表` +4. 再进入 `G1` 的 `售电收入日统计`、`供电所线路电量统计` +5. `用户日电量监测` 仅作为 `G1` 边界补样,不先于前两个 `G1` 样本 +6. 最后进入 `G3` 的两个工单类样本 + +这样安排的原因是: + +1. `G2` 是否打透,决定当前链路是否真正具备 `tq` 级语义恢复能力 +2. `G1` 更适合在 `G2` 形成稳定抽取口径后做规模化复制验证 +3. `G3` workflow 更复杂,应该建立在前两组的证据层和契约层已经稳定之后 + +## 6. 每个样本生成后的必做检查 + +每个样本生成完后,必须按同一口径检查,不允许直接跳内网: + +1. `sceneId / skillId` 是否稳定,不出现退化命名 +2. `bootstrap domain` 是否落到真实业务系统,而不是误抽到 `localhost` +3. `workflow archetype` 是否正确,不把复杂链条误压成简单单请求 +4. `request contract` 是否闭合,关键字段是否齐全 +5. `response path / column defs / normalize rules` 是否闭合 +6. `localhost:*` 是否被识别为宿主桥接依赖,而不是业务主链 +7. readiness 是否可解释,失败时是否明确 fail-closed + +## 7. 每类样本的通过标准 + +### 7.1 G2 通过标准 + +- 至少能恢复正确的 mode matrix +- 能区分主业务接口与宿主桥接依赖 +- 生成结果在关键语义上达到 `tq-lineloss-report` 同等级参考水平 + +### 7.2 G1 通过标准 + +- 能稳定抽出查询参数、目标接口、响应路径、列定义、导出产物 +- 同家族两个以上样本可以共用同一 archetype 与 gate 口径 + +### 7.3 G3 通过标准 + +- 能识别分页主链、补数链、导出链 +- 证据不足时稳定阻断,不伪装为可运行 skill + +## 8. 本轮产出物要求 + +本轮真实样本阶段至少产出以下文档或结果: + +1. 本执行表 +2. 首轮样本生成结果 +3. 样本对标比对记录 +4. 候选内网验证名单 +5. 不通过样本的 blocker 归因 + +## 9. 下一步 + +按本表顺序进入下一步: + +1. 使用当前转化链生成上述首轮样本 skill +2. 将生成结果与 `tq/canonical/family` 基线逐项对比 +3. 
输出首轮迁移与候选验证报告 diff --git a/docs/superpowers/reports/2026-04-18-first-round-migration-and-candidate-validation-report.md b/docs/superpowers/reports/2026-04-18-first-round-migration-and-candidate-validation-report.md new file mode 100644 index 0000000..7c6aeb8 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-first-round-migration-and-candidate-validation-report.md @@ -0,0 +1,163 @@ +# 第一轮迁移与候选验证报告 + +日期:2026-04-18 + +上游执行表:[2026-04-18-first-real-scene-migration-execution-sheet.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-18-first-real-scene-migration-execution-sheet.md) + +相关分析: + +1. [2026-04-18-r1-real-tq-lineloss-analysis.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-18-r1-real-tq-lineloss-analysis.md) +2. [2026-04-18-g2-first-round-blocker-summary.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-18-g2-first-round-blocker-summary.md) + +## 1. 报告目的 + +本报告只回答四个问题: + +1. 第一轮真实样本实际迁移了哪些场景 +2. 这些场景生成结果是否达到候选验证门槛 +3. 哪些样本可以进入后续内网验证 +4. 哪些 blocker 已经稳定,可以作为后续整改输入 + +## 2. 第一轮实际执行范围 + +按执行表顺序,本轮实际完成了 `G2` 家族首轮三样本: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +对应生成产物位于: + +1. `examples/real_scene_batch_round1/skills/real-tq-lineloss-report-r1` +2. `examples/real_scene_batch_round1/skills/real-baiyin-lineloss-weekly-r1` +3. `examples/real_scene_batch_round1/skills/real-lineloss-period-diff-r1` + +本轮尚未进入: + +1. `G1` 单页报表样本 +2. `G3` 分页明细补数样本 + +## 3. 候选验证判定口径 + +样本只有同时满足以下条件,才可以进入后续内网候选验证: + +1. archetype 正确 +2. bootstrap 落到真实主业务承载面 +3. modes 或 workflow 主链恢复正确 +4. request contract 基本闭合 +5. response path / column defs / normalize rules 基本闭合 +6. readiness 与真实可运行性不明显冲突 + +任一核心项失败,当前轮次即判定为: + +- `不进入候选验证` + +## 4. 第一轮样本结论表 + +| 场景 | 家族 | 生成是否成功 | 语义是否通过 | 是否进入候选验证 | 结论 | +| --- | --- | --- | --- | --- | --- | +| 台区线损大数据-月_周累计线损率统计分析 | G2 | 是 | 否 | 否 | 生成成功,但业务语义恢复失败 | +| 白银线损周报 | G2 | 是 | 否 | 否 | 生成成功,但沿同类错误路径坍缩 | +| 线损同期差异报表 | G2 | 是 | 否 | 否 | 生成成功,但复杂链路没有被正确表达 | + +## 5. 
候选验证名单 + +本轮候选验证名单: + +- `空` + +原因不是“生成器完全跑不起来”,而是“生成器已经能产出包,但这些包还不具备进入内网业务验证的价值”。 + +## 6. 本轮稳定确认的 blocker + +### 6.1 archetype 坍缩 + +三份样本全部被判成: + +- `paginated_enrichment` + +而不是线损家族应接近的: + +- `multi_mode_request` + +### 6.2 bootstrap 稳定误选 + +三份样本全部落到: + +- `20.77.115.36:31051` + +而没有锚到线损主业务页面承载面。 + +### 6.3 modes 未恢复 + +三份样本都出现: + +1. `modes = []` +2. `defaultMode = "month"` +3. `modeSwitchField = "period_mode"` + +说明系统只保留了 mode 词面痕迹,没有恢复真正的 mode 结构。 + +### 6.4 request contract 缺失 + +三份样本都出现: + +1. `params = []` +2. `requestTemplate = null` +3. 脚本用泛化 `args` 直接拼请求 + +说明当前还没有恢复出线损家族真正需要的请求合同。 + +### 6.5 column defs 缺失 + +三份样本都出现: + +1. `columnDefs = []` +2. `requiredFields = []` +3. 用返回首行 key 动态拼列 + +这与 `tq` 级稳定报表结构还有明显距离。 + +### 6.6 endpoint 污染 + +三份样本都混入了大量无关 endpoint: + +1. 其他业务系统接口 +2. 文档和第三方库 URL +3. 静态依赖中的外链 + +说明当前 endpoint 提取还没有有效隔离业务主链与资源噪声。 + +### 6.7 readiness 过度乐观 + +三份样本都给了较高 readiness,但核心合同并未闭合。 + +这说明当前 readiness 还不能作为“可进内网验证”的判断依据。 + +## 7. 本轮确认的正向信号 + +虽然没有候选样本,但本轮也确认了三类有效正向信号: + +1. 系统已经能从真实样本中抓到线损核心 endpoint 名称 +2. 系统已经能抓到 `month/week/tjzq/mode` 等模式词面信号 +3. 系统已经能把 `localhost:*` 识别为宿主依赖证据,而不是直接当成业务 bootstrap + +这说明当前链路不是“完全失明”,而是“证据抽到了,但主业务语义编译失败”。 + +## 8. 本轮阶段结论 + +第一轮迁移阶段的正式结论是: + +1. 当前生成链已经具备真实场景证据抽取能力 +2. 当前生成链还不具备 `G2/tq` 级业务主链重建能力 +3. 因此第一轮没有任何样本达到候选验证门槛 +4. 本轮最有价值的产出不是候选 skill,而是稳定复现出来的 `G2` 家族 blocker 画像 + +## 9. 后续输入 + +本报告形成后的后续输入只有两类: + +1. 作为后续整改阶段的 blocker 基线 +2. 
作为进入 `G1/G3` 首轮样本前的参考口径 + +本报告本身不包含整改方案,也不展开后续开发任务。 diff --git a/docs/superpowers/reports/2026-04-18-g1-boundary-reassignment-report.md b/docs/superpowers/reports/2026-04-18-g1-boundary-reassignment-report.md new file mode 100644 index 0000000..eca3591 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g1-boundary-reassignment-report.md @@ -0,0 +1,127 @@ +# G1 Boundary Reassignment Report + +> Date: 2026-04-18 +> Scope: `G1` 边界收紧与 4 个边界样本正式重排 +> Source: +> - `examples/g1_batch_round1/skills/` +> - `docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md` +> - `docs/superpowers/plans/2026-04-18-g1-boundary-reassignment-plan.md` + +## 1. Conclusion + +本轮分析结论已经冻结: + +1. `G1` 需要整改,不能继续维持原有宽边界。 +2. 4 个边界样本中,只有 `高低压新增报装容量月度统计表` 保留在 `G1`,但应提升为 `G1-E 轻量补查汇总型`。 +3. `电能表现场检验完成率指标报表`、`计量资产库存统计`、`95598供电服务月报` 均应从 `G1` 中拆出,分别并入 `G6`、`G7`、`G8`。 + +## 2. Background + +在第一轮 `G1` 批量迁移中,共选取 10 个“相对常用报表样本”进行实测生成。结果显示: + +1. 虽然大部分样本可以完成生成流程,但其中一批场景并没有恢复出可靠的 `single_request_table` 语义。 +2. 多个样本被错误吸附到无关外域或被降级为 `page_state_eval`,说明当前 `G1` 边界过宽。 +3. 对 4 个“相对有希望”的边界样本继续深挖后,确认它们并不属于同一个单页单请求家族。 + +## 3. 
Sample Findings + +### 3.1 高低压新增报装容量月度统计表 + +- 生成结果路径: + `examples/g1_batch_round1/skills/g1-capacity-monthly/references/generation-report.json` +- 当前落点: + `page_state_eval` +- 关键信号: + - `bootstrap domain = yxgateway.gs.sgcc.com.cn` + - `params=[]` + - `modes=[]` + - `requestEntries=[]` + - `columnDefs=[]` +- 源场景可见业务结构: + - 主查询:`getWkorderAll` + - 补查:`queryElectCustInfo` + - 补查:`queryBusAcpt` + - 补查:`getBatchPerCust97` +- 结论: + 该场景仍保有主查询骨架,但不是严格单请求平面场景,适合作为 `G1-E` 上边界样本。 + +### 3.2 电能表现场检验完成率指标报表 + +- 生成结果路径: + `examples/g1_batch_round1/skills/g1-meter-inspection-rate/references/generation-report.json` +- 当前落点: + `page_state_eval` +- 关键信号: + - `bootstrap domain = yxgateway.gs.sgcc.com.cn` + - `params=[]` + - `modes=[]` + - `requestEntries=[]` + - `columnDefs=[]` +- 源场景可见业务结构: + - `sgBrowserExcuteJsCode` + - `BrowserAction` + - `getWorkOrderToDoList` + - `queryMeterPlanFormulateApp` + - `queryMeterPlanDtlForAddMeter` +- 结论: + 该场景本质上依赖宿主桥接与多步 callback workflow,不应再放入 `G1`,应重排到 `G6 宿主桥接多步查询型`。 + +### 3.3 计量资产库存统计 + +- 生成结果路径: + `examples/g1_batch_round1/skills/g1-meter-asset-stock/references/generation-report.json` +- 当前落点: + `page_state_eval` +- 关键信号: + - `bootstrap domain = yxgateway.gs.sgcc.com.cn` + - `params=[]` + - `modes=[]` + - `requestEntries=[]` + - `columnDefs=[]` +- 源场景可见业务结构: + - `assetStatsQueryMeter` + - `assetStatsQueryIt` + - `assetStatsQueryAcqTrml` + - `assetStatsQueryMeterCommonModule` + - `assetStatsQueryJlGnModule` +- 结论: + 该场景属于典型的多接口盘点汇总,不符合 `single_request_table` 假设,应重排到 `G7 多接口盘点汇总型`。 + +### 3.4 95598供电服务月报 + +- 生成结果路径: + `examples/g1_batch_round1/skills/g1-95598-service-monthly/references/generation-report.json` +- 当前落点: + `page_state_eval` +- 关键信号: + - `bootstrap domain = south.95598.sgcc.com.cn` + - `params=[]` + - `modes=[]` + - `requestEntries=[]` + - `columnDefs=[]` +- 源场景可见业务结构: + - `BrowserAction('sgBrowerserJsAjax2', ...)` + - `localhost:13313/configServices/selectData` + - `definedSqlQuery` + - `docExport(...)` +- 结论: + 
该场景不是直接报表导出,而是“抓取 -> 落库 -> SQL 分析 -> 文档生成”的复合链路,应重排到 `G8 抓取落库分析出文档型`。 + +## 4. Final Reassignment Table + +| 样本场景 | 正式归属 | 是否继续作为普通 G1 样本 | +| --- | --- | --- | +| 高低压新增报装容量月度统计表 | G1-E 轻量补查汇总型 | 否 | +| 电能表现场检验完成率指标报表 | G6 宿主桥接多步查询型 | 否 | +| 计量资产库存统计 | G7 多接口盘点汇总型 | 否 | +| 95598供电服务月报 | G8 抓取落库分析出文档型 | 否 | + +说明:`高低压新增报装容量月度统计表` 虽然仍归入 `G1` 体系,但已作为 `G1-E` 边界子型使用,不再作为普通 `single_request_table` 样本。 + +## 5. Impact on Next Steps + +本报告落地后,后续执行口径同步固定为: + +1. 先按收紧后的 `G1` 规则继续推进 `G1-E`。 +2. 不再把 `G6/G7/G8` 样本塞回 `G1` 做“通用报表生成”尝试。 +3. 新家族能力落地前,对应样本应输出明确阻断原因与家族归属,而不是继续产出低质量伪可运行 skill。 diff --git a/docs/superpowers/reports/2026-04-18-g1-e-p0-validation-report.md b/docs/superpowers/reports/2026-04-18-g1-e-p0-validation-report.md new file mode 100644 index 0000000..03b2c12 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g1-e-p0-validation-report.md @@ -0,0 +1,190 @@ +# G1-E P0 Validation Report + +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-g1-e-light-enrichment-report-plan.md` Phase 1 -> Phase 3 +> Fixed Sample: +> - `D:\desk\智能体资料\全量业务场景\一平台场景\高低压新增报装容量月度统计表` + +## 1. Conclusion + +本轮 `G1-E` P0 验证已通过。 + +最终结果已经从误判的 `page_state_eval` 收敛为正式的 `single_request_enrichment`,并且真实样本生成结果中已经同时恢复出: + +1. 主请求 +2. 补查请求集合 +3. 合并/汇总规则 + +这说明本轮 plan 的核心目标已经达成:系统不再只会“识别并拦截低质量生成物”,而是已经能对该 P0 样本恢复出可编译的 `G1-E` 三段式语义。 + +## 2. Validation Inputs + +真实样本生成命令: + +```powershell +cargo run --bin sg_scene_generate -- --source-dir "D:/desk/智能体资料/全量业务场景/一平台场景/高低压新增报装容量月度统计表" --scene-id g1e-capacity-monthly --scene-name "高低压新增报装容量月度统计表" --scene-kind report_collection --output-root "D:/data/ideaSpace/rust/sgClaw/claw-new/examples/g1e_p0_validation" +``` + +关键输出位置: + +1. `examples/g1e_p0_validation/skills/g1e-capacity-monthly/references/generation-report.json` +2. `examples/g1e_p0_validation/skills/g1e-capacity-monthly/references/generation-report.md` +3. `examples/g1e_p0_validation/skills/g1e-capacity-monthly/scripts/collect_g1e_capacity_monthly.js` + +## 3. 
Before vs After + +整改前: + +1. `workflowArchetype = page_state_eval` +2. `readiness.notes` 中记录的是 `workflow_archetype=page_state_eval` +3. 生成脚本走的是 `page_state_eval` / 简化脚本路径 +4. 虽然已经提取到 `mainRequest / enrichmentRequests / mergePlan`,但 compiler 没有真正走 `G1-E` 专用编译路径 + +整改后: + +1. `workflowArchetype = single_request_enrichment` +2. `generation-report.md` 明确显示 `Workflow archetype: single_request_enrichment` +3. 生成脚本已经包含: + - `const MAIN_REQUEST =` + - `const ENRICHMENT_REQUESTS =` + - `const MERGE_PLAN =` +4. `readiness.notes` 已更新为 `workflow_archetype=single_request_enrichment` + +## 4. Recovered Semantics + +### 4.1 Main Request + +已恢复主请求: + +1. Endpoint: `getWkorderAll` +2. URL: `http://yxgateway.gs.sgcc.com.cn/emss-cmnf-common-front/member/workOrderQuery/getWkorderAll` +3. Request template: + - `pageNo = 1` + - `pageSize = 1` +4. Response path: `data` + +### 4.2 Enrichment Requests + +已恢复补查请求: + +1. `queryElectCustInfo` +2. `queryBusAcpt` +3. `getBatchPerCust97` + +其中补查入参绑定已经恢复出: + +1. `appNo = ${row.wkOrderNo}` +2. `getBatchPerCust97` 额外固定参数: + - `baseNewFlag = "01"` + - `pageNo = 1` + - `pageSize = 20` + +### 4.3 Merge Plan + +已恢复汇总语义: + +1. Join key: `wkOrderNo` +2. Group rule: `group_by:countyCodeName` +3. 汇总附加规则: + - `append_total_row` + - `compute_share_columns` +4. 聚合规则: + - `com:hightVolTotalThisMonth,hightPressureTotalThisMonth` + - `com:lowVolTotalThisMonth,lowPressureTotalThisMonth` + - `batchCom:lowVolTotalThisMonth,lowPressureTotalThisMonth` + - `com:hightVolTotalOtherMonth,hightPressureTotalOtherMonth` + - `com:lowVolTotalOtherMonth,lowPressureTotalOtherMonth` + - `batchCom:lowVolTotalOtherMonth,lowPressureTotalOtherMonth` + +### 4.4 Output Columns + +最终输出列已非空,且恢复出完整统计口径,包含: + +1. `index` +2. `countyCodeName` +3. `hightPressureTotalOtherMonth` +4. `hightVolTotalOtherMonth` +5. `lowPressureTotalOtherMonth` +6. `lowVolTotalOtherMonth` +7. `otherMonthShare` +8. `hightPressureTotalThisMonth` +9. `hightVolTotalThisMonth` +10. `lowPressureTotalThisMonth` +11. 
`lowVolTotalThisMonth` +12. `thisMonthShare` +13. `yearHightPressureTotal` +14. `yearHightVolTotal` +15. `yearLowPressureTotal` +16. `yearLowVolTotal` +17. `yearShare` + +## 5. Root Cause And Fix + +本次误判的直接原因不是证据缺失,而是 archetype 分类优先级错误。 + +真实样本同时存在以下噪音信号: + +1. `branch_fields=reportType, mode, month` +2. `filters=res.status == 200` +3. 常见分页字段 + +旧逻辑要求: + +1. 只有在 `!has_branch_fields` 时,才允许判为 `SingleRequestEnrichment` + +这导致真实样本即使已经同时满足: + +1. `has_g1e_main_endpoint` +2. `has_g1e_enrichment` +3. `has_g1e_merge_signal` + +仍然会因为存在 `mode/reportType/month` 噪音而失去 `G1-E` 判定资格,随后又被 `status` 噪音吸入 `page_state_eval`。 + +本次修正后的口径是: + +1. 只要 `G1-E` 三段式核心证据齐备,就优先判为 `single_request_enrichment` +2. 不再因为一般性的 `branch field` 噪音退化为 `page_state_eval` + +## 6. Test And Verification Record + +本轮执行的定向验证: + +1. `cargo test --test scene_generator_test` +2. `cargo test --test scene_generator_modes_test` +3. `cargo test --test scene_generator_html_test` +4. `node --test tests/scene_generator_llm_test.js` + +结果: + +1. 以上 `scene generator` 定向测试全部通过 +2. 真实 P0 样本复跑成功 + +补充说明: + +1. `cargo test --workspace` 中存在两条与本次 `G1-E` plan 无关的既有失败: + - `service::server::tests::deterministic_bootstrap_target_uses_plan_target_url` + - `service::server::tests::bootstrap_target_precedence_matrix_covers_page_context_deterministic_skill_and_fallback` +2. 这两条失败不属于本次 plan 改动引入,也不阻塞本次 `G1-E` P0 验收 + +## 7. Acceptance Check + +对照 plan 的 Phase 3 退出标准,当前状态如下: + +1. 主请求、补查请求、合并规则均已稳定恢复 +2. 结果不再是 `params=[] / requestEntries=[] / columnDefs=[]` 的空壳生成物 +3. archetype 已不再退化为 `page_state_eval` +4. compiler 已实际走入 `G1-E` 专用路径 + +结论: + +1. `docs/superpowers/plans/2026-04-18-g1-e-light-enrichment-report-plan.md` 的 Phase 1 -> Phase 3 已完成 + +## 8. Remaining Gap + +本轮完成的是 `G1-E` P0 最小闭环,不代表已经达到“业务结果 100% 等价于人工高质量 skill”。 + +当前仍然保留的后续空间是: + +1. 进一步提升字段中文标题编码质量 +2. 继续验证 `G1-E` 第二个真实样本是否能复用同一语义恢复路径 +3. 
继续把 `G1-E` 与 `G6/G7/G8` 的越界判定做得更稳 diff --git a/docs/superpowers/reports/2026-04-18-g1-e-second-sample-reuse-report.md b/docs/superpowers/reports/2026-04-18-g1-e-second-sample-reuse-report.md new file mode 100644 index 0000000..5105a29 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g1-e-second-sample-reuse-report.md @@ -0,0 +1,71 @@ +# G1-E Second Sample Reuse Report + +> Date: 2026-04-18 +> Scope: `G1-E` second-sample reuse validation + +## Summary + +`G1-E` has moved from a single P0 anchor to a reusable family baseline. + +This round added a second repo-local `G1-E` expansion fixture and promoted the result into the P1 family assets. The expansion fixture keeps the same family contract as the P0 sample: + +1. one resolved main request +2. lightweight enrichment requests +3. a merge plan with join keys, aggregate rules, and output columns + +The result validates that `G1-E` should remain distinct from plain `single_request_table`, while still staying below the `G6/G7/G8` boundary. + +## Assets Updated + +1. `tests/fixtures/generated_scene/g1e_light_enrichment_expansion/index.html` +2. `tests/fixtures/generated_scene/p1_family_manifest.json` +3. `tests/fixtures/generated_scene/p1_family_results.json` +4. `tests/scene_generator_test.rs` +5. `tests/scene_generator_p1_family_test.rs` +6. `tests/scene_generator_family_policy_test.rs` + +## Representative And Expansion Baseline + +| Role | Fixture | Expected Archetype | +| --- | --- | --- | +| Representative | `tests/fixtures/generated_scene/g1e_light_enrichment` | `single_request_enrichment` | +| Expansion | `tests/fixtures/generated_scene/g1e_light_enrichment_expansion` | `single_request_enrichment` | + +The expansion baseline asserts: + +1. main request contains `getWkorderAll` +2. enrichment request contains `queryMeterInfo` +3. merge join key contains `wkOrderNo` +4. merge aggregate rules contain `group_by:countyCodeName` +5. 
output columns contain `meterCapacityThisMonth` + +## Validation + +Executed: + +```powershell +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_p1_family_test -- --nocapture +cargo test --test scene_generator_family_policy_test -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +All four regression suites passed. + +## Result + +`G1-E` reuse is now validated at family level: + +1. `representativeRuns = 1` +2. `expansionRuns = 1` +3. `passedRuns = 2` +4. `failedRuns = 0` +5. `successRate = 1.0` + +## Next Step + +Per the frozen roadmap order, the next implementation line is `G6`: + +1. start independent `G6` spec / plan +2. use `电能表现场检验完成率指标报表` as the first boundary sample +3. build fail-closed classification for host-bridge multi-step workflow before attempting runnable generation diff --git a/docs/superpowers/reports/2026-04-18-g1e-additional-expansion-report.md b/docs/superpowers/reports/2026-04-18-g1e-additional-expansion-report.md new file mode 100644 index 0000000..801bde5 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g1e-additional-expansion-report.md @@ -0,0 +1,28 @@ +# G1-E Additional Expansion Report + +> Status: Completed +> Date: 2026-04-18 +> Plan: [2026-04-17-scene-skill-60-to-90-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md) + +## Intent + +Consume the remaining queued `G1-E` downstream candidate within the existing `single_request_enrichment` family contract. + +## Delivered + +1. Added `tests/fixtures/generated_scene/g1e_light_enrichment_additional` +2. Added deterministic and generation regressions for the additional sample +3. Promoted the sample into the formal `G1-E` batch expansion baseline +4. Synchronized family manifest, family results, ledger overlay, and roadmap execution status + +## Acceptance + +1. archetype remains `single_request_enrichment` +2. 
main request remains `getWkorderAll` +3. enrichment request is recovered as `queryBusAcpt` +4. merge contract still reuses `wkOrderNo` plus `group_by:countyCodeName` +5. `Track B` no longer keeps a queued downstream candidate + +## Notes + +This promotion stays inside the roadmap boundary because it only consumes the existing queued `G1-E` item and does not create a new family or archetype. diff --git a/docs/superpowers/reports/2026-04-18-g1e-candidate-batch-alignment-report.md b/docs/superpowers/reports/2026-04-18-g1e-candidate-batch-alignment-report.md new file mode 100644 index 0000000..cb9be7b --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g1e-candidate-batch-alignment-report.md @@ -0,0 +1,52 @@ +# G1-E Candidate Batch Alignment Report + +## Scope + +This round keeps the roadmap inside the `G1-E` Track B and Track D boundary. + +It does not add a third `G1-E` implementation sample. It converts the already validated representative plus first expansion state into a formal candidate-batch asset. + +## Delivered + +Added or updated: + +1. `tests/fixtures/generated_scene/g1e_candidate_batch_2026-04-18.json` +2. `tests/g1e_candidate_batch_test.rs` +3. `tests/fixtures/generated_scene/p1_family_manifest.json` +4. `tests/fixtures/generated_scene/p1_family_results.json` +5. `tests/fixtures/generated_scene/scene_ledger_status_2026-04-18.json` +6. `tests/scene_ledger_status_test.rs` + +## Batch Meaning + +The batch asset now freezes the current `G1-E` status as: + +1. one promoted baseline +2. one promoted expansion +3. one queued downstream candidate + +This keeps `G1-E` aligned with the same family-asset shape already used by `G2` and `G3`, without forcing unsupported expansion beyond the current evidence. + +## Validation + +Passed: + +1. `cargo test --test g1e_candidate_batch_test -- --nocapture` +2. `cargo test --test scene_generator_p1_family_test -- --nocapture` +3. `cargo test --test scene_ledger_status_test -- --nocapture` +4. 
`cargo test --test scene_generator_family_policy_test -- --nocapture` + +## Outcome + +`G1-E` now has: + +1. representative baseline +2. promoted expansion baseline +3. formal candidate batch +4. synchronized Track E ledger overlay status + +That completes the same core asset shape for the three roadmap mainline families: + +1. `G1-E` +2. `G2` +3. `G3` diff --git a/docs/superpowers/reports/2026-04-18-g2-candidate-batch-alignment-report.md b/docs/superpowers/reports/2026-04-18-g2-candidate-batch-alignment-report.md new file mode 100644 index 0000000..c0a75c2 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g2-candidate-batch-alignment-report.md @@ -0,0 +1,60 @@ +# G2 Candidate Batch Alignment Report + +## Scope + +This round continues the roadmap Track D and Track E mainline work for `G2`. + +It converts the current line-loss family progress into a formal repo-local batch asset instead of leaving the state split across separate markdown reports only. + +## Delivered + +Added or updated: + +1. `tests/fixtures/generated_scene/g2_candidate_batch_2026-04-18.json` +2. `tests/g2_candidate_batch_test.rs` +3. `tests/fixtures/generated_scene/p1_family_manifest.json` +4. `tests/fixtures/generated_scene/p1_family_results.json` +5. `tests/fixtures/generated_scene/scene_ledger_status_2026-04-18.json` +6. `tests/scene_generator_p1_family_test.rs` +7. `tests/scene_ledger_status_test.rs` + +## Batch Meaning + +The batch asset now freezes the current `G2` line-loss family status as: + +1. one `P0` anchor baseline +2. four promoted expansion baselines +3. one deferred queue item + +The promoted expansion baselines are: + +1. week-only mode variant +2. mixed linked workflow variant +3. comparison crosscheck variant +4. diagnosis drilldown variant + +## Deferred Scope + +`G2-D` remains deferred in this round because the current representative sample still does not expose a stable business endpoint contract. 
+ +That stays within the roadmap plan boundary and does not force non-contract-first implementation work into the current mainline. + +## Validation + +Passed: + +1. `cargo test --test g2_candidate_batch_test -- --nocapture` +2. `cargo test --test scene_generator_p1_family_test -- --nocapture` +3. `cargo test --test scene_ledger_status_test -- --nocapture` +4. `cargo test --test scene_generator_family_policy_test -- --nocapture` + +## Outcome + +`G2` now matches the same Track D and Track E asset shape already established for `G3`: + +1. representative baseline +2. promoted expansion baselines +3. formal batch asset +4. current-status ledger overlay + +That means the remaining roadmap work can continue from a stable asset baseline instead of reconstructing `G2` progress from scattered markdown reports. diff --git a/docs/superpowers/reports/2026-04-18-g2-d-prediction-compute-report.md b/docs/superpowers/reports/2026-04-18-g2-d-prediction-compute-report.md new file mode 100644 index 0000000..5a9d70f --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g2-d-prediction-compute-report.md @@ -0,0 +1,45 @@ +# G2-D Prediction Compute Report + +> Status: Completed +> Date: 2026-04-18 +> Plan: [2026-04-17-scene-skill-60-to-90-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md) + +## Intent + +Within the existing `G2` line-loss family contract, promote the last deferred `G2-D` candidate into a repo-local reusable expansion baseline without crossing into new runtime families or login/transport scope. + +## Scope + +This slice only adds: + +1. a deterministic `G2-D` fixture +2. subtype classification for `g2_d_prediction_compute` +3. a minimal reusable `prediction` mode recovery path +4. regression coverage +5. Track D / Track E asset synchronization + +It does not introduce: + +1. file-input runtime support +2. local compute engine execution +3. 
new family boundaries outside `G2` + +## Delivered + +1. Added fixture: `tests/fixtures/generated_scene/g2_prediction_compute` +2. Added analyzer subtype: `G2FamilyVariant::G2D` +3. Added generator mode recovery: `prediction` +4. Added deterministic and generation regressions +5. Promoted `predicted_compute_variant` into the `G2` batch expansion baseline +6. Synchronized candidate batch, family manifest/results, ledger status overlay, and roadmap execution status + +## Acceptance + +1. `G2-D` now compiles through the existing `multi_mode_request` family path +2. generated default mode is `prediction` +3. generated report contains `lineId`, `lineLossRate`, and `powerLoss` +4. Track A no longer keeps `deferred-g2-d-candidate` as an outstanding item + +## Notes + +This promotion stays inside the roadmap boundary because it only consumes the remaining queued/deferred `G2` family item and does not open new archetype scope. diff --git a/docs/superpowers/reports/2026-04-18-g2-family-expansion-third-round-report.md b/docs/superpowers/reports/2026-04-18-g2-family-expansion-third-round-report.md new file mode 100644 index 0000000..29df765 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g2-family-expansion-third-round-report.md @@ -0,0 +1,142 @@ +# G2 家族扩展第三轮回归报告 + +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-g2-family-expansion-plan.md` +> Family: `G2` + +## 1. 本轮执行范围 + +本轮严格承接上一轮 `G2` 主样本整改,不回退 `tq` 主样本链路,只补以下两类剩余变体: + +1. `G2-B` + 周报单侧 mode 变体 + 代表样本:`白银线损周报` +2. `G2-C` + 线损主链路 + 外部系统联动的混合变体 + 代表样本:`线损同期差异报表` + +本轮实际落地内容: + +1. 在 analyzer 中引入 `G2FamilyVariant` +2. 建立 `G2-A/G2-B/G2-C` 三类子型边界 +3. 为 `G2-B` 补单周模式最小合同 +4. 为 `G2-C` 补主链路模式最小合同 +5. 保持 `G2-A` 主样本不回退 + +## 2. 回归测试结果 + +本地回归: + +- `cargo test --test scene_generator_test -- --nocapture` +- 结果:`19 passed` + +新增回归覆盖了: + +1. `G2-A` 主样本仍被识别为双模式主报表 +2. `G2-B` 被识别为周报单侧 mode 变体 +3. `G2-C` 被识别为混合联动变体 +4. `G2-B` 可以生成最小合同 +5. `G2-C` 可以生成最小合同 + +## 3. 
真实样本第三轮结果 + +### 3.1 G2-A 基线 + +样本: + +- `台区线损大数据-月_周累计线损率统计分析` +- 产物:`examples/real_scene_batch_round1/skills/real-tq-lineloss-report-r1` + +结果: + +1. `workflowArchetype = multi_mode_request` +2. `bootstrap.expectedDomain = 20.76.57.61:18080` +3. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +4. `modes = [month, week]` +5. `g2_family_variant_candidate` 已写入 +6. `g2_request_contract_complete = true` +7. `g2_response_contract_complete = true` +8. `readiness.level = A` + +结论: + +- `G2-A` 基线稳定,没有回退 + +### 3.2 G2-B 周报单侧 mode 变体 + +样本: + +- `白银线损周报` +- 产物:`examples/real_scene_batch_round1/skills/real-baiyin-lineloss-weekly-r1` + +结果: + +1. 已成功生成,不再 fail-close +2. `workflowArchetype = multi_mode_request` +3. `bootstrap.expectedDomain = 20.76.57.61:18080` +4. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +5. `modes = [week]` +6. 产物中已恢复 `LINE_LOSS_RATE` +7. `g2_family_variant_candidate` 已写入 +8. `g2_request_contract_complete = true` +9. `g2_response_contract_complete = true` +10. `readiness.level = A` + +结论: + +- `G2-B` 已从“只能 fail-close”推进到“可解释最小合同” +- 已可进入候选验证名单 + +### 3.3 G2-C 混合联动变体 + +样本: + +- `线损同期差异报表` +- 产物:`examples/real_scene_batch_round1/skills/real-lineloss-period-diff-r1` + +结果: + +1. 已成功生成,不再 fail-close +2. `workflowArchetype = multi_mode_request` +3. `bootstrap.expectedDomain = 20.76.57.61:18080` +4. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +5. `modes = [primary]` +6. 产物中已恢复 `TG_NO` +7. `g2_family_variant_candidate` 已写入 +8. `g2_request_contract_complete = true` +9. `g2_response_contract_complete = true` +10. `readiness.level = A` + +结论: + +- `G2-C` 已从“混合噪声导致整体阻断”推进到“主链路最小合同可生成” +- 当前可进入候选验证名单 + +## 4. 与上一轮相比的变化 + +上一轮状态: + +1. `G2-A` 已通 +2. `G2-B` fail-close +3. `G2-C` fail-close + +本轮状态: + +1. `G2-A` 保持稳定 +2. `G2-B` 已恢复单周模式合同 +3. `G2-C` 已恢复主链路模式合同 +4. 三份样本都不再被同一种 `G2-A` 模型粗暴处理 + +## 5. 当前结论 + +按本计划的目标口径,本轮已经完成: + +1. `G2` 从“单主样本修通”推进到“至少三类子型可区分” +2. `白银线损周报` 不再只是被动 fail-close +3. `线损同期差异报表` 不再只是被动 fail-close +4. 
`G2-A/G2-B/G2-C` 都已进入候选验证名单 + +当前最准确的状态判断是: + +- `G2` 家族扩展第一轮已达标 +- 下一步如果继续推进,应切到更大范围的线损变体扩展或真实内网验证 diff --git a/docs/superpowers/reports/2026-04-18-g2-first-round-blocker-summary.md b/docs/superpowers/reports/2026-04-18-g2-first-round-blocker-summary.md new file mode 100644 index 0000000..ab643ce --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g2-first-round-blocker-summary.md @@ -0,0 +1,141 @@ +# G2 家族首轮 Blocker 汇总 + +日期:2026-04-18 + +样本范围: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +对应生成产物: + +1. `examples/real_scene_batch_round1/skills/real-tq-lineloss-report-r1` +2. `examples/real_scene_batch_round1/skills/real-baiyin-lineloss-weekly-r1` +3. `examples/real_scene_batch_round1/skills/real-lineloss-period-diff-r1` + +## 1. 当前结论 + +这三个 `G2` 真实样本已经足够说明,当前生成链在“线损多模式报表家族”上存在稳定的共性 blocker,而不是单个场景的偶发误差。 + +当前状态可以概括为: + +1. 能抓到部分线损业务信号 +2. 但不会把这些信号重建成 `multi_mode_request.month_week_table` +3. 反而会稳定坍缩到 `paginated_enrichment` + +## 2. 已稳定复现的共性 blocker + +### 2.1 archetype 稳定误判 + +三个样本都被判成: + +- `paginated_enrichment` + +而不是: + +- `multi_mode_request` + +这说明当前 workflow 判定时,分页、补数、过滤等噪声信号权重明显高于 `month/week` 模式矩阵信号。 + +### 2.2 bootstrap 稳定落错 + +三个样本都落到: + +- `expected_domain = 20.77.115.36:31051` +- `target_url = http://20.77.115.36:31051` + +这说明当前 bootstrap 选择逻辑没有把“真实业务承载页”与“其他可见入口页面”区分开。 + +### 2.3 modes 稳定丢失 + +三个样本都有这些共性: + +1. `modes = []` +2. `defaultMode = "month"` +3. `modeSwitchField = "period_mode"` + +也就是说,系统能感知到 mode 相关词,但不会把它编译成真正可执行的 mode 结构。 + +### 2.4 request contract 稳定缺失 + +三个样本都出现: + +1. `params = []` +2. `requestTemplate = null` +3. 最终脚本把 `args` 直接整体塞进请求体 + +这说明当前链路没有恢复出线损家族真正需要的 mode-specific 请求合同。 + +### 2.5 column defs 稳定缺失 + +三个样本都出现: + +1. `columnDefs = []` +2. `normalizeRules.requiredFields = []` +3. 脚本运行时用 `rows[0]` 动态反推列 + +这意味着业务列语义完全没有固化下来。 + +### 2.6 endpoint 污染稳定存在 + +三个样本都同时抓到了: + +1. 线损业务接口 +2. 95598/营销等其他业务接口 +3. 
大量第三方库、文档、外链 URL

这说明当前 endpoint candidate 过滤还不够,静态资源和依赖库内容仍在污染主业务判定。

### 2.7 readiness 稳定过度乐观

三个样本都给出:

- `Readiness: A`

但实际核心业务合同明显没有闭合。

这说明 readiness 现在还主要在反映“结构上能生成”,而不是“业务上接近可运行”。

## 3. 目前可以确认的正向信号

虽然不通过,但也有三类正向信号已经稳定出现:

1. 能抓到线损家族核心 endpoint 名称
2. 能抓到 `month/week/tjzq/mode` 这类模式信号
3. 能把 `localhost:*` 归入宿主依赖证据,而不是直接把它选成 bootstrap

这说明系统不是“完全盲”,而是“证据有了,但主链重建失败”。

## 4. 当前最小判断

基于前三个 `G2` 样本,当前可以给出一个更稳定的最小判断:

1. 当前链路已具备 `G2` 证据抽取能力
2. 当前链路尚不具备 `G2` 业务主链重建能力
3. 因此现在不能把 `G2` 结果视为可进入内网验证的候选 skill

## 5. 第三个 G2 样本验证结果

`线损同期差异报表` 已完成首轮生成,本轮重点核验以下问题是否继续复现:

1. archetype 是否仍会坍缩到 `paginated_enrichment`
2. bootstrap 是否仍会被错误锚到 `20.77.115.36:31051`
3. endpoint 污染是否仍然严重
4. 当场景引入更明显的对比链、secondary chain 后,是否会进一步放大当前偏差

当前结果表明:

1. 以上问题全部继续复现
2. 且第三个样本的 `filter` 证据还出现了更明显的脏串污染
3. 因此 `G2` blocker 画像可以视为已经稳定

## 6. 当前阶段是否进入 G1/G3

建议:`可以结束 G2 首轮收敛,再决定是否切换`

原因:

1. 执行表顺序本来就是先打透 `G2`
2. 当前 `G2` blocker 已经通过第三个样本完成收口
3. 后续如果继续推进,应该进入“整理首轮迁移报告”或“开始 G1/G3 首轮样本生成”,而不是回头继续补更多 `G2` 观察样本 diff --git a/docs/superpowers/reports/2026-04-18-g2-second-round-remediation-report.md b/docs/superpowers/reports/2026-04-18-g2-second-round-remediation-report.md new file mode 100644 index 0000000..ec146ed --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g2-second-round-remediation-report.md @@ -0,0 +1,159 @@ +# G2 第二轮整改回归报告 + +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-g2-remediation-plan.md` +> Family: `G2` + +## 1. 本轮整改落地内容 + +本轮严格围绕 `G2` 家族整改,实际落地了以下链路修复: + +1. `multi_mode_request` 识别优先级前置,避免被分页噪声重新压回 `paginated_enrichment` +2. `bootstrap` 候选从“仅命名 URL”扩展为“命名 URL + endpoint 业务根路径”联合评分,并保留已有 `sourceUrl` 回归口径 +3. `request_template` 支持以下真实场景写法: + - `data: JSON.stringify(datas)` + - `data: ${JSON.stringify(datas)}` + - `let/const/var datas = {...}` 变量间接引用 + - 对象简写字段,如 `weekSfdate` +4. `cols1/cols2` 改为平衡括号抽取,不再依赖 `];` 结尾,修复真实线损场景中两组列被吞并的问题 +5. 
`G2` readiness gate 继续维持 fail-close: + - `g2_modes_present` + - `g2_request_contract_complete` + - `g2_response_contract_complete` + +## 2. 回归测试结果 + +本地回归: + +- `cargo test --test scene_generator_test -- --nocapture` +- 结果:`15 passed` + +新增和强化的回归口径已覆盖: + +1. `G2` 噪声场景不再误判为 `paginated_enrichment` +2. `localhost` 仍只作为宿主依赖保留 +3. `data -> JSON.stringify(var)` 的请求模板能够恢复 +4. 缺少 `G2` 合同时继续被阻断 +5. 不带分号的 `cols1/cols2` 仍能正确拆出 `month/week` 列定义 + +## 3. 真实样本第二轮结果 + +### 3.1 台区线损大数据-月_周累计线损率统计分析 + +样本路径: + +- 源场景:`D:\desk\智能体资料\全量业务场景\一平台场景\台区线损大数据-月_周累计线损率统计分析` +- 生成产物:`examples/real_scene_batch_round1/skills/real-tq-lineloss-report-r4` + +结果: + +1. 已成功生成,不再 fail-close +2. `workflowArchetype = multi_mode_request` +3. `bootstrap.expectedDomain = 20.76.57.61:18080` +4. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +5. `modes = [month, week]` +6. `month/week` 均具备非空 `requestTemplate` +7. `month/week` 均具备非空 `columnDefs` +8. `responsePath = content` +9. `g2_request_contract_complete = true` +10. `g2_response_contract_complete = true` +11. `readiness.level = A` + +结论: + +- 该样本已进入“候选验证名单” + +### 3.2 白银线损周报 + +样本路径: + +- 源场景:`D:\desk\智能体资料\全量业务场景\一平台场景\白银线损周报` +- 生成尝试:`real-baiyin-lineloss-weekly-r1` + +结果: + +1. 仍然 fail-close +2. 生成器报错:`workflow evidence is incomplete for archetype multi_mode_request` + +直接证据: + +1. 场景内能抓到周维接口: + - `getYearMonWeekLinelossAnalysisList` +2. 同时还能抓到台区排行接口: + - `getTqLinelossInfoListRank` +3. 但当前最小证据审计中,没有看到与 `tq` 主样本同级的显式 `cols1/cols2` +4. 也没有看到月/周双模式都完整闭合的 response-side 合同证据 + +结论: + +- 当前仍应维持 fail-close +- 该样本说明:`G2` 家族中还存在“只有部分 week/mode 信号,但没有完整列合同”的变体 + +### 3.3 线损同期差异报表 + +样本路径: + +- 源场景:`D:\desk\智能体资料\全量业务场景\一平台场景\线损同期差异报表` +- 生成尝试:`real-lineloss-period-diff-r1` + +结果: + +1. 仍然 fail-close +2. 生成器报错:`workflow evidence is incomplete for archetype multi_mode_request` + +直接证据: + +1. 主要抓到的是: + - `getTqLinelossInfoListRank` + - `getUserElectricList` + - `tqQualifyRateMonitor` +2. 同时还混入了同期系统链路: + - `10.4.39.180/xsgl/...` +3. 
当前最小证据审计中,没有看到 `tq` 主样本那样稳定的 `month/week + cols1/cols2` 完整合同 + +结论: + +- 当前仍应维持 fail-close +- 该样本更像“线损主场景 + 同期系统联动”的混合工作流,而不是已经闭合的 `tq` 主报表双模式合同 + +## 4. 与第一轮相比的变化 + +第一轮 blocker: + +1. 主样本会退化成 `paginated_enrichment` +2. `bootstrap` 落错 +3. `request_template` 丢失 +4. `cols1/cols2` 串并,导致 mode response contract 不成立 +5. readiness 会给出失真的高分或错误通行 + +第二轮结果: + +1. 主样本不再退化成 `paginated_enrichment` +2. 主样本 `bootstrap` 已落到 `http://20.76.57.61:18080/gsllys` +3. 主样本 `month/week` 请求模板已恢复 +4. 主样本 `month/week` 列合同已恢复 +5. 主样本已进入候选验证名单 +6. 其余两份样本继续 fail-close,没有被错误放行 + +## 5. 仍保留的 blocker + +本轮没有解决、但已经被明确收敛出来的剩余问题: + +1. `G2` 家族内部并不只有 `tq` 主报表一种结构,至少还存在: + - 周报偏单侧 mode 信号变体 + - 同期差异联动型混合工作流 +2. 当前 endpoint 候选中仍有较多业务外噪声 URL 残留,但已不再阻塞主样本进入候选验证 +3. `G2` 主样本已经达到候选验证门槛,但“家族扩展到更多线损变体”仍未完成 + +## 6. 本轮结论 + +按本计划的目标口径,本轮整改已经完成以下关键结果: + +1. `台区线损大数据-月_周累计线损率统计分析` 已从稳定失败推进到候选验证可用 +2. 另外两份真实样本没有被错误放行,继续保持 fail-close +3. 第二轮整改已经把 `G2` 从“主样本无法成型”推进到“主样本可成型、变体样本被明确隔离” + +当前最准确的状态判断是: + +- `G2` 主样本整改已达标 +- `G2` 家族扩展仍未完成 diff --git a/docs/superpowers/reports/2026-04-18-g3-batch-closure-report.md b/docs/superpowers/reports/2026-04-18-g3-batch-closure-report.md new file mode 100644 index 0000000..793ac5b --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-batch-closure-report.md @@ -0,0 +1,32 @@ +# G3 Batch Closure Report + +> Status: Completed +> Date: 2026-04-18 +> Plan: [2026-04-17-scene-skill-60-to-90-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md) + +## Intent + +Consume the remaining `G3` downstream candidates within the existing paginated-enrichment family contract and close the current Track C queue. + +## Delivered + +1. Added five more `G3` paginated-enrichment expansion fixtures: + - device monitor weekly + - customer satisfaction daily + - repair return analysis + - repair daily control + - business stats +2. Added deterministic request-template coverage and generation regressions for all five fixtures +3. 
Promoted all remaining current downstream `G3` candidates into formal batch expansion baselines +4. Synchronized family manifest, family results, ledger overlay, and roadmap execution status + +## Acceptance + +1. all promoted fixtures remain inside `paginated_enrichment` +2. all promoted fixtures reuse `ticketNo` as join key +3. aggregate rules stay within existing `aggregate:riskLevel` and `aggregate:sourceType` scope +4. `Track C` remaining queue is now empty + +## Notes + +This closes the current repo-local `G3` batch within the roadmap boundary. It does not introduce a new archetype, runtime family, or external validation scope. diff --git a/docs/superpowers/reports/2026-04-18-g3-candidate-batch-alignment-report.md b/docs/superpowers/reports/2026-04-18-g3-candidate-batch-alignment-report.md new file mode 100644 index 0000000..dab62f3 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-candidate-batch-alignment-report.md @@ -0,0 +1,57 @@ +# G3 Candidate Batch Alignment Report + +## Scope + +This round no longer leaves the `95598-ticket-family-candidate` ledger cluster as a raw batch queue. + +It promotes the batch into a formal `G3` family asset that now records: + +1. the `P0` anchor +2. the first expansion anchor +3. the second-round promoted expansion baselines + +## Delivered + +Added or updated: + +1. `tests/fixtures/generated_scene/g3_candidate_batch_2026-04-18.json` +2. `tests/g3_candidate_batch_test.rs` +3. `tests/fixtures/generated_scene/p1_family_manifest.json` +4. `tests/fixtures/generated_scene/p1_family_results.json` +5. `tests/scene_generator_p1_family_test.rs` +6. `tests/scene_generator_family_policy_test.rs` + +## Batch Meaning + +The batch asset now freezes five things together: + +1. `paginated_enrichment` remains the `G3 / P0-3` anchor baseline +2. `paginated_enrichment_expansion` remains the first expansion baseline +3. `paginated_enrichment_expansion_workorder` is the promoted second expansion baseline +4. 
`paginated_enrichment_expansion_orderno` is the promoted third expansion baseline +5. the rest of the `95598-ticket-family-candidate` ledger cluster remains the downstream candidate queue + +## Shared Contract Direction + +The batch explicitly records the current reusable `G3` contract direction: + +1. `archetype = paginated_enrichment` +2. pagination fields stay in the `page/pageNum/pageNo/pageSize` family +3. join keys stay in the `ticketNo/workOrderNo/orderNo` family +4. aggregate rules stay in the `riskLevel/sourceType` family + +## Validation + +Passed: + +1. `cargo test --test g3_candidate_batch_test -- --nocapture` +2. `cargo test --test scene_generator_test generator_writes_paginated_enrichment_workorder_expansion_fixture -- --nocapture` +3. `cargo test --test scene_generator_test generator_writes_paginated_enrichment_orderno_expansion_fixture -- --nocapture` +4. `cargo test --test scene_generator_p1_family_test -- --nocapture` +5. `cargo test --test scene_generator_family_policy_test -- --nocapture` + +## Outcome + +`G3` is no longer represented only by one `P0` anchor plus one first expansion fixture. + +It now has a promoted batch-expansion asset that already names the second and third concrete expansion baselines, so the next round can continue from code-backed family assets instead of re-selecting candidates from the ledger snapshot. diff --git a/docs/superpowers/reports/2026-04-18-g3-family-first-round-report.md b/docs/superpowers/reports/2026-04-18-g3-family-first-round-report.md new file mode 100644 index 0000000..5ce469e --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-family-first-round-report.md @@ -0,0 +1,84 @@ +# G3 First-Round Family Expansion Report + +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-g3-paginated-enrichment-plan.md` Phase 4 / WS5 +> Family: `G3 / paginated_enrichment` +> Result: Completed + +## 1. Round Scope + +本轮 `G3` family 扩展不再停留在单一 `P0` 样板,而是形成了: + +1. 
Representative baseline + - `tests/fixtures/generated_scene/paginated_enrichment` +2. First expansion baseline + - `tests/fixtures/generated_scene/paginated_enrichment_expansion` + +扩展样板承接的是 `G3` 的第一类变体能力: + +1. 分页字段从 `page/pageSize` 扩展到 `pageNum/pageSize` +2. join key 从 `custNo` 扩展到 `ticketNo` +3. 聚合字段从 `charge` 扩展到 `riskLevel` + +## 2. Implemented Family Reuse + +为支持第一扩展样板,本轮已经把以下能力沉到 `G3` 家族实现中: + +1. 分页字段识别支持 `pageNum/pageNo` +2. `risk*` 过滤表达式进入 `G3` 过滤信号 +3. `join key` 推导允许扩展样板保留核心业务键,同时不过滤掉分页键 +4. `aggregate:*` 规则可以从 `row.` 过滤表达式恢复 + +## 3. Expansion Validation Result + +扩展样板当前验证结果: + +1. `workflowArchetype = paginated_enrichment` +2. `readiness.level = A/B` +3. `paginationPlan.pageField = pageNum` +4. `joinKeys` 包含 `ticketNo` +5. `mergeOrDedupeRules` 包含 `aggregate:riskLevel` +6. `exportPlan.entry = exportExcel` +7. `g3_main_request_resolved = true` +8. `g3_join_key_resolved = true` + +## 4. Family-Level Asset Promotion + +本轮结果已经沉到 family 级资产中: + +1. `tests/fixtures/generated_scene/p1_family_manifest.json` + - `G3` 现已记录 `representative + expansion` +2. `tests/fixtures/generated_scene/p1_family_results.json` + - `G3` 现已记录 `expansionRuns = 1` +3. `tests/scene_generator_p1_family_test.rs` + - `G3` family test 现已实际运行 expansion fixture + +## 5. Regression Record + +本轮已通过: + +```powershell +cargo test --test scene_generator_p1_family_test -- --nocapture +cargo test --test scene_generator_family_policy_test -- --nocapture +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +## 6. Current Family Conclusion + +当前可以确认: + +1. `G3` 已不再只有单个 `P0` 样板 +2. `G3` 已拥有第一扩展样板和 family 级断言 +3. representative 与 expansion 已共同进入 repo-local baseline +4. `G3` 第一轮 family expansion 已达成 + +## 7. Remaining Space + +本轮完成的是 `G3` 第一扩展样板,不代表 `G3` 全部复杂工单家族都已经覆盖。 + +后续如果继续扩展,优先方向应为: + +1. 第二个 `G3` 扩展样板 +2. 更复杂的 host bridge / localhost 依赖场景 +3. 
更强的多源聚合和导出前置动作恢复 diff --git a/docs/superpowers/reports/2026-04-18-g3-next-round-expansion-report.md b/docs/superpowers/reports/2026-04-18-g3-next-round-expansion-report.md new file mode 100644 index 0000000..b510292 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-next-round-expansion-report.md @@ -0,0 +1,54 @@ +# G3 Next-Round Expansion Report + +## Scope + +This round continues consuming the `95598-ticket-family-candidate` downstream queue instead of stopping at the second-round expansion baselines. + +Added fixtures: + +1. `tests/fixtures/generated_scene/paginated_enrichment_expansion_source_distribution` +2. `tests/fixtures/generated_scene/paginated_enrichment_expansion_service_risk` + +## Contract Variants Added + +The next round validates two more `G3` reusable variants: + +1. `pageNum + ticketNo + aggregate:sourceType` +2. `pageNo + ticketNo + aggregate:riskLevel` + +## Delivered + +Updated: + +1. `tests/fixtures/generated_scene/p1_family_manifest.json` +2. `tests/fixtures/generated_scene/p1_family_results.json` +3. `tests/fixtures/generated_scene/g3_candidate_batch_2026-04-18.json` +4. `tests/g3_candidate_batch_test.rs` +5. `tests/scene_generator_test.rs` +6. `tests/scene_generator_p1_family_test.rs` + +## Validation + +Passed: + +1. deterministic analysis for both new fixtures +2. package generation for both new fixtures +3. family-level reusable migration assertions for both new fixtures +4. batch-asset assertions for the promoted fourth and fifth expansion anchors + +## Outcome + +`G3` now has: + +1. one `P0` anchor +2. one first expansion fixture +3. two second-round expansion fixtures +4. two next-round promoted expansion fixtures + +The current `paginated_enrichment` family baseline is now validated across: + +1. `ticketNo` +2. `workOrderNo` +3. `orderNo` + +and across both `riskLevel` and `sourceType` aggregate families with supported pagination-field variants. 
diff --git a/docs/superpowers/reports/2026-04-18-g3-p0-validation-report.md b/docs/superpowers/reports/2026-04-18-g3-p0-validation-report.md new file mode 100644 index 0000000..800c1e4 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-p0-validation-report.md @@ -0,0 +1,103 @@ +# G3 P0 Validation Report + +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-g3-paginated-enrichment-plan.md` Phase 3 -> Phase 4 +> Archetype: `G3 / paginated_enrichment` +> Result: Passed + +## 1. Conclusion + +`G3` 的 `P0-3` 基线已经形成正式闭环。 + +当前 repo-local `P0` 样板 `tests/fixtures/generated_scene/paginated_enrichment` 已经稳定恢复并通过以下关键合同: + +1. `main_request` +2. `pagination_plan` +3. `enrichment_requests` +4. `join_keys` +5. `merge_or_dedupe_rules` +6. `export_plan` + +同时,`G3` 的 `fail-closed` 语义已经固定,至少覆盖: + +1. `join_key_missing` +2. `pagination_incomplete` +3. `export_only_without_business_chain` + +## 2. Validation Inputs + +本轮 repo-local `P0` 校验基于以下资产: + +1. Fixture: + - `tests/fixtures/generated_scene/paginated_enrichment/index.html` +2. Canonical: + - `tests/fixtures/generated_scene/p0_canonical_answers/p0-3-paginated-enrichment.scene-ir.json` +3. Tests: + - `tests/scene_generator_canonical_test.rs` + - `tests/scene_generator_test.rs` + +关键回归命令: + +```powershell +cargo test --test scene_generator_canonical_test -- --nocapture +cargo test --test scene_generator_test -- --nocapture +``` + +## 3. P0 Restored Semantics + +当前 `P0-3` 样板已经稳定恢复: + +1. Main request + - endpoint url: `http://yx.gs.sgcc.com.cn/marketing/userList` + - response path: `rows` +2. Pagination plan + - page field: `page` + - page size field: `pageSize` + - termination rule: `stop_when_page_rows_empty` +3. Enrichment request + - endpoint url: `http://yx.gs.sgcc.com.cn/marketing/userCharges` + - consumed field: `custNo` +4. Join key + - `custNo` +5. Merge / dedupe rules + - `dedupe:custNo` + - `aggregate:charge` +6. Export plan + - entry: `exportExcel` + +## 4. 
Acceptance Checklist Result + +对照 `WS4 / WS5` 的 `P0` 检查项,当前结果如下: + +1. `workflowArchetype = paginated_enrichment` +2. `mainRequest.responsePath = rows` +3. `paginationPlan.pageField = page` +4. `joinKeys = [custNo]` +5. `mergeOrDedupeRules` 包含 `aggregate:charge` +6. `exportPlan.entry = exportExcel` +7. `g3_join_key_resolved = true` +8. `localhost_dependency_candidate` 未污染主业务链 +9. `readiness.level = A` + +## 5. Failure Taxonomy Baseline + +本轮已被测试钉住的 `G3` 失败 taxonomy: + +1. `join_key_missing` +2. `pagination_incomplete` +3. `export_only_without_business_chain` + +对应测试: + +1. `paginated_enrichment_readiness_marks_join_key_missing_taxonomy` +2. `paginated_enrichment_readiness_marks_pagination_incomplete_taxonomy` +3. `paginated_enrichment_readiness_marks_export_only_without_business_chain_taxonomy` + +## 6. Final Status + +对照 `2026-04-18-g3-paginated-enrichment-plan.md`: + +1. `P0 canonical` 已冻结 +2. `P0 acceptance checklist` 已转化为可执行测试 +3. `P0 failure taxonomy` 已形成最小稳定集合 +4. `P0-3` 当前可作为 `G3` 后续 family 扩展的统一校准源 diff --git a/docs/superpowers/reports/2026-04-18-g3-second-round-expansion-report.md b/docs/superpowers/reports/2026-04-18-g3-second-round-expansion-report.md new file mode 100644 index 0000000..8c76794 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-second-round-expansion-report.md @@ -0,0 +1,50 @@ +# G3 Second-Round Expansion Report + +## Scope + +This round takes `G3` beyond the first expansion fixture and lands two additional repo-local expansion baselines from the `95598/工单家族候选` batch direction. + +Added fixtures: + +1. `tests/fixtures/generated_scene/paginated_enrichment_expansion_workorder` +2. `tests/fixtures/generated_scene/paginated_enrichment_expansion_orderno` + +## Contract Variants Added + +The second round explicitly validates two more `G3` join-key and pagination variants: + +1. `pageNo + workOrderNo + aggregate:sourceType` +2. `page + orderNo + aggregate:sourceType` + +## Delivered + +Updated: + +1. 
`tests/fixtures/generated_scene/p1_family_manifest.json` +2. `tests/fixtures/generated_scene/p1_family_results.json` +3. `tests/scene_generator_p1_family_test.rs` +4. `tests/scene_generator_test.rs` + +## Validation + +Passed: + +1. deterministic analysis for both new fixtures +2. package generation for both new fixtures +3. family-level reusable migration assertions for both new fixtures + +## Outcome + +`G3` now has: + +1. one P0 anchor +2. one first expansion fixture +3. two second-round expansion fixtures + +So the family baseline now proves that the current `paginated_enrichment` contract is reusable across: + +1. `ticketNo` +2. `workOrderNo` +3. `orderNo` + +with corresponding pagination-field variants. diff --git a/docs/superpowers/reports/2026-04-18-g3-timeout-warning-expansion-report.md b/docs/superpowers/reports/2026-04-18-g3-timeout-warning-expansion-report.md new file mode 100644 index 0000000..af166a3 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g3-timeout-warning-expansion-report.md @@ -0,0 +1,28 @@ +# G3 Timeout Warning Expansion Report + +> Status: Completed +> Date: 2026-04-18 +> Plan: [2026-04-17-scene-skill-60-to-90-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md) + +## Intent + +Promote one more `G3` downstream candidate within the existing paginated-enrichment family contract, without introducing a new archetype or runtime scope. + +## Delivered + +1. Added `tests/fixtures/generated_scene/paginated_enrichment_expansion_timeout_warning` +2. Added deterministic and generation regressions for the timeout-warning sample +3. Promoted `ticket_timeout_warning_detail` into the formal `G3` batch expansion baseline +4. Synchronized family manifest, family results, ledger overlay, and roadmap execution status + +## Acceptance + +1. archetype remains `paginated_enrichment` +2. pagination field is recovered as `pageNum` +3. join key remains `ticketNo` +4. 
aggregate rule remains `aggregate:riskLevel` +5. `Track C` remaining queue drops from six to five + +## Notes + +This promotion stays inside the roadmap boundary because it reuses the existing `G3` family contract and only consumes one downstream candidate from the current batch asset. diff --git a/docs/superpowers/reports/2026-04-18-g6-g7-g8-family-asset-sync-report.md b/docs/superpowers/reports/2026-04-18-g6-g7-g8-family-asset-sync-report.md new file mode 100644 index 0000000..deff68c --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g6-g7-g8-family-asset-sync-report.md @@ -0,0 +1,61 @@ +# G6 G7 G8 Family Asset Sync Report + +## Scope + +This round does not change `G6/G7/G8` runtime contracts themselves. + +It promotes the three boundary families into formal family-level assets so they are no longer only protected by implementation tests. + +## Synced Assets + +Updated: + +1. `tests/fixtures/generated_scene/p1_family_manifest.json` +2. `tests/fixtures/generated_scene/p1_family_results.json` +3. `tests/fixtures/generated_scene/family_expansion_policy.json` +4. `tests/scene_generator_p1_family_test.rs` +5. `tests/scene_generator_family_policy_test.rs` + +## Result + +`G6`, `G7`, and `G8` are now represented at the same family-asset layer as the existing mainline families. + +That means the repo now preserves these facts as first-class assets: + +1. `G6` is an independent `host_bridge_workflow` family +2. `G7` is an independent `multi_endpoint_inventory` family +3. `G8` is an independent `local_doc_pipeline` family +4. none of them should fall back into `G1`, `G1-E`, or `G3` + +## Policy Position + +The policy asset now separates: + +1. mainline groups: `G1/G2/G3` +2. boundary-runtime groups: `G6/G7/G8` +3. deferred groups: `G4/G5` + +This matches the current roadmap state: + +1. mainline capability growth still centers on `G1/G2/G3` +2. 
`G6/G7/G8` are no longer just reassignment notes; they now have code-backed runtime contracts and family-level preservation + +## Validation + +Passed: + +- `cargo test --test scene_generator_p1_family_test -- --nocapture` +- `cargo test --test scene_generator_test -- --nocapture` +- `cargo test --test scene_generator_family_policy_test -- --nocapture` +- `cargo test --test scene_generator_canonical_test -- --nocapture` + +## Outcome + +The roadmap has moved past pure boundary reassignment. + +`G6/G7/G8` now exist as synchronized family assets with: + +1. representative migration baselines +2. results records +3. policy placement +4. regression protection diff --git a/docs/superpowers/reports/2026-04-18-g6-host-bridge-first-slice-report.md b/docs/superpowers/reports/2026-04-18-g6-host-bridge-first-slice-report.md new file mode 100644 index 0000000..ebd48bf --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g6-host-bridge-first-slice-report.md @@ -0,0 +1,61 @@ +# G6 Host Bridge First Slice Report + +> Date: 2026-04-18 +> Scope: `G6` classification and fail-closed safety slice + +## Summary + +The first `G6` implementation slice is complete. + +This round did not attempt runnable host bridge execution. It implemented the safer first step: + +1. new `host_bridge_workflow` archetype +2. deterministic host bridge action detection +3. `G6` classification priority over `G1-E` +4. `G6` readiness gates +5. fail-closed generation behavior +6. regression protection so ordinary localhost export noise does not become `G6` + +## Assets Updated + +1. `src/generated_scene/ir.rs` +2. `src/generated_scene/analyzer.rs` +3. `src/generated_scene/generator.rs` +4. `tests/fixtures/generated_scene/g6_host_bridge_workflow/index.html` +5. `tests/scene_generator_test.rs` + +## Behavioral Result + +The new representative fixture validates: + +1. `workflowArchetype = host_bridge_workflow` +2. host bridge action evidence includes `sgBrowerserJsAjax2` +3. 
host runtime evidence includes `localhost:13313` +4. business callbacks include `getWorkOrderToDoList` +5. business callbacks include `queryMeterPlanFormulateApp` +6. generation fails closed with `host_bridge_workflow` + +## Boundary Correction + +The implementation initially exposed one boundary risk: plain `localhost:*` noise could promote a scene to `G6`. + +The classifier was corrected so `G6` requires explicit host bridge action evidence. `localhost:*` alone remains host-runtime evidence and does not define the archetype. + +## Validation + +Executed: + +```powershell +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_p1_family_test -- --nocapture +cargo test --test scene_generator_family_policy_test -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +All target regressions passed. + +## Result + +`G6` is now safely separated from `G1/G1-E` at classification time. + +The family is intentionally still fail-closed until a real host bridge runtime contract is designed and implemented. diff --git a/docs/superpowers/reports/2026-04-18-g6-host-bridge-runtime-contract-report.md b/docs/superpowers/reports/2026-04-18-g6-host-bridge-runtime-contract-report.md new file mode 100644 index 0000000..57db725 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g6-host-bridge-runtime-contract-report.md @@ -0,0 +1,52 @@ +# G6 Host Bridge Runtime Contract Report + +> Date: 2026-04-18 +> Scope: `G6` minimum runtime contract implementation + +## Summary + +`G6` has moved from pure fail-closed classification to a minimum runnable runtime contract. + +The implementation does not redesign the browser host transport. Instead it generates a conservative `host_bridge_workflow` script that: + +1. preserves host bridge actions +2. preserves localhost host-runtime dependencies +3. lists callback business endpoints +4. invokes host bridge APIs when available +5. 
invokes callback endpoints through the existing browser request path +6. returns blocked/error/partial/ok artifact statuses instead of pretending full equivalence + +## Assets Updated + +1. `src/generated_scene/generator.rs` +2. `tests/scene_generator_test.rs` + +## Contract + +The minimal `G6` contract is now: + +1. at least one `host_bridge` workflow step +2. at least one `callback_request` workflow step +3. at least one business endpoint + +If that contract is missing, generation still fails closed. + +## Validation + +Executed: + +```powershell +cargo test --test scene_generator_test g6 -- --nocapture +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_p1_family_test -- --nocapture +cargo test --test scene_generator_family_policy_test -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +All target regressions passed. + +## Result + +`G6` now has a minimum generated package path while retaining a fail-closed negative test for incomplete contracts. + +The next runtime-contract target is `G7 multi_endpoint_inventory`. diff --git a/docs/superpowers/reports/2026-04-18-g7-multi-endpoint-first-slice-report.md b/docs/superpowers/reports/2026-04-18-g7-multi-endpoint-first-slice-report.md new file mode 100644 index 0000000..5f23781 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g7-multi-endpoint-first-slice-report.md @@ -0,0 +1,49 @@ +# G7 Multi Endpoint Inventory First Slice Report + +> Date: 2026-04-18 +> Scope: `G7` classification and fail-closed safety slice + +## Summary + +The first `G7` implementation slice is complete. + +This round added the `multi_endpoint_inventory` archetype and a repo-local representative fixture for the `计量资产库存统计` boundary family. + +The result is intentionally fail-closed: `G7` can now be identified and prevented from falling back into `G1/G1-E`, but runnable aggregation is not implemented yet. + +## Assets Updated + +1. `src/generated_scene/ir.rs` +2. 
`src/generated_scene/analyzer.rs` +3. `src/generated_scene/generator.rs` +4. `tests/fixtures/generated_scene/g7_multi_endpoint_inventory/index.html` +5. `tests/scene_generator_test.rs` + +## Behavioral Result + +The representative fixture validates: + +1. `workflowArchetype = multi_endpoint_inventory` +2. inventory endpoint count is at least five +3. inventory endpoint names include `assetStatsQueryMeter` +4. inventory endpoint names include `assetStatsQueryJlGnModule` +5. generation fails closed with `multi_endpoint_inventory` + +## Validation + +Executed: + +```powershell +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_p1_family_test -- --nocapture +cargo test --test scene_generator_family_policy_test -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +All target regressions passed. + +## Result + +`G7` is now safely separated from `G1/G1-E` at classification time. + +The family remains fail-closed until a formal multi-endpoint inventory aggregation contract is implemented. diff --git a/docs/superpowers/reports/2026-04-18-g7-multi-endpoint-runtime-contract-report.md b/docs/superpowers/reports/2026-04-18-g7-multi-endpoint-runtime-contract-report.md new file mode 100644 index 0000000..fa22d1d --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g7-multi-endpoint-runtime-contract-report.md @@ -0,0 +1,46 @@ +# G7 Multi-Endpoint Inventory Runtime Contract Report + +## Scope + +This round upgrades `G7 / multi_endpoint_inventory` from safe-slice classification to a minimal runnable runtime contract. + +Representative fixture: + +- `tests/fixtures/generated_scene/g7_multi_endpoint_inventory` + +## Delivered + +1. Added a minimal `G7` runtime contract in the generator. +2. Promoted `g7_fail_closed` from unconditional block to conditional pass when the inventory contract is complete. +3. Added a dedicated browser compiler path for `multi_endpoint_inventory`. +4. 
Replaced the old positive fail-closed test with: + - runnable generation test + - incomplete-contract negative test + +## Minimal Contract + +`G7` is treated as complete when all of the following are present: + +1. At least 3 `inventory_request` workflow steps +2. At least 3 API endpoints +3. One `inventory_aggregate` workflow step + +## Generated Runtime Shape + +The generated browser script now includes: + +1. `inventoryEndpoints()` +2. `aggregateEntry()` +3. multi-endpoint request execution +4. merged artifact payload with `workflow_archetype = multi_endpoint_inventory` + +## Validation + +Passed: + +- `cargo test --test scene_generator_test g7 -- --nocapture` +- `cargo test --test scene_generator_test -- --nocapture` + +## Outcome + +`G7` no longer stops at classification-only safe slice. It now supports a minimal runnable contract while still fail-closing incomplete manual `SceneIr` inputs. diff --git a/docs/superpowers/reports/2026-04-18-g8-local-doc-pipeline-first-slice-report.md b/docs/superpowers/reports/2026-04-18-g8-local-doc-pipeline-first-slice-report.md new file mode 100644 index 0000000..dacceda --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g8-local-doc-pipeline-first-slice-report.md @@ -0,0 +1,59 @@ +# G8 Local Document Pipeline First Slice Report + +> Date: 2026-04-18 +> Scope: `G8` classification and fail-closed safety slice + +## Summary + +The first `G8` implementation slice is complete. + +This round added the `local_doc_pipeline` archetype and a repo-local representative fixture for the `95598供电服务月报` boundary family. + +The result is intentionally fail-closed: `G8` can now be identified and prevented from falling back into `G1/G1-E/G6`, but runnable local storage, SQL analysis, and document-generation orchestration is not implemented yet. + +## Assets Updated + +1. `src/generated_scene/ir.rs` +2. `src/generated_scene/analyzer.rs` +3. `src/generated_scene/generator.rs` +4. 
`tests/fixtures/generated_scene/g8_local_doc_pipeline/index.html` +5. `tests/scene_generator_test.rs` + +## Behavioral Result + +The representative fixture validates: + +1. `workflowArchetype = local_doc_pipeline` +2. localhost dependency evidence includes `localhost:13313` +3. local pipeline actions include `definedSqlQuery` +4. local pipeline actions include `docExport` +5. local pipeline actions include `selectData` +6. generation fails closed with `local_doc_pipeline` + +## Boundary Correction + +The first test run showed that `G8` could be captured by `G6` because the fixture also contains host bridge actions. + +The classifier now prioritizes local document pipeline evidence over generic host bridge evidence. This preserves the intended family boundary: + +1. `G6`: host bridge driven query progression +2. `G8`: local persistence, SQL analysis, and document production after data capture + +## Validation + +Executed: + +```powershell +cargo test --test scene_generator_test -- --nocapture +cargo test --test scene_generator_p1_family_test -- --nocapture +cargo test --test scene_generator_family_policy_test -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +All target regressions passed. + +## Result + +`G8` is now safely separated from `G1/G1-E/G6` at classification time. + +The family remains fail-closed until a formal local document pipeline runtime contract is implemented. diff --git a/docs/superpowers/reports/2026-04-18-g8-local-doc-pipeline-runtime-contract-report.md b/docs/superpowers/reports/2026-04-18-g8-local-doc-pipeline-runtime-contract-report.md new file mode 100644 index 0000000..fc8bc1c --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-g8-local-doc-pipeline-runtime-contract-report.md @@ -0,0 +1,49 @@ +# G8 Local Doc Pipeline Runtime Contract Report + +## Scope + +This round upgrades `G8 / local_doc_pipeline` from safe-slice classification to a minimal runnable runtime contract. 
+ +Representative fixture: + +- `tests/fixtures/generated_scene/g8_local_doc_pipeline` + +## Delivered + +1. Added a minimal `G8` runtime contract in the generator. +2. Promoted `g8_fail_closed` from unconditional block to conditional pass when the local document pipeline contract is complete. +3. Added a dedicated browser compiler path for `local_doc_pipeline`. +4. Extended automatic scene recovery so localhost pipeline dependencies become formal `api_endpoints`. +5. Replaced the old positive fail-closed test with: + - runnable generation test + - incomplete-contract negative test + +## Minimal Contract + +`G8` is treated as complete when all of the following are present: + +1. At least one `local_doc_pipeline` step +2. At least one `sql_query` or `doc_query` step +3. At least one `doc_export` step +4. At least one localhost API endpoint + +## Generated Runtime Shape + +The generated browser script now includes: + +1. `localDataEndpoints()` +2. `sqlQueryEntry()` +3. `docExportEntry()` +4. local-stage request execution +5. artifact payload with `workflow_archetype = local_doc_pipeline` + +## Validation + +Passed: + +- `cargo test --test scene_generator_test g8 -- --nocapture` +- `cargo test --test scene_generator_test -- --nocapture` + +## Outcome + +`G8` no longer remains classification-only. It now supports a minimal runtime contract while preserving fail-closed behavior for incomplete manual `SceneIr` inputs. diff --git a/docs/superpowers/reports/2026-04-18-lineloss-family-minimum-contract-table.md b/docs/superpowers/reports/2026-04-18-lineloss-family-minimum-contract-table.md new file mode 100644 index 0000000..0a7d085 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-lineloss-family-minimum-contract-table.md @@ -0,0 +1,154 @@ +# 线损家族新增变体最小合约表 +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-lineloss-family-variant-expansion-plan.md` +> Purpose: 完成 Phase 2 交付物,明确新增线损变体的最小可用口径与候选验证门槛。 + +## 1. 
设计原则 + +本表只定义“最低可生成”的合约,不直接承诺完整业务语义重建。原则如下: + +1. 主合约只覆盖能稳定抽取的主链路。 +2. 页面内计算、拼接、下钻逻辑如果尚不能稳态恢复,先作为扩展证据,不强行塞入主合约。 +3. 无法满足主合约的样本继续 `fail-close`。 + +## 2. 合约表 + +### 2.1 `G2-D 预测计算型` + +代表样本: + +1. `线路月度高负损预测` +2. `台区线损台区月度高负损预测` + +最小主合约: + +1. `bootstrap` + - 必须能定位到线损业务域 `gsllys` + - 不要求额外外部系统联动 +2. `primary evidence` + - 存在预测型标识,如 `reportType` 中含预测语义 + - 页面内存在 `lineLossRate / powerLoss / lineLossType` 等计算结果字段 +3. `request contract` + - 至少识别一个主查询入口或主输入数据源 + - 若原页面由文件输入或多个表拼装触发,可先将其视为“single primary collection + local compute” +4. `response / column contract` + - 至少恢复以下列中的 3 个以上: + - `lineId` + - `lineName` + - `lineLossType` + - `lineLossRate` + - `powerLoss` + - 必须恢复 `lineLossRate` 或 `powerLoss` 之一 +5. `normalize contract` + - 对计算结果过滤空值 + - 保证每条记录至少保留主键列和一个核心预测结果列 + +候选验证门槛: + +1. 能稳定落到 `G2-D` +2. 生成结果中存在预测结果核心列 +3. 不把这类场景误判为 `G2-A/G2-B/G2-C` + +扩展证据,不计入主合约: + +1. 复杂白名单逻辑 +2. 多表 Excel 预处理 +3. 全量预测规则完全复刻 + +### 2.2 `G2-E 核查对比型` + +代表样本: + +1. `台区零度户月度用电量与台区线损电量对比核查报表` + +最小主合约: + +1. `bootstrap` + - 必须定位到线损主域 `gsllys` +2. `primary evidence` + - 同时出现 `getUserElectricList` 与 `getTqLinelossInfoListRank` + - 页面内存在按 `TG_NO` 或 `CONS_NO` 的拼接逻辑 +3. `request contract` + - 至少恢复主排名接口和用户电量接口 + - 主接口保留分页参数 `page / rows` +4. `response / column contract` + - 至少恢复以下列中的 4 个以上: + - `TG_NO` + - `TG_NAME` + - `consno` + - `userNmae` + - `thisMonth` + - `beforeMonth1` + - 必须同时具备一个台区维度列和一个用户维度列 +5. `normalize contract` + - 允许先以主链路行为单位输出 + - 允许暂不完整复刻所有拼接字段,但必须能看出“台区 + 用户”的双层语义 + +候选验证门槛: + +1. 能稳定识别为核查对比型,而不是简单周报或双模式报表 +2. 生成结果中保留 `TG_NO` 且保留至少一个用户维度字段 +3. 关联补查证据写入 generation report + +扩展证据,不计入主合约: + +1. 多月历史字段完全展开 +2. 所有异常兜底行填充逻辑 +3. 报表导出标题文案完全一致 + +### 2.3 `G2-F 异常诊断 / 下钻型` + +代表样本: + +1. `线损大数据-窃电分析` + +最小主合约: + +1. `bootstrap` + - 必须定位到线损主域 `gsllys` +2. `primary evidence` + - 出现主筛选接口 `getTqLinelossInfoListRank` + - 出现至少一个诊断或下钻接口: + - `tqAutoDiagnoseAnalyse/search` + - `stealElecAnalyse/getFlqdyhDetailList` + - `stealElecAnalyse/userVoltsAndElecflowMoniter/search` +3. 
`request contract` + - 至少恢复主筛选请求 + - 至少记录存在一个后续诊断请求 +4. `response / column contract` + - 至少恢复以下列中的 4 个以上: + - `TG_NO` + - `LL_TYPE_NAME` + - `LOSS_PQ` + - `LINELOSS_RATE` + - `remark` + - 若暂不能稳定恢复完整 `remark`,也必须显式标记为诊断链路未完备 +5. `normalize contract` + - 主表记录必须可输出 + - 诊断 remark 允许退化为部分诊断结果或空值,但不能伪造完整语义 + +候选验证门槛: + +1. 能稳定识别该类为“主筛选 + 下钻诊断”链路 +2. generation report 中能写出诊断接口证据 +3. 若 `remark` 合约不完整,readiness 不能伪装成 A + +扩展证据,不计入主合约: + +1. 所有异常分类规则 +2. 各类诊断 remark 拼接细节 +3. 多轮异步诊断时序 + +## 3. 本轮实施结论 + +按当前最小合约成熟度排序: + +1. `G2-D` 最适合先进入实现 +2. `G2-E` 适合紧随其后 +3. `G2-F` 先作为高复杂候选组,只有在主链路实现稳定后再推进 + +因此,Phase 3 的实现顺序固定为: + +1. 先扩 `G2-D` +2. 再扩 `G2-E` +3. `G2-F` 仅保留为候选扩展或回归观察对象 diff --git a/docs/superpowers/reports/2026-04-18-lineloss-family-variant-expansion-report.md b/docs/superpowers/reports/2026-04-18-lineloss-family-variant-expansion-report.md new file mode 100644 index 0000000..62b98c8 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-lineloss-family-variant-expansion-report.md @@ -0,0 +1,155 @@ +# 线损家族变体扩展回归报告 +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-lineloss-family-variant-expansion-plan.md` +> Result: Completed + +## 1. 本轮执行范围 + +本轮严格承接已完成的 `G2-A/G2-B/G2-C` 基线,不回头修改主样本整顿逻辑,只做以下两类新增线损变体扩展: + +1. `G2-E = g2_e_comparison_crosscheck` + 代表真实样本:`台区零度户月度用电量与台区线损电量对比核查报表` +2. `G2-F = g2_f_diagnosis_drilldown` + 代表真实样本:`线损大数据-窃电分析` + +`G2-D` 预测计算型已完成结构盘点,但因真实样本缺少稳定业务接口,暂不进入本轮生成链路实现。 + +## 2. Phase 0 / 1 / 2 交付结果 + +已新增文档: + +1. `docs/superpowers/reports/2026-04-18-lineloss-family-variant-grouping-report.md` +2. `docs/superpowers/reports/2026-04-18-lineloss-family-minimum-contract-table.md` + +完成内容: + +1. 冻结 `G2-A/G2-B/G2-C` 为线损家族既有基线 +2. 新增 `G2-D/G2-E/G2-F` 三个扩展方向 +3. 明确本轮实际实施对象收敛为 `G2-E/G2-F` +4. 为新增变体建立最小合约与候选验证门槛 + +## 3. Phase 3 实现结果 + +### 3.1 新增 fixture + +1. `tests/fixtures/generated_scene/g2_comparison_crosscheck/index.html` +2. `tests/fixtures/generated_scene/g2_diagnosis_drilldown/index.html` + +### 3.2 新增分类与生成支持 + +已扩展: + +1. 
`src/generated_scene/analyzer.rs` + - 新增 `G2E` + - 新增 `G2F` + - `G2-E` 判定口径: + - 存在 `getTqLinelossInfoListRank` + - 存在 `getUserElectricList` + - 同时出现用户 / 台区拼接信号 + - `G2-F` 判定口径: + - 存在主筛选接口 `getTqLinelossInfoListRank` + - 存在诊断或下钻接口 `tqAutoDiagnoseAnalyse/search` / `stealElecAnalyse/*` +2. `src/generated_scene/generator.rs` + - 新增 `G2-E` 最小 mode:`comparison` + - 新增 `G2-F` 最小 mode:`diagnosis` + - readiness gate 已接纳两类新 mode +3. `tests/scene_generator_test.rs` + - 新增 `G2-E` 分类测试 + - 新增 `G2-F` 分类测试 + - 新增 `G2-E` 生成测试 + - 新增 `G2-F` 生成测试 + +### 3.3 本地回归结果 + +执行命令: + +```powershell +cargo test --test scene_generator_test -- --nocapture +``` + +结果: + +1. `23 passed` +2. 原有 `G2-A/G2-B/G2-C` 未回退 +3. 新增 `G2-E/G2-F` fixture 分类与生成通过 + +## 4. Phase 4 真实样本回归结果 + +### 4.1 `G2-E` 核查对比型 + +真实样本: + +1. `台区零度户月度用电量与台区线损电量对比核查报表` + +生成产物: + +1. `examples/real_scene_batch_round2/skills/real-zero-consumer-crosscheck-r1` + +结果: + +1. `workflowArchetype = multi_mode_request` +2. `g2_family_variant_candidate = g2_e_comparison_crosscheck` +3. `bootstrap.expectedDomain = 20.76.57.61:18080` +4. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +5. `defaultMode = comparison` +6. `columnDefs` 已恢复 `TG_NO / TG_NAME / consno / userNmae / thisMonth / beforeMonth1` +7. `readiness.level = A` + +结论: + +1. 已从“未归类场景”推进到“可进入候选 skill 阶段” +2. 当前生成结果保住了台区 + 用户双层语义 + +### 4.2 `G2-F` 异常诊断 / 下钻型 + +真实样本: + +1. `线损大数据-窃电分析` + +生成产物: + +1. `examples/real_scene_batch_round2/skills/real-steal-analysis-r1` + +结果: + +1. `workflowArchetype = multi_mode_request` +2. `g2_family_variant_candidate = g2_f_diagnosis_drilldown` +3. `bootstrap.expectedDomain = 20.76.57.61:18080` +4. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +5. `defaultMode = diagnosis` +6. `columnDefs` 已恢复 `TG_NO / LL_TYPE_NAME / LOSS_PQ / LINELOSS_RATE / remark` +7. generation report 中保留了 `tqAutoDiagnoseAnalyse/search`、`stealElecAnalyse/getFlqdyhDetailList`、`stealElecAnalyse/userVoltsAndElecflowMoniter/search` 证据 +8. 
`readiness.level = A` + +结论: + +1. 已从“高复杂未归类场景”推进到“可候选验证的诊断型变体” +2. 当前版本恢复的是主链路最小诊断合约,不承诺完整 remark 业务语义 + +## 5. 当前残留问题 + +本轮 plan 范围内已达标,但仍有以下残留: + +1. `G2-D` 预测计算型尚未进入实现 + - 根因:真实样本主要是本地计算与文件输入,缺少稳定业务接口 +2. 真实样本 HTML 中仍含大量第三方或非目标业务 URL 噪声 + - 当前不会阻塞 `G2-E/G2-F` 候选生成 + - 但后续应继续补“目标业务域去噪”能力 +3. `G2-F` 的完整诊断 remark 仍未重建 + - 当前只恢复最小诊断合约,不等于业务语义完全还原 + +## 6. 计划完成判定 + +对照 `2026-04-18-lineloss-family-variant-expansion-plan.md`,本轮已完成: + +1. Phase 0:冻结线损家族现有基线 +2. Phase 1:完成线损变体分组和代表样本清单 +3. Phase 2:完成新增变体最小合约表 +4. Phase 3:按分组扩展 `fixture / test / analyzer / generator / readiness` +5. Phase 4:完成两类新增真实样本回归并输出扩展报告 + +最终判定: + +1. 至少两类新增线损变体已进入候选验证阶段 +2. 原有 `G2-A/G2-B/G2-C` 未回退 +3. `G2-D` 保留为下一阶段候选组 diff --git a/docs/superpowers/reports/2026-04-18-lineloss-family-variant-grouping-report.md b/docs/superpowers/reports/2026-04-18-lineloss-family-variant-grouping-report.md new file mode 100644 index 0000000..60ae262 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-lineloss-family-variant-grouping-report.md @@ -0,0 +1,136 @@ +# 线损家族变体分组报告 +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-lineloss-family-variant-expansion-plan.md` +> Purpose: 完成 Phase 0 / Phase 1 交付物,冻结当前线损家族基线,并建立后续扩展分组清单。 + +## 1. 当前已冻结基线 + +本轮扩展以前,线损家族中已经可稳定识别并生成的子型为: + +1. `G2-A = g2_a_dual_mode_baseline` + 代表样本:`台区线损大数据-月_周累计线损率统计分析` + 特征:同一主链路下同时存在月 / 周两套模式,请求和列定义都较完整。 +2. `G2-B = g2_b_weekly_single_mode` + 代表样本:`白银线损周报` + 特征:仍属于线损主链路,但只有周报单侧模式,需要用最小周报合约兜住生成。 +3. `G2-C = g2_c_mixed_linked_workflow` + 代表样本:`线损同期差异报表` + 特征:主链路仍是线损查询,但同时联动外部系统或二次查询,只能先恢复主链路最小合约。 + +上述三类已经在上一轮回归中通过本地测试,并在真实样本重生成为候选 skill,不再回退到“只修主样本”的阶段。 + +## 2. 线损场景盘点结果 + +本次从 `D:\desk\智能体资料\全量业务场景\一平台场景` 中筛出的线损相关场景为: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` +4. `台区零度户月度用电量与台区线损电量对比核查报表` +5. `线损大数据-窃电分析` +6. `线路月度高负损预测` +7. `台区线损台区月度高负损预测` +8. `供电所线路电量统计` +9. `线路失电信息明细` + +其中 `8` 和 `9` 与当前线损主家族相关度较弱,不纳入本轮优先扩展对象。 + +## 3. 结构化分组 + +### 3.1 已落地组 + +1. `G2-A 双模式主报表型` + 代表样本:`台区线损大数据-月_周累计线损率统计分析` +2. 
`G2-B 周报单侧模式型` + 代表样本:`白银线损周报` +3. `G2-C 混合联动主链路型` + 代表样本:`线损同期差异报表` + +### 3.2 新增候选组 + +1. `G2-D 预测计算型` + 代表样本: + - `线路月度高负损预测` + - `台区线损台区月度高负损预测` + 结构特征: + - 页面内存在 `reportType = monthlyHighNegativeLossPredictionForLines` 或同类预测型标识 + - 结果并非直接来自标准 `cols1/cols2` 表头,而是先读取多个输入表或数据块,再在页面内计算 `lineLossRate / powerLoss / lineLossType` + - 导出字段以“预测结果字段”为主,如 `lineId / lineName / lineLossType / lineLossRate / powerLoss` + 结论: + - 这类不是传统 G2 主报表直接抽列,而是“取数 + 计算 + 导出”的计算型线损变体 + - 适合优先扩展,因为有两个高度相似样本,可做成同一家族 + +2. `G2-E 核查对比型` + 代表样本: + - `台区零度户月度用电量与台区线损电量对比核查报表` + 结构特征: + - 同时调用 `getUserElectricList` 与 `getTqLinelossInfoListRank` + - 页面内做数据拼接、按 `TG_NO / CONS_NO` 关联用户和台区数据 + - 导出字段同时含台区维度和用户维度,如 `TG_NO / TG_NAME / consno / thisMonth / beforeMonth1` + 结论: + - 该组更像“主链路 + 明细补查 + 拼接报表” + - 与 `G2-C` 有相似性,但最终产物比 `G2-C` 更偏“核查对比报表”而不是简单主链路恢复 + +3. `G2-F 异常诊断 / 下钻型` + 代表样本: + - `线损大数据-窃电分析` + 结构特征: + - 先取 `getTqLinelossInfoListRank` + - 再调用 `tqAutoDiagnoseAnalyse/search`、`stealElecAnalyse/getFlqdyhDetailList`、`stealElecAnalyse/userVoltsAndElecflowMoniter/search` + - 页面中通过多次诊断接口拼装 `remark` + - 导出字段核心列为 `TG_NO / LOSS_PQ / LINELOSS_RATE / remark` + 结论: + - 该组属于“线损主表筛选后进入异常诊断和明细下钻”的复合流程 + - 二次、三次请求显著多于 `G2-C`,复杂度高于本轮已落地三类 + +## 4. 本轮实施映射 + +按当前 plan 的 Phase 3 约束,本轮只选择最容易形成“分组复制”的 1 到 2 个新增家族,不做全量铺开。 + +优先级排序如下: + +1. `P0: G2-D 预测计算型` + 原因: + - 有两个结构高度相似的真实样本 + - 页面内计算逻辑清晰,字段也相对稳定 + - 可以形成新的最小合约和新的 fixture 组 +2. `P1: G2-E 核查对比型` + 原因: + - 虽然只有一个当前样本,但和现有 `G2-C` 最接近 + - 适合作为“主链路 + 关联补查”方向的下一扩展 +3. `P2: G2-F 异常诊断 / 下钻型` + 原因: + - 链路最长,remark 拼接和异常判断逻辑较重 + - 适合放在本轮后半段,优先做候选验证而不是一步做满 + +## 5. Phase 3 前置可行性结论 + +在进入实际实现前,对 `G2-D` 两个真实样本再次核查后确认: + +1. `线路月度高负损预测` +2. `台区线损台区月度高负损预测` + +这两类页面虽然具备稳定的预测结果字段和 `reportType`,但当前源码里几乎只体现为: + +1. 本地读表或页面内计算 +2. `localhost:13313` 导出与报表日志依赖 +3. 没有足够稳定的线损业务接口可抽成当前生成器的请求链路 + +因此,本轮实现不强行把 `G2-D` 塞进现有请求驱动编译链,而是将其保留为“候选组”,等待后续补齐“本地计算 / 文件输入型 skill”能力后再做。 + +据此,本轮实际实施顺序调整为: + +1. 先实现 `G2-E` +2. 再实现 `G2-F` +3. `G2-D` 暂停在候选验证阶段 + +## 6. 
Phase 1 退出结论 + +本轮线损扩展已经从“按单场景修补”切换到“按家族分组推进”: + +1. 已冻结 `G2-A/G2-B/G2-C` 基线,不再回头重做主样本修补。 +2. 已识别出后续新增的三个线损变体方向:`G2-D/G2-E/G2-F`。 +3. 经可行性核查后,本轮实际实现顺序固定为: + - 先做 `G2-E` + - 再做 `G2-F` + - `G2-D` 保留为下一阶段候选组 diff --git a/docs/superpowers/reports/2026-04-18-post-roadmap-execution-closure-report.md b/docs/superpowers/reports/2026-04-18-post-roadmap-execution-closure-report.md new file mode 100644 index 0000000..6af752a --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-post-roadmap-execution-closure-report.md @@ -0,0 +1,82 @@ +# Post-Roadmap Execution Closure Report + +> Date: 2026-04-18 +> Scope: `docs/superpowers/plans/2026-04-18-scene-skill-post-roadmap-execution-plan.md` +> Result: Completed + +## 1. Conclusion + +The post-roadmap execution plan has been closed within its own boundary. + +The completed outputs now exist for all required phases: + +1. Phase 0: handover boundary asset +2. Phase 1: minimum current execution board +3. Phase 2: real-sample validation layer +4. Phase 3: boundary and runtime entry rules +5. Phase 4: bounded next-roadmap design and plan + +## 2. Delivered Assets + +### Phase 0 + +1. `tests/fixtures/generated_scene/post_roadmap_handover_2026-04-18.json` + +### Phase 1 + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +### Phase 2 + +1. `tests/fixtures/generated_scene/real_sample_validation_plan_2026-04-18.json` +2. `tests/fixtures/generated_scene/real_sample_validation_record_template_2026-04-18.json` +3. `tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json` +4. `tests/fixtures/generated_scene/real_sample_mismatch_taxonomy_2026-04-18.json` + +### Phase 3 + +1. `tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json` + +### Phase 4 + +1. `docs/superpowers/specs/2026-04-18-scene-skill-real-sample-validation-roadmap-design.md` +2. `docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md` + +## 3. 
Key Outcome + +The most important change is not a new compiler feature. + +It is the separation of: + +1. frozen workbook snapshot +2. current execution-board state +3. real-sample validation state +4. next-roadmap scope decisions + +That separation prevents the next execution round from drifting back into asset-only refinement. + +## 4. Real-Sample Validation Status + +The first-round validation layer now records: + +1. `G2` as an executed mismatch anchor +2. `G1-E` as an executed pass anchor +3. `G3` as a selected real-sample anchor that still needs an executed real run + +This means the next roadmap can be validation-first rather than fixture-first. + +## 5. Boundary Control + +`G6/G7/G8` remain boundary families. + +`G4/G5` remain deferred or degraded. + +No new implementation scope was opened inside this plan. + +## 6. Verification + +The post-roadmap execution assets are now covered by: + +1. `cargo test --test post_roadmap_execution_assets_test -- --nocapture` + +This verification is in addition to the already existing family, ledger, and roadmap asset tests. diff --git a/docs/superpowers/reports/2026-04-18-r1-real-tq-lineloss-analysis.md b/docs/superpowers/reports/2026-04-18-r1-real-tq-lineloss-analysis.md new file mode 100644 index 0000000..15ad702 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-r1-real-tq-lineloss-analysis.md @@ -0,0 +1,217 @@ +# R1 真实样本分析:台区线损大数据-月_周累计线损率统计分析 + +日期:2026-04-18 + +样本来源:`D:\desk\智能体资料\全量业务场景\一平台场景\台区线损大数据-月_周累计线损率统计分析` + +生成输出:`examples/real_scene_batch_round1/skills/real-tq-lineloss-report-r1` + +对标基线: + +1. `tq-lineloss-report` +2. `P0-1 multi_mode_request.month_week_table` +3. `docs/superpowers/reports/2026-04-17-integration-test-report.md` + +## 1. 执行结果 + +已完成一次真实生成,生成命令成功返回,产物已写入仓库内独立目录。 + +本次结论不是“生成失败”,而是“生成成功但业务语义恢复失败”,因此当前样本不能进入后续内网验证。 + +## 2. 结果等级 + +综合判断:`不通过` + +原因: + +1. archetype 判错 +2. bootstrap 选错 +3. endpoint 污染严重 +4. modes 未恢复 +5. request contract 未恢复 +6. column defs 未恢复 +7. 
生成脚本不可达到 `tq` 级别业务语义 + +## 3. 与目标基线的核心偏差 + +### 3.1 archetype 误判 + +目标应接近: + +- `multi_mode_request.month_week_table` + +实际生成结果: + +- `paginated_enrichment` + +直接证据: + +- `references/generation-report.md` 中写明 `Workflow archetype: paginated_enrichment` +- `references/generation-report.json` 中 `workflowArchetype = "paginated_enrichment"` +- 生成脚本中出现固定 `paginate -> secondary_request -> filter` 主链 + +影响: + +1. 生成器把“月/周双模式报表”错误理解为“分页主列表 + 二次补数” +2. 后续 request builder、response parser、artifact 结构都会沿错误路径展开 + +### 3.2 bootstrap 落点偏离 `tq` 主业务面 + +目标基线: + +- `tq-lineloss-report` 的 `scene.toml` 将 bootstrap 锚到 `20.76.57.61` +- `target_url` 指向线损业务页面入口 + +本次生成结果: + +- `expected_domain = "20.77.115.36:31051"` +- `target_url = "http://20.77.115.36:31051"` + +影响: + +1. 生成器把其他页面/系统入口误当成主业务承载面 +2. 即使后续请求里碰巧抓到线损接口,运行时 page context 也可能不匹配 + +### 3.3 endpoint 污染严重 + +目标状态: + +- 只保留与线损月/周统计直接相关的主业务接口 + +本次生成结果: + +1. 虽然抓到了 `getYearMonWeekLinelossAnalysisList` +2. 也抓到了 `fourVerEightHorLinelossRateList` +3. 但同时混入了大量无关 endpoint,包括: +4. 95598 工单接口 +5. 其他业务系统登录页 +6. 各类第三方库、文档、GitHub、StackOverflow 链接 +7. `pako/jszip` 等静态资源里的外部 URL + +影响: + +1. endpoint candidate 污染后,主链排序会失真 +2. 生成器最终把 `API_ENDPOINTS[0]` 和 `API_ENDPOINTS[1]` 当作主请求与补请求,但这只是“列表里前两个”,不是业务上正确的月/周接口矩阵 + +### 3.4 modes 完全未恢复 + +目标状态: + +- 至少应恢复 `month/week` 模式 +- 至少应恢复 mode switch field +- 至少应为不同模式恢复不同 request/column/response 口径 + +本次生成结果: + +1. `scene.toml` 中没有模式结构 +2. `generation-report.json` 中 `modes = []` +3. 虽然 `defaultMode = "month"`、`modeSwitchField = "period_mode"` 被挂上了默认值 +4. 但实际脚本没有 `MODES`、没有 `detectMode()`、没有分模式请求 + +影响: + +1. 这是最核心的语义缺失 +2. 当前生成物本质上无法表达 `tq` 场景的主业务结构 + +### 3.5 request contract 未恢复 + +目标状态: + +- 月模式与周模式应分别拥有独立请求体 +- 应恢复关键字段,例如 `orgno`、`tjzq`、`level`、`rows`、`page`、`sidx`、`sord` 以及周期相关字段 + +本次生成结果: + +1. `requestTemplate = null` +2. `params = []` +3. 脚本中的 `buildRequest()` 直接把 `args` 全量 `JSON.stringify` +4. 没有 mode-specific builder +5. 没有月/周不同字段 + +影响: + +1. 请求合同没有闭合 +2. 
即使接口名对了,也无法形成正确请求 + +### 3.6 response 与列定义未恢复 + +目标状态: + +- 线损月模式、周模式应各自拥有明确列定义 +- 至少应恢复 `content` 路径和 mode-specific columns + +本次生成结果: + +1. `responsePath = "content"` 这一点是对的 +2. 但 `columnDefs = []` +3. `normalizeRules.requiredFields = []` +4. 最终脚本用 `rows[0]` 的 key 动态拼 `column_defs` + +影响: + +1. 缺少业务列语义 +2. 缺少 required field 约束 +3. 无法达到 `tq` 基线中的导出与报表结构稳定性 + +## 4. 这次生成暴露出的真实问题 + +### 4.1 不是“完全识别不到线损” + +本次结果说明,系统并不是完全抓不到线损语义。 + +正向信号仍然存在: + +1. 抓到了 `branch_fields=tjzq, mode, week, month` +2. 抓到了线损核心接口名 +3. 抓到了 `responsePath = content` + +这说明底层“有感知”。 + +### 4.2 但当前仍停在“信号能抓到,主链不会重建” + +也就是说: + +1. 能看到线损相关 endpoint +2. 能看到 month/week 字样 +3. 但不会把这些信号重建成 `multi_mode_request.month_week_table` +4. 反而被分页、补数、过滤等噪声信号夺走主导权 + +### 4.3 readiness=A 目前不可信 + +本次样本虽然给了 `Readiness: A`,但实际核心合同大量缺失。 + +这说明当前 readiness 仍偏结构乐观,不能代表“可运行”或“接近内网可运行”。 + +## 5. 当前是否可以进入内网验证 + +结论:`不可以` + +原因很直接: + +1. archetype 错 +2. bootstrap 错 +3. modes 空 +4. request contract 空 +5. column defs 空 + +在这五项没有过关前,进入内网验证只会得到伪失败结果,没有业务验证价值。 + +## 6. 对后续批量迁移的含义 + +这次真实样本验证说明: + +1. 当前链路已经具备“从真实场景中抓取部分业务信号”的能力 +2. 但还不具备“自动重建 `tq` 级主业务语义”的能力 +3. 因此不能直接把 102 个场景送去大规模生成 +4. 必须先把 `G2` 主样本的 archetype/mode/request/column 四件套打透 + +## 7. 下一步建议 + +下一步仍严格按既定 plan 走,不扩散: + +1. 继续生成 `白银线损周报` +2. 再生成 `线损同期差异报表` +3. 对比这两个 `G2` 扩展样本是否复现同类偏差 +4. 输出一份 `G2` 家族首轮 blocker 汇总 + +只有当 `G2` 家族的 blocker 画像稳定后,才进入后续整改或更大范围迁移。 diff --git a/docs/superpowers/reports/2026-04-18-roadmap-execution-status-report.md b/docs/superpowers/reports/2026-04-18-roadmap-execution-status-report.md new file mode 100644 index 0000000..e3d37e5 --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-roadmap-execution-status-report.md @@ -0,0 +1,60 @@ +# Roadmap Execution Status Report + +## Scope + +This round continues executing the remaining in-plan work from `2026-04-17-scene-skill-60-to-90-roadmap-plan.md`. + +It does not create new family contracts. 
It records the current roadmap execution state after `G1-E`, `G2`, and `G3` were all promoted into the same family-asset shape. + +## Delivered + +Added: + +1. `tests/fixtures/generated_scene/roadmap_execution_status_2026-04-18.json` +2. `tests/roadmap_execution_status_test.rs` + +## Current Plan Position + +The roadmap is now recorded as: + +1. `Phase 0 = completed` +2. `Phase 1 = completed` +3. `Phase 2 = completed` +4. `Phase 3 = completed` +5. `Phase 4 = in_progress` + +Within `Phase 4`, the current track status is: + +1. `Track A / G2 = batch-expansion-promoted` +2. `Track B / G1-E = batch-expansion-promoted` +3. `Track C / G3 = batch-expansion-promoted` +4. `Track D = in_progress` +5. `Track E = status-overlay-established` + +## Why This Asset Exists + +At this point, the repo already has: + +1. family policy assets +2. family result assets +3. frozen ledger snapshot assets +4. current ledger overlay assets +5. family candidate-batch assets for all mainline families + +The roadmap still needed one status asset that states, in one place, where the plan currently stands and what remains queued or deferred. + +## Validation + +Passed: + +1. `cargo test --test roadmap_execution_status_test -- --nocapture` +2. `cargo test --test scene_ledger_status_test -- --nocapture` +3. `cargo test --test scene_generator_family_policy_test -- --nocapture` + +## Outcome + +The roadmap now has a repo-local execution-status asset that makes the remaining in-plan work explicit: + +1. continue from queued mainline candidates +2. keep Track E overlays synchronized +3. 
do not reopen already completed P0 and family-baseline work diff --git a/docs/superpowers/reports/2026-04-18-track-e-ledger-status-overlay-report.md b/docs/superpowers/reports/2026-04-18-track-e-ledger-status-overlay-report.md new file mode 100644 index 0000000..8b312cc --- /dev/null +++ b/docs/superpowers/reports/2026-04-18-track-e-ledger-status-overlay-report.md @@ -0,0 +1,62 @@ +# Track E Ledger Status Overlay Report + +## Scope + +This round does not rewrite the frozen workbook snapshot from `2026-04-18 16:48:05`. + +It adds a current-status overlay asset so the roadmap Track E state can reflect the code-backed family baselines that were established after the workbook snapshot was frozen. + +## Delivered + +Added: + +1. `tests/fixtures/generated_scene/scene_ledger_status_2026-04-18.json` +2. `tests/scene_ledger_status_test.rs` + +## Why This Overlay Exists + +By this point in the roadmap, the repo already contains code-backed baseline assets for: + +1. `G2` +2. `G1-E` +3. `G3` +4. `G6/G7/G8` boundary-runtime families + +But the original workbook snapshot still reflects an earlier mid-run state. + +Track E therefore needs two layers: + +1. frozen snapshot layer +2. current baseline overlay layer + +## Current Mainline Overlay Status + +The overlay now records: + +1. `G2 = batch-expansion-promoted` +2. `G1-E = batch-expansion-promoted` +3. `G3 = batch-expansion-promoted` + +And explicitly records promoted ledger-facing entries for: + +1. the `G2` promoted baseline and promoted expansion fixtures +2. the `G1-E` promoted baseline +3. the `G3` promoted baseline +4. the current `G3` promoted expansion fixtures + +## Validation + +Passed: + +1. `cargo test --test scene_ledger_status_test -- --nocapture` +2. `cargo test --test scene_ledger_snapshot_test -- --nocapture` +3. `cargo test --test scene_generator_family_policy_test -- --nocapture` + +## Outcome + +Track E now has a stable way to express both: + +1. the original frozen `102`-scene workbook state +2. 
the current code-backed roadmap status + +That means the next roadmap round can continue promoting candidates without losing visibility into which family baselines have already been formally established. diff --git a/docs/superpowers/reports/2026-04-19-102-final-coverage-status-rollup-report.md b/docs/superpowers/reports/2026-04-19-102-final-coverage-status-rollup-report.md new file mode 100644 index 0000000..e3166d0 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-final-coverage-status-rollup-report.md @@ -0,0 +1,71 @@ +# 102 Final Coverage Status Rollup Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-102-final-coverage-status-rollup-plan.md` +> Parent Framework: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Status: Completed + +## Scope + +This report publishes the final policy-governed coverage rollup after the residual 13 closure sequence. + +This plan does not update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json`, does not modify analyzer/generator logic, does not rerun the 102 sweep, and does not promote scenes to official board status. 
+ +## Inputs + +- `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` +- `tests/fixtures/generated_scene/residual_13_reconciliation_candidates_2026-04-19.json` +- `tests/fixtures/generated_scene/boundary_residual_hold_decision_2026-04-19.json` +- `tests/fixtures/generated_scene/bootstrap_target_residual_isolation_2026-04-19.json` +- `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` + +## Final Rollup + +| Final candidate status | Count | +| --- | ---: | +| `framework-auto-pass-candidate` | 95 | +| `framework-structured-fail-closed` | 7 | +| `framework-valid-host-bridge` | 0 | +| `hygiene-pass-candidate` | 0 | +| `hygiene-fail-closed-candidate` | 0 | +| `source-unreadable` | 0 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| `misclassified-unresolved` | 0 | +| `unresolved-followup-status` | 0 | + +## Remaining Non-Auto-Pass Scenes + +The remaining 7 records are explained residuals, not unresolved framework failures. 
+ +| Scene ID | Scene | Final status | Overlay | +| --- | --- | --- | --- | +| `sweep-033-scene` | `供电可靠率指标统计表` | `framework-structured-fail-closed` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-034-scene` | `供电可靠性数据质量自查报告月报` | `framework-structured-fail-closed` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-042-scene` | `国网金昌供电公司营商环境周例会报告` | `framework-structured-fail-closed` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-051-scene` | `嘉峪关可靠性分析报告` | `framework-structured-fail-closed` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-074-scene` | `同兴智能安全督查日报` | `framework-structured-fail-closed` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-085-scene` | `业扩报装管理制度` | `framework-structured-fail-closed` | `hold-for-host-bridge-runtime-roadmap` | +| `sweep-091-scene` | `用户停电频次分析监测` | `framework-structured-fail-closed` | `isolate-bootstrap-target-residual` | + +## Interpretation + +The 102-scene framework now has no unresolved category: + +- no `source-unreadable`; +- no `missing-source`; +- no `unsupported-family`; +- no `misclassified-unresolved`; +- no `unresolved-followup-status`. + +The final status is therefore `95` framework auto-pass candidates plus `7` explained structured fail-closed residuals. + +## Output Asset + +- `tests/fixtures/generated_scene/final_coverage_status_rollup_2026-04-19.json` + +## Next Step + +The next bounded step should be an official board reconciliation plan. That plan may consume this rollup and decide whether and how to update the official execution board. + +Per Route 6 policy, this rollup does not update the official board by itself. 
diff --git a/docs/superpowers/reports/2026-04-19-102-full-coverage-followup-sweep-report.md b/docs/superpowers/reports/2026-04-19-102-full-coverage-followup-sweep-report.md new file mode 100644 index 0000000..fb60f71 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-full-coverage-followup-sweep-report.md @@ -0,0 +1,44 @@ +# 102 Full Coverage Follow-Up Sweep Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-plan.md` + +## Scope + +This sweep reran the fixed 102-scene set after Route 2 through Route 6. It did not modify analyzer/generator logic and did not update the official execution board. + +## Raw Sweep Summary + +| Raw status | Count | +| --- | ---: | +| auto-pass | 89 | +| fail-closed-known | 13 | +| adjudicated-valid-host-bridge | 0 | +| source-unreadable | 0 | +| missing-source | 0 | +| unsupported-family | 0 | +| misclassified-unresolved | 0 | + +## Archetype Summary + +| Archetype | Count | +| --- | ---: | +| paginated_enrichment | 51 | +| host_bridge_workflow | 27 | +| multi_mode_request | 10 | +| local_doc_pipeline | 5 | +| single_request_enrichment | 5 | +| multi_endpoint_inventory | 2 | +| page_state_eval | 2 | + +## Coverage + +| Metric | Count | +| --- | ---: | +| auto-pass | 89 / 102 | +| raw non-auto-pass | 13 / 102 | + +## Output + +- `tests/fixtures/generated_scene/full_coverage_followup_sweep_2026-04-19.json` +- `examples/full_coverage_followup_sweep_2026-04-19` diff --git a/docs/superpowers/reports/2026-04-19-102-full-coverage-reconciliation-candidates-report.md b/docs/superpowers/reports/2026-04-19-102-full-coverage-reconciliation-candidates-report.md new file mode 100644 index 0000000..8febb46 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-full-coverage-reconciliation-candidates-report.md @@ -0,0 +1,42 @@ +# 102 Full Coverage Reconciliation Candidates Report + +> Date: 2026-04-19 +> Plan: 
`docs/superpowers/plans/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-plan.md` +> Policy: `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` + +## Scope + +This report applies Route 6 policy to the follow-up sweep and publishes candidate statuses only. It does not update `scene_execution_board_2026-04-18.json`. + +## Reconciliation Candidate Summary + +| Candidate status | Count | +| --- | ---: | +| framework-auto-pass-candidate | 89 | +| framework-structured-fail-closed | 13 | +| framework-valid-host-bridge | 0 | +| hygiene-pass-candidate | 0 | +| hygiene-fail-closed-candidate | 0 | +| source-unreadable | 0 | +| missing-source | 0 | +| unsupported-family | 0 | +| misclassified-unresolved | 0 | + +## Coverage View + +| Metric | Count | +| --- | ---: | +| framework-auto-pass-candidate | 89 / 102 | +| framework-supported or explained | 102 / 102 | +| remaining raw source-unreadable | 0 / 102 | +| unresolved misclassified | 0 / 102 | +| unsupported-family | 0 / 102 | + +## Stop Statement + +The follow-up sweep and reconciliation candidate reports are published. No new implementation route is started by this plan. 
+ +## Output + +- `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` +- `docs/superpowers/reports/2026-04-19-102-full-coverage-reconciliation-candidates-report.md` diff --git a/docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md b/docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md new file mode 100644 index 0000000..eb251e9 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md @@ -0,0 +1,135 @@ +# 102 Full Sweep Dry-Run Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-plan.md` +> Result: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +> Output Root: `examples/full_sweep_dry_run_2026-04-19` + +## Scope + +This run measured current generic `scene -> skill` coverage over the fixed `102` scene execution board. + +It was a measurement-only dry-run: + +1. no analyzer logic was changed +2. no generator logic was changed +3. `scene_execution_board_2026-04-18.json` was not updated +4. no scene was promoted from this result +5. failures were recorded, not fixed + +## Headline Numbers + +| Metric | Count | +| --- | ---: | +| Real-sample executed pass | 5 / 102 | +| Code-backed ledger coverage | 23 / 102 | +| Dry-run auto-pass | 40 / 102 | +| Dry-run actionable coverage | 66 / 102 | + +`dry-run actionable coverage` is `auto-pass + fail-closed-known`. 
+ +## Dry-Run Summary + +| Dry-run status | Count | +| --- | ---: | +| `auto-pass` | 40 | +| `fail-closed-known` | 26 | +| `misclassified` | 5 | +| `unsupported-family` | 0 | +| `missing-source` | 0 | +| `source-unreadable` | 31 | +| Total | 102 | + +## Archetype Distribution + +| Inferred archetype | Count | +| --- | ---: | +| `host_bridge_workflow` | 31 | +| `paginated_enrichment` | 8 | +| `multi_mode_request` | 3 | +| `multi_endpoint_inventory` | 2 | +| `page_state_eval` | 2 | +| `none` | 56 | + +The `none` bucket includes generator failures and timeout cases that did not produce a `generation-report.json`. + +## Auto-Pass Shape + +The `40` auto-pass scenes are distributed as: + +| Inferred archetype | Auto-pass count | +| --- | ---: | +| `host_bridge_workflow` | 26 | +| `paginated_enrichment` | 8 | +| `multi_mode_request` | 3 | +| `multi_endpoint_inventory` | 2 | +| `page_state_eval` | 1 | + +This means the current generic generator is no longer limited to the `23` code-backed ledger scenes. The conservative ledger coverage is lower because it only counts scenes already mapped into formal baseline or boundary assets. + +## Non-Pass Buckets + +### Source-Unreadable + +`31` scenes timed out during this bounded dry-run. + +All timeout records use: + +`generator timeout after 30s` + +These should not be interpreted as unsupported family evidence. They are dry-run execution-limit failures and need separate timeout/performance triage before capability conclusions are drawn. + +### Fail-Closed-Known + +`26` scenes failed without an auto-pass result but were recorded with a known dry-run failure category. + +Top reasons: + +| Reason | Count | +| --- | ---: | +| `generator failed without generation report` | 25 | +| `bootstrap_target` | 1 | + +The `generator failed without generation report` bucket is actionable but too broad for implementation work. It should be split in a later bounded triage pass before any fixes are attempted. 
+ +### Misclassified + +`5` scenes produced a package, but the inferred archetype conflicted with the current board group: + +| Scene | Current group | Inferred archetype | +| --- | --- | --- | +| `95598报修工单日管控` | `G3` | `host_bridge_workflow` | +| `95598重要服务事项报备统计表` | `G3` | `host_bridge_workflow` | +| `用电报装信息统计列表` | `G1-E` | `host_bridge_workflow` | +| `配网支撑月报(95598抢修统计报表)` | `G3` | `host_bridge_workflow` | +| `高低压新增报装容量月度统计表` | `G1-E` | `host_bridge_workflow` | + +This is the clearest blocker category from the dry-run because it indicates current generic routing can over-prefer `host_bridge_workflow` on some scenes that already have board-level family expectations. + +## Interpretation + +The four coverage numbers answer different questions: + +1. `5 / 102` is the strict real-sample pass count. +2. `23 / 102` is the formal code-backed ledger coverage. +3. `40 / 102` is the current generic dry-run auto-pass count. +4. `66 / 102` is the current generic actionable coverage count. + +The key result is that the generic generator currently auto-passes more scenes than the formal ledger coverage shows, but the result is not clean enough to promote automatically because: + +1. `31` scenes hit bounded dry-run timeouts. +2. `5` scenes show board-vs-archetype mismatch. +3. `26` scenes need more specific failure extraction before implementation work. + +## Recommended Next Blocker + +Do not start implementation from this report directly. + +The next bounded step should be a dry-run triage pass, with priority: + +1. split the `31` timeout cases into true timeout, oversized source, and command-level hang +2. inspect the `5` misclassified cases as the first routing-quality sample +3. refine the `25` generic no-report failures into concrete failure categories + +This report does not update the execution board and does not promote any scene. 
+ diff --git a/docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-triage-report.md b/docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-triage-report.md new file mode 100644 index 0000000..d309169 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-triage-report.md @@ -0,0 +1,117 @@ +# 102 Full Sweep Dry-Run Triage Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-102-full-sweep-dry-run-triage-plan.md` +> Source Dry-Run: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +> Triage Result: `tests/fixtures/generated_scene/full_sweep_dry_run_triage_2026-04-19.json` + +## Scope + +This triage explains the `62` non-pass records from the `102` scene full sweep. + +It is classification-only: + +1. no analyzer implementation was changed +2. no generator implementation was changed +3. `scene_execution_board_2026-04-18.json` was not updated +4. no scene was promoted +5. no implementation correction was started + +## Input Buckets + +| Bucket | Count | +| --- | ---: | +| Timeout / `source-unreadable` | 31 | +| Misclassified | 5 | +| No-report failure | 25 | +| Bootstrap target failure | 1 | +| Total non-pass records | 62 | + +## Timeout Triage + +All `31` timeout records originally had: + +`generator timeout after 30s` + +The second-level labels are: + +| Timeout label | Count | +| --- | ---: | +| `timeout-unvalidated-source` | 19 | +| `timeout-large-source` | 8 | +| `timeout-known-family-sample` | 4 | + +No timeout record is treated as unsupported family by default. + +The `timeout-known-family-sample` records are important because they include scenes that already have current family mapping. These should be separated from the unvalidated-source and large-source buckets before any coverage conclusion is drawn. 
+ +## Misclassification Triage + +All `5` misclassified records share the same second-level label: + +`route-overprefer-host-bridge` + +| Scene | Expected group | Expected archetype | Inferred archetype | +| --- | --- | --- | --- | +| `95598报修工单日管控` | `G3` | `paginated_enrichment` | `host_bridge_workflow` | +| `95598重要服务事项报备统计表` | `G3` | `paginated_enrichment` | `host_bridge_workflow` | +| `用电报装信息统计列表` | `G1-E` | `single_request_enrichment` | `host_bridge_workflow` | +| `配网支撑月报(95598抢修统计报表)` | `G3` | `paginated_enrichment` | `host_bridge_workflow` | +| `高低压新增报装容量月度统计表` | `G1-E` | `single_request_enrichment` | `host_bridge_workflow` | + +This is the clearest routing-quality signal in the dry-run. The current generic route can over-prefer `host_bridge_workflow` when board-level expectations already point to `G3` or `G1-E`. + +This report does not correct routing logic. + +## No-Report Failure Triage + +All `25` generic no-report failures were assigned the same failure stage: + +`readiness-before-report` + +The stderr archetype distribution is: + +| Stderr archetype | Count | +| --- | ---: | +| `paginated_enrichment` | 17 | +| `local_doc_pipeline` | 4 | +| `multi_mode_request` | 2 | +| `host_bridge_workflow` | 1 | +| `single_request_enrichment` | 1 | + +This means these failures are not source-missing failures. They reached a known archetype path, then failed before a structured `generation-report.json` was emitted. + +The dominant no-report shape is: + +`workflow evidence is incomplete for archetype paginated_enrichment` + +## Bootstrap Target Failure + +One failure remains separately tracked: + +| Scene | Inferred archetype | Reason | +| --- | --- | --- | +| `用户停电频次分析监测` | `page_state_eval` | `bootstrap_target` | + +It is not merged into the no-report bucket. + +## Recommended Next Blocker + +The next blocker should be handled in this order: + +1. `timeout-known-family-sample` +2. `route-overprefer-host-bridge` +3. `readiness-before-report` + +Reasoning: + +1. 
`timeout-known-family-sample` affects already-mapped scenes and can distort coverage if left mixed with unvalidated timeouts. +2. `route-overprefer-host-bridge` is a clean routing-quality issue across existing `G3` and `G1-E` expectations. +3. `readiness-before-report` is high volume, but it first needs structured fail-closed reporting before implementation correction. + +## Stop Statement + +This triage is complete and stops here. + +It does not start an implementation plan, does not promote scenes, and does not update the execution board. + diff --git a/docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-coverage-delta-report.md b/docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-coverage-delta-report.md new file mode 100644 index 0000000..c36616e --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-coverage-delta-report.md @@ -0,0 +1,82 @@ +# 102 Full Sweep Improvement Coverage Delta Report + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-19-102-full-sweep-improvement-roadmap-plan.md` +> Baseline: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +> Follow-Up: `tests/fixtures/generated_scene/full_sweep_improvement_followup_2026-04-19.json` + +## Headline Delta + +| Metric | Baseline | Follow-up | Delta | +| --- | ---: | ---: | ---: | +| Dry-run auto-pass | 40 | 48 | +8 | +| Dry-run actionable coverage | 66 | 96 | +30 | +| Timeout / source-unreadable | 31 | 2 | -29 | +| Misclassified | 5 | 4 | -1 | +| Fail-closed-known | 26 | 48 | +22 | + +The increase in `fail-closed-known` is expected. The roadmap converted many timeout/no-report cases into structured fail-closed outputs instead of process-level failures. 
+ +## Follow-Up Status Counts + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `misclassified` | 4 | +| `source-unreadable` | 2 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| Total | 102 | + +## Remaining Timeout Cases + +Only `2` timeout cases remain: + +1. `任务报表` +2. `力禾动环系统巡视记录` + +These are not known-family samples. + +## Remaining Misclassification Cases + +`4` route conflicts remain: + +| Scene | Expected group | Inferred archetype | +| --- | --- | --- | +| `95598报修工单日管控` | `G3` | `host_bridge_workflow` | +| `95598重要服务事项报备统计表` | `G3` | `host_bridge_workflow` | +| `台区线损台区月度高负损预测` | `G2` | `host_bridge_workflow` | +| `配网支撑月报(95598抢修统计报表)` | `G3` | `host_bridge_workflow` | + +One previous `G1-E` conflict, `高低压新增报装容量月度统计表`, now returns to `single_request_enrichment` and auto-passes. + +`用电报装信息统计列表` now returns to `single_request_enrichment` but remains fail-closed, so it is no longer a route-overpreference case. + +## Structured Fail-Closed Result + +The baseline had `25` no-report failures. The follow-up emits structured fail-closed reports for these paths, with: + +1. inferred archetype +2. `generationStatus = fail-closed` +3. `failureStage = readiness-before-report` +4. blocker reason +5. readiness missing pieces and risks + +This makes failures machine-readable without weakening gates. + +## Interpretation + +The roadmap delivered measurable improvement: + +1. timeout noise was reduced from `31` to `2` +2. auto-pass increased from `40` to `48` +3. actionable coverage increased from `66` to `96` +4. no-report failures were converted into structured fail-closed results + +Remaining work is no longer the same broad ambiguity. The next blocker is the smaller route-conflict set, especially `G3/G2` vs `host_bridge_workflow`. + +## Stop Statement + +This report does not update `scene_execution_board_2026-04-18.json` and does not promote any scene. 
+ diff --git a/docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-roadmap-closure-report.md b/docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-roadmap-closure-report.md new file mode 100644 index 0000000..22a3baa --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-roadmap-closure-report.md @@ -0,0 +1,80 @@ +# 102 Full Sweep Improvement Roadmap Closure Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-102-full-sweep-improvement-roadmap-plan.md` + +## Closure Summary + +The roadmap has completed its planned cycle: + +1. baseline dry-run and triage were frozen +2. known-family timeouts were diagnosed +3. source scanning was bounded to reduce vendor/static-file timeout noise +4. route over-preference was partially corrected and adjudicated +5. readiness-before-report failures now emit structured fail-closed reports +6. bootstrap target remained isolated +7. a follow-up `102` sweep quantified coverage delta + +## Final Coverage Delta + +| Metric | Baseline | Follow-up | Delta | +| --- | ---: | ---: | ---: | +| Auto-pass | 40 | 48 | +8 | +| Actionable coverage | 66 | 96 | +30 | +| Timeout | 31 | 2 | -29 | +| Misclassified | 5 | 4 | -1 | + +## Implementation Scope + +The implementation stayed within the roadmap: + +1. no new family was added +2. `G4/G5` were not started +3. login recovery was not implemented +4. host runtime transport was not implemented +5. attachment/local document runtime was not implemented +6. `scene_execution_board_2026-04-18.json` was not updated by the follow-up sweep + +## Files Changed + +Code changes: + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` + +New result assets: + +1. `tests/fixtures/generated_scene/known_family_timeout_diagnostic_2026-04-19.json` +2. `tests/fixtures/generated_scene/full_sweep_improvement_followup_2026-04-19.json` + +New reports: + +1. 
`docs/superpowers/reports/2026-04-19-known-family-timeout-diagnostic-report.md` +2. `docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-coverage-delta-report.md` +3. `docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-roadmap-closure-report.md` + +## Verification + +Executed checks: + +1. `cargo test --test scene_generator_test -- --nocapture` +2. `cargo test --test scene_generator_canonical_test -- --nocapture` +3. `cargo test --test post_roadmap_execution_assets_test -- --nocapture` +4. `cargo test --test scene_generator_family_policy_test -- --nocapture` +5. follow-up full sweep over the fixed `102` scene set + +## Remaining Blocker + +The remaining highest-value blocker is no longer the broad timeout/no-report ambiguity. + +The next blocker is the smaller routing-conflict set: + +1. `G3 -> host_bridge_workflow` +2. `G2 -> host_bridge_workflow` + +This should be handled by a new bounded route-conflict roadmap only if further work is requested. + +## Stop Statement + +This roadmap stops here. It does not automatically start a new route-conflict implementation plan and does not update the execution board. 
+ diff --git a/docs/superpowers/reports/2026-04-19-102-sweep-status-reconciliation-report.md b/docs/superpowers/reports/2026-04-19-102-sweep-status-reconciliation-report.md new file mode 100644 index 0000000..088013b --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-102-sweep-status-reconciliation-report.md @@ -0,0 +1,83 @@ +# 102 Sweep Status Reconciliation Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-102-sweep-status-reconciliation-plan.md` +> Reconciliation Asset: `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` + +## Input Validation + +The reconciliation used only the frozen inputs required by the plan: + +| Input | Count | +| --- | ---: | +| Follow-up sweep scenes | 102 | +| Route-conflict decisions | 4 | + +All `4` route-conflict decisions matched a follow-up `sceneId`, and each matched record kept raw `dryRunStatus = misclassified`. No `102` sweep rerun was performed. + +## Raw Follow-Up Counts + +| Raw dry-run status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `misclassified` | 4 | +| `source-unreadable` | 2 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | + +The raw follow-up asset still reports `misclassified = 4` because it is the original sweep measurement. Those `4` records are not rewritten in place. + +## Reconciled Counts + +| Reconciled status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 2 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| `misclassified-unresolved` | 0 | +| Total | 102 | + +The raw `misclassified = 4` has been fully adjudicated by `remaining_route_conflict_decisions_2026-04-19.json`. In the reconciled view, these no longer represent unresolved route bugs; they are valid host-bridge workflows whose prior group expectations did not close the G2/G3 contracts. 
+ +## Valid Host-Bridge Adjudications + +| Scene ID | Scene | Raw status | Route decision | Reconciled status | +| --- | --- | --- | --- | --- | +| `sweep-007-95598` | 95598报修工单日管控 | `misclassified` | `valid-host-bridge-workflow` | `adjudicated-valid-host-bridge` | +| `sweep-009-95598` | 95598重要服务事项报备统计表 | `misclassified` | `valid-host-bridge-workflow` | `adjudicated-valid-host-bridge` | +| `sweep-029-scene` | 台区线损台区月度高负损预测 | `misclassified` | `valid-host-bridge-workflow` | `adjudicated-valid-host-bridge` | +| `sweep-093-95598` | 配网支撑月报(95598抢修统计报表) | `misclassified` | `valid-host-bridge-workflow` | `adjudicated-valid-host-bridge` | + +Each adjudicated record preserves `decisionReason` and `evidenceSummary` in the reconciliation JSON. + +## Remaining Timeout Inputs + +| Scene ID | Scene | Raw status | Reason | +| --- | --- | --- | --- | +| `sweep-015-scene` | 任务报表 | `source-unreadable` | generator timeout after 45s | +| `sweep-025-scene` | 力禾动环系统巡视记录 | `source-unreadable` | generator timeout after 45s | + +These `2` records remain future timeout/source-scale diagnostic inputs. + +After route adjudication, the remaining future inputs are exactly these `2` timeout records and the `48` structured fail-closed records summarized below. + +## Fail-Closed Known Summary + +| Inferred archetype | Reason | Count | +| --- | --- | ---: | +| `paginated_enrichment` | workflow evidence is incomplete before package generation | 35 | +| `local_doc_pipeline` | workflow evidence is incomplete before package generation | 5 | +| `multi_mode_request` | workflow evidence is incomplete before package generation | 4 | +| `single_request_enrichment` | workflow evidence is incomplete before package generation | 2 | +| `host_bridge_workflow` | workflow evidence is incomplete before package generation | 1 | +| `page_state_eval` | bootstrap_target | 1 | + +The remaining structured future inputs are `48` `fail-closed-known` records. 
They are visible as workflow-evidence or contract-completion analysis candidates, but this plan does not start that roadmap or implement fixes. + +## Scope Confirmation + +No execution-board status was changed. No scene was promoted. No family baseline was added or modified. `src/generated_scene/analyzer.rs`, `src/generated_scene/generator.rs`, and `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` were not modified by this reconciliation. diff --git a/docs/superpowers/reports/2026-04-19-bootstrap-target-normalization-roadmap-report.md b/docs/superpowers/reports/2026-04-19-bootstrap-target-normalization-roadmap-report.md new file mode 100644 index 0000000..bb53e6c --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-bootstrap-target-normalization-roadmap-report.md @@ -0,0 +1,55 @@ +# Bootstrap Target Normalization Roadmap Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-bootstrap-target-normalization-roadmap-plan.md` +> Fixed scene: `sweep-091-scene` + +## Scope + +This plan handled only `sweep-091-scene / 配网异常设备监控统计`. + +It did not update the official board, did not start host-bridge runtime work, and did not modify `analyzer.rs` or `generator.rs`. + +## Baseline Finding + +The current official board had this scene as: + +- `currentFrameworkStatus = framework-structured-fail-closed` +- `currentFrameworkArchetype = page_state_eval` +- `currentFrameworkReadiness = C` +- `currentFrameworkNextAction = future-bootstrap-target-normalization-roadmap-input` + +The residual was caused by an old path where bootstrap fields were empty. 
A fresh bounded rerun against the official-board source scene recovered the deterministic target: + +- `expectedDomain = 21.77.244.194:18890` +- `targetUrl = http://21.77.244.194:18890/mainSystem` + +## Follow-Up Result + +Follow-up asset: + +`tests/fixtures/generated_scene/bootstrap_target_normalization_followup_2026-04-19.json` + +| Scene id | Scene name | Archetype | Readiness | Result | +| --- | --- | --- | --- | --- | +| `sweep-091-scene` | `配网异常设备监控统计` | `single_request_enrichment` | `A` | `auto-pass` | + +## Reconciliation Candidate + +Candidate asset: + +`tests/fixtures/generated_scene/bootstrap_target_normalization_reconciliation_candidates_2026-04-19.json` + +The scene is now published as: + +`framework-auto-pass-candidate` + +Per promotion policy, the official board must be updated by a dedicated final-2 board reconciliation refresh, not by this roadmap. + +## Notes + +Earlier residual reports include a scene-name drift where the same `sweep-091-scene` id was associated with `用户停电频次分析监测`. This roadmap uses the official-board source scene `配网异常设备监控统计` and does not edit historical assets. + +## Closure + +This roadmap is complete. No implementation slice was required; the single residual closed through bounded rerun and deterministic bootstrap target recovery. diff --git a/docs/superpowers/reports/2026-04-19-bootstrap-target-residual-isolation-report.md b/docs/superpowers/reports/2026-04-19-bootstrap-target-residual-isolation-report.md new file mode 100644 index 0000000..0e7774b --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-bootstrap-target-residual-isolation-report.md @@ -0,0 +1,18 @@ +# Bootstrap Target Residual Isolation Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-bootstrap-target-residual-isolation-plan.md` + +## Scope + +This Route D plan isolates the single page-state/bootstrap-target residual. It does not implement login recovery or runtime navigation. 
+ +## Decision + +| Scene | Archetype | Status | Decision | Next Action | +| --- | --- | --- | --- | --- | +| 用户停电频次分析监测 | page_state_eval | framework-structured-fail-closed | isolate-bootstrap-target-residual | future-bootstrap-target-normalization-roadmap-input | + +## Stop Statement + +Isolation assets are published. No login recovery, runtime navigation implementation, board update, or generator/analyzer change is started by this plan. diff --git a/docs/superpowers/reports/2026-04-19-boundary-fail-closed-decision-report.md b/docs/superpowers/reports/2026-04-19-boundary-fail-closed-decision-report.md new file mode 100644 index 0000000..a5a1589 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-fail-closed-decision-report.md @@ -0,0 +1,53 @@ +# Boundary Fail-Closed Decision Report + +Date: 2026-04-19 + +Parent framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan` + +Parent route: `Route 5 / boundary-family fail-closed` + +Plan: `2026-04-19-boundary-fail-closed-decision-plan.md` + +## Scope + +This is a decision-only route. It does not change analyzer/generator code and does not update the execution board. + +Fixed Route 5 input buckets: + +| Bucket | Count | +| --- | ---: | +| `local_doc_pipeline` | 5 | +| `host_bridge_workflow` | 1 | +| `page_state_eval/bootstrap_target` | 1 | + +## Decisions + +| Bucket | Decision | Reason | +| --- | --- | --- | +| `local_doc_pipeline` | `hold-as-boundary-fail-closed` | Requires local document runtime and attachment handling beyond the current full-coverage implementation routes. | +| `host_bridge_workflow` | `hold-as-boundary-fail-closed` | Remaining boundary record depends on host bridge execution semantics outside this decision-only route. | +| `page_state_eval/bootstrap_target` | `isolate-bootstrap-target` | Bootstrap target resolution is a separate navigation/login target problem and must stay isolated from contract recovery. | + +## Result + +Route 5 produces no code-level coverage delta. 
Its expected delta is decision quality: + +- unresolved boundary ambiguity: `0` +- open boundary implementation slices: `0` +- held/deferred boundary records: `7` + +## Boundary + +This route did not: + +- modify `src/generated_scene/analyzer.rs` +- modify `src/generated_scene/generator.rs` +- update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +- start boundary implementation +- create a new family + +## Stop Statement + +Route 5 is decision-complete. The next child plan in the 102 full-coverage sequence is Route 6: + +`2026-04-19-promotion-and-board-reconciliation-policy-plan.md` diff --git a/docs/superpowers/reports/2026-04-19-boundary-family-entry-decision-report.md b/docs/superpowers/reports/2026-04-19-boundary-family-entry-decision-report.md new file mode 100644 index 0000000..e4f4a09 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-family-entry-decision-report.md @@ -0,0 +1,47 @@ +# Boundary Family Entry Decision Report + +> Date: 2026-04-19 +> Roadmap: [2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md) + +## Decision + +One boundary family is selected as the next real-sample entry candidate: + +1. `G7` + +The other boundary families remain held: + +1. `G6` +2. `G8` + +## Comparison Result + +### G6 + +`G6` already has a code-backed minimum runtime contract, but the next real-sample step would require host bridge execution semantics. + +That prerequisite is still too platform-shaped for the smallest bounded next slice. + +### G7 + +`G7` already has a minimal runnable runtime contract and only lacks real multi-endpoint aggregation verification. + +Among `G6/G7/G8`, this is the smallest next capability gap. + +So `G7` is selected. 
+ +### G8 + +`G8` already has a code-backed minimum runtime contract, but the next real-sample step still depends on local document pipeline runtime and attachment handling. + +That remains heavier than the bounded next-step budget. + +## Result + +The roadmap now has one bounded next direction: + +1. `G7 real-sample entry` + +This report does not execute a real sample yet. + +It only closes the boundary-family priority ambiguity. diff --git a/docs/superpowers/reports/2026-04-19-boundary-family-real-sample-entry-roadmap-closure-report.md b/docs/superpowers/reports/2026-04-19-boundary-family-real-sample-entry-roadmap-closure-report.md new file mode 100644 index 0000000..f643eab --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-family-real-sample-entry-roadmap-closure-report.md @@ -0,0 +1,32 @@ +# Boundary Family Real-Sample Entry Roadmap Closure Report + +> Date: 2026-04-19 +> Roadmap: [2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-boundary-family-real-sample-entry-roadmap-plan.md) + +## Result + +This roadmap is closed with one bounded next direction: + +1. `G7 real-sample entry` + +## What Was Closed + +1. `G6/G7/G8` no longer compete ambiguously for next priority +2. only one next-family direction is opened +3. a follow-up bounded `design + plan` now exists for the selected direction + +## Hold Result + +The following families remain held: + +1. `G6` +2. `G8` + +The deferred families remain untouched: + +1. `G4` +2. `G5` + +## Next Step + +Execute the bounded `G7 real-sample entry` plan rather than reopening this roadmap. 
diff --git a/docs/superpowers/reports/2026-04-19-boundary-residual-hold-decision-report.md b/docs/superpowers/reports/2026-04-19-boundary-residual-hold-decision-report.md new file mode 100644 index 0000000..1f22e9e --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-residual-hold-decision-report.md @@ -0,0 +1,31 @@ +# Boundary Residual Hold Decision Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-boundary-residual-hold-decision-plan.md` + +## Scope + +This Route C plan is decision-only. It classifies the remaining boundary residual records and does not implement runtime support. + +## Summary + +| Bucket | Count | Decision | +| --- | ---: | --- | +| local_doc_pipeline | 5 | hold-for-local-doc-runtime-roadmap | +| host_bridge_workflow | 1 | hold-for-host-bridge-runtime-roadmap | +| unresolved boundary ambiguity | 0 | none | + +## Decisions + +| Scene | Archetype | Decision | Next Action | +| --- | --- | --- | --- | +| 供电可靠率指标统计表 | local_doc_pipeline | hold-for-local-doc-runtime-roadmap | future-local-doc-runtime-roadmap-input | +| 供电可靠性数据质量自查报告月报 | local_doc_pipeline | hold-for-local-doc-runtime-roadmap | future-local-doc-runtime-roadmap-input | +| 国网金昌供电公司营商环境周例会报告 | local_doc_pipeline | hold-for-local-doc-runtime-roadmap | future-local-doc-runtime-roadmap-input | +| 嘉峪关可靠性分析报告 | local_doc_pipeline | hold-for-local-doc-runtime-roadmap | future-local-doc-runtime-roadmap-input | +| 同兴智能安全督查日报 | local_doc_pipeline | hold-for-local-doc-runtime-roadmap | future-local-doc-runtime-roadmap-input | +| 业扩报装管理制度 | host_bridge_workflow | hold-for-host-bridge-runtime-roadmap | future-host-bridge-runtime-roadmap-input | + +## Stop Statement + +Decision assets are published. No boundary implementation, board update, or generator/analyzer change is started by this plan. 
diff --git a/docs/superpowers/reports/2026-04-19-boundary-runtime-entry-decision.md b/docs/superpowers/reports/2026-04-19-boundary-runtime-entry-decision.md new file mode 100644 index 0000000..636ad01 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-runtime-entry-decision.md @@ -0,0 +1,33 @@ +# Boundary Runtime Entry Decision + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md` + +## Decision + +`G6/G7/G8` do not enter the next roadmap. + +They remain boundary families. + +## Reason + +The current executed validation pressure is still concentrated in mainline families: + +1. `G2` real sample: mismatch +2. `G1-E` real sample: pass +3. `G3` real sample: mismatch + +No boundary family currently has stronger execution pressure than the `G3` and `G2` mainline failures. + +## Inclusion / Exclusion Result + +Excluded from next roadmap: + +1. `G6` +2. `G7` +3. `G8` + +Entry condition remains unchanged: + +1. a real sample must require boundary-family runtime semantics strongly enough to outrank unresolved mainline gaps +2. the required runtime gap must be explicit rather than inferred by drift diff --git a/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-decision-report.md b/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-decision-report.md new file mode 100644 index 0000000..a787520 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-decision-report.md @@ -0,0 +1,47 @@ +# Boundary Runtime Prerequisites Decision Report + +> Date: 2026-04-19 +> Roadmap: [2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md) + +## Decision + +One bounded prerequisite direction is selected: + +1. `G6 host-bridge prerequisites` + +The following prerequisite direction remains held: + +1. 
`G8 local-doc prerequisites` + +## Comparison Result + +### G6 Host-Bridge Prerequisites + +`G6` is blocked by one clearer prerequisite line: + +1. host bridge real-sample execution semantics + +That is still platform-shaped, but it is narrower than the combined local-doc burden on `G8`. + +So `G6 host-bridge prerequisites` is selected. + +### G8 Local-Doc Prerequisites + +`G8` is blocked by a broader prerequisite bundle: + +1. local document runtime +2. attachment handling + +That bundle is heavier than the next bounded slice should absorb. + +So `G8 local-doc prerequisites` remains held. + +## Result + +The roadmap now has one bounded next direction: + +1. `G6 host-bridge prerequisites` + +This report does not implement the prerequisite. + +It only closes the prerequisite priority ambiguity. diff --git a/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-roadmap-closure-report.md b/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-roadmap-closure-report.md new file mode 100644 index 0000000..8f0ccf0 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-roadmap-closure-report.md @@ -0,0 +1,30 @@ +# Boundary Runtime Prerequisites Roadmap Closure Report + +> Date: 2026-04-19 +> Roadmap: [2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-boundary-runtime-prerequisites-roadmap-plan.md) + +## Result + +This roadmap is closed with one bounded next direction: + +1. `G6 host-bridge prerequisites` + +## What Was Closed + +1. `G6` and `G8` no longer compete ambiguously at the prerequisite level +2. one follow-up bounded `design + plan` now exists for the selected prerequisite direction + +## Hold Result + +The following direction remains held: + +1. `G8 local-doc prerequisites` + +The deferred families remain untouched: + +1. `G4` +2. 
`G5` + +## Next Step + +Execute the bounded `G6 host-bridge prerequisites` plan rather than reopening this roadmap. diff --git a/docs/superpowers/reports/2026-04-19-deferred-family-entry-decision.md b/docs/superpowers/reports/2026-04-19-deferred-family-entry-decision.md new file mode 100644 index 0000000..42246d4 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-deferred-family-entry-decision.md @@ -0,0 +1,35 @@ +# Deferred Family Entry Decision + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md` + +## Decision + +`G4` and `G5` do not enter the next roadmap. + +Current status remains: + +1. `G4`: deferred +2. `G5`: degraded + +## Reason + +The current validation evidence does not justify opening deferred families. + +The strongest observed pressure is still: + +1. `G3` real-sample archetype collapse +2. `G2` real-sample contract mismatch + +Opening `G4/G5` before addressing those mainline failures would violate the bounded-scope rule of the roadmap. + +## Updated Scope Boundary + +The next roadmap remains limited to: + +1. executed mainline mismatch correction + +The next roadmap remains closed to: + +1. `G4` implementation +2. 
`G5` implementation diff --git a/docs/superpowers/reports/2026-04-19-final-2-official-board-reconciliation-refresh-report.md b/docs/superpowers/reports/2026-04-19-final-2-official-board-reconciliation-refresh-report.md new file mode 100644 index 0000000..af07bc2 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-final-2-official-board-reconciliation-refresh-report.md @@ -0,0 +1,36 @@ +# Final 2 Official Board Reconciliation Refresh Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-final-2-official-board-reconciliation-refresh-plan.md` + +## Inputs + +- `tests/fixtures/generated_scene/bootstrap_target_normalization_reconciliation_candidates_2026-04-19.json` +- `tests/fixtures/generated_scene/host_bridge_runtime_reconciliation_candidates_2026-04-19.json` + +## Result + +| Metric | Count | +| --- | ---: | +| `framework-auto-pass` | 102 | +| `framework-structured-fail-closed` | 0 | +| unresolved framework statuses | 0 | + +## Updated Scenes + +| Scene id | Official board name | Candidate source name | New framework status | Archetype | +| --- | --- | --- | --- | --- | +| `sweep-091-scene` | `配网异常设备监控统计` | `配网异常设备监控统计` | `framework-auto-pass` | `single_request_enrichment` | +| `sweep-085-scene` | `计量资产库存统计` | `计量资产库存统计` | `framework-auto-pass` | `multi_endpoint_inventory` | + +## Remaining Structured Fail-Closed + +None. + +## Boundary + +This refresh updated only framework-layer fields in the official board. It did not modify analyzer/generator code and did not start another runtime roadmap. + +## Closure + +This plan is complete. The official board now has `102` framework auto-pass scenes and `0` framework structured fail-closed residuals. 
diff --git a/docs/superpowers/reports/2026-04-19-final-2-residual-roadmap-prioritization-report.md b/docs/superpowers/reports/2026-04-19-final-2-residual-roadmap-prioritization-report.md new file mode 100644 index 0000000..b7c0f08 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-final-2-residual-roadmap-prioritization-report.md @@ -0,0 +1,51 @@ +# Final 2 Residual Roadmap Prioritization Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-final-2-residual-roadmap-prioritization-plan.md` +> Parent sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` + +## Scope + +This decision consumed the current official board state after local-doc reconciliation refresh. Only two framework structured fail-closed residuals remain: + +| Scene id | Scene name | Current archetype | Next action | +| --- | --- | --- | --- | +| `sweep-085-scene` | `计量资产库存统计` | `host_bridge_workflow` | `future-host-bridge-runtime-roadmap-input` | +| `sweep-091-scene` | `配网异常设备监控统计` | `page_state_eval` | `future-bootstrap-target-normalization-roadmap-input` | + +No code was changed, and the official board was not updated by this plan. + +## Decision + +Selected first roadmap: + +`bootstrap target normalization roadmap` + +Selected plan: + +`2026-04-19-bootstrap-target-normalization-roadmap-plan.md` + +## Rationale + +Bootstrap target normalization is the narrower next step: + +1. It is a single-scene residual. +2. It targets deterministic bootstrap target recovery or a narrower named hold. +3. It avoids host-runtime transport semantics. +4. It has lower regression risk against already passing host-bridge paths. + +Host-bridge runtime remains queued: + +`2026-04-19-host-bridge-runtime-roadmap-plan.md` + +## Output Asset + +Decision asset: + +`tests/fixtures/generated_scene/final_2_residual_roadmap_prioritization_2026-04-19.json` + +## Closure + +This prioritization plan is complete. 
The next executable plan is: + +`docs/superpowers/plans/2026-04-19-bootstrap-target-normalization-roadmap-plan.md` diff --git a/docs/superpowers/reports/2026-04-19-g1e-remaining-fail-closed-closure-report.md b/docs/superpowers/reports/2026-04-19-g1e-remaining-fail-closed-closure-report.md new file mode 100644 index 0000000..42b14c9 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g1e-remaining-fail-closed-closure-report.md @@ -0,0 +1,65 @@ +# G1-E Remaining Fail-Closed Closure Report + +Date: 2026-04-19 + +Parent framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan` + +Parent route: `Route 4 / G1-E single_request_enrichment closure` + +Plan: `2026-04-19-g1e-remaining-fail-closed-closure-plan.md` + +## Scope + +This report closes the fixed Route 4 bucket: + +- `single_request_enrichment` structured fail-closed scenes: `2` +- Primary blocker: `output_columns` +- Allowed implementation area: bounded G1-E output column recovery + +This run did not update `scene_execution_board_2026-04-18.json`, did not create a new family, and did not begin Route 5. + +## Implementation + +The analyzer now recovers G1-E output columns from two additional source patterns: + +- `qxzndbObj.cols.push([...])` +- wide `titleList` rows such as `[field, top, middle, order, label]` + +This keeps the correction in the G1-E response/merge contract layer. It does not change routing or gate policy. 
+ +## Results + +| Scene ID | Scene | Baseline | Follow-up | Result | +| --- | --- | --- | --- | --- | +| `sweep-013-scene` | `业扩报装质量评价体系` | `B / fail-closed / output_columns` | `A / auto-pass` | resolved | +| `sweep-068-scene` | `用电报装信息统计列表` | `B / fail-closed / output_columns` | `A / auto-pass` | resolved | + +Summary: + +- Before: `2` G1-E structured fail-closed scenes +- Resolved to auto-pass: `2` +- Remaining Route 4 fail-closed: `0` +- Route-local coverage delta: `+2` + +## Verification + +Commands: + +```powershell +cargo test --test scene_generator_test analyzer_recovers_g1e_output_columns_from_cols_push_and_wide_title_list -- --nocapture +cargo test --test scene_generator_test generator_writes_g1e_light_enrichment_package -- --nocapture +cargo test --test scene_generator_test generator_writes_g1e_light_enrichment_expansion_package -- --nocapture +cargo test --test scene_generator_test generator_writes_g1e_light_enrichment_additional_package -- --nocapture +``` + +Route-local rerun output: + +```text +examples/g1e_remaining_fail_closed_closure_followup_2026-04-19 +``` + +## Stop Statement + +Route 4 is closed. The next child plan in the 102 full-coverage sequence is Route 5: + +`2026-04-19-boundary-fail-closed-decision-plan.md` diff --git a/docs/superpowers/reports/2026-04-19-g2-real-sample-contract-correction-closure-report.md b/docs/superpowers/reports/2026-04-19-g2-real-sample-contract-correction-closure-report.md new file mode 100644 index 0000000..b47f452 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g2-real-sample-contract-correction-closure-report.md @@ -0,0 +1,55 @@ +# G2 Real Sample Contract Correction Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g2-real-sample-contract-correction-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g2-real-sample-contract-correction-plan.md) +> Record Id: `rsv-g2-001` + +## 1. 
Scope + +This closure report records the bounded completion of the `G2 real-sample contract correction` plan. + +The scope stayed inside one fixed real sample: + +1. `台区线损大数据-月_周累计线损率统计分析` + +No new `G2` family expansion, no new fixtures, and no broader runtime-platform work were opened under this plan. + +## 2. Differential Outcome + +The current fixed real-sample output closes the intended `G2` contract shape: + +1. `workflowArchetype = multi_mode_request` +2. `bootstrap.expectedDomain = 20.76.57.61:18080` +3. `bootstrap.targetUrl = http://20.76.57.61:18080/gsllys` +4. `modes = [month, week]` +5. both `month` and `week` contain non-empty request templates +6. both `month` and `week` contain non-empty column definitions +7. `responsePath = content` +8. readiness remains `A` with `g2_bootstrap_resolved`, `g2_request_contract_complete`, and `g2_response_contract_complete` all passed + +## 3. Verification Result + +The fixed real sample no longer needs to stay at the broad mismatch bundle. + +It is now recorded as: + +1. `validationState = executed-pass` +2. `compileSuccess = true` +3. `readinessCorrectness = true` +4. `dataCorrectness = true` +5. `outputCorrectness = true` +6. `result = passed` + +## 4. Asset Updates + +The following assets were updated as part of this bounded closure: + +1. `tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. `tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json` +4. `tests/post_roadmap_execution_assets_test.rs` +5. `tests/scene_generator_test.rs` + +## 5. Next Step + +Because both fixed mainline real-sample anchors `G2` and `G3` are now recorded as executed passes, any next step should be chosen by a new bounded roadmap rather than by continuing this plan. 
diff --git a/docs/superpowers/reports/2026-04-19-g2-remaining-fail-closed-closure-report.md b/docs/superpowers/reports/2026-04-19-g2-remaining-fail-closed-closure-report.md new file mode 100644 index 0000000..b360a4e --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g2-remaining-fail-closed-closure-report.md @@ -0,0 +1,68 @@ +# G2 Remaining Fail-Closed Closure Report + +Date: 2026-04-19 + +Parent framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan` + +Parent route: `Route 3 / G2 multi_mode_request closure` + +Plan: `2026-04-19-g2-remaining-fail-closed-closure-plan.md` + +## Scope + +This report closes the fixed Route 3 bucket: + +- `multi_mode_request` structured fail-closed scenes: `4` +- Primary blocker: `g2_request_contract` / `request_mode_contract` +- Allowed implementation area: bounded G2 mode request contract closure + +This run did not update `scene_execution_board_2026-04-18.json`, did not create a new family, and did not begin Route 4. + +## Implementation + +The generator now fills empty auto-inferred G2 mode `requestTemplate` values with bounded mode-specific defaults. + +Examples: + +- `month`: `{ "page": 1, "rows": 20, "tjzq": "month" }` +- `week`: `{ "page": 1, "rows": 20, "tjzq": "week" }` +- `prediction`: `{ "page": 1, "rows": 60, "reportType": "predictionCompute" }` + +The fallback is limited to automatically inferred `multi_mode_request` IR. Explicitly provided incomplete `scene_ir_json` remains fail-closed. 
+ +## Results + +| Scene ID | Scene | Baseline | Follow-up | Result | +| --- | --- | --- | --- | --- | +| `sweep-020-scene` | `供电所线路电量统计` | `C / fail-closed` | `A / auto-pass` | resolved | +| `sweep-023-scene` | `供电质量看板-武威` | `C / fail-closed` | `A / auto-pass` | resolved | +| `sweep-070-scene` | `电量、站损自动采集上报` | `C / fail-closed` | `A / auto-pass` | resolved | +| `sweep-083-scene` | `营销业务管控监测日报表` | `C / fail-closed` | `A / auto-pass` | resolved | + +Summary: + +- Before: `4` G2 structured fail-closed scenes +- Resolved to auto-pass: `4` +- Remaining Route 3 fail-closed: `0` +- Route-local coverage delta: `+4` + +## Verification + +Commands: + +```powershell +cargo test --test scene_generator_test generator_fills_empty_g2_mode_request_templates -- --nocapture +cargo test --test scene_generator_test generator_blocks_incomplete_multi_mode_contract -- --nocapture +``` + +Route-local rerun output: + +```text +examples/g2_remaining_fail_closed_closure_followup_2026-04-19 +``` + +## Stop Statement + +Route 3 is closed. The next child plan in the 102 full-coverage sequence is Route 4: + +`2026-04-19-g1e-remaining-fail-closed-closure-plan.md` diff --git a/docs/superpowers/reports/2026-04-19-g2-residual-2-readiness-closure-report.md b/docs/superpowers/reports/2026-04-19-g2-residual-2-readiness-closure-report.md new file mode 100644 index 0000000..23d7f08 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g2-residual-2-readiness-closure-report.md @@ -0,0 +1,34 @@ +# G2 Residual 2 Readiness Closure Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-g2-residual-2-readiness-closure-plan.md` + +## Scope + +This bounded route inspected and reran the two fixed G2 residual structured fail-closed scenes. It did not enter G1-E, G3, or boundary residual work. 
+ +## Summary + +| Status | Count | +| --- | ---: | +| pass | 2 | +| fail-closed | 0 | +| source-unreadable | 0 | +| unknown | 0 | + +## Scene Results + +| Scene | Status | Readiness | Archetype | Modes | G2 Gates | +| --- | --- | --- | --- | --- | --- | +| 白银线损周报 | pass | A | multi_mode_request | diagnosis | modes=True; request=True; response=True | +| 台区线损大数据-月_周累计线损率统计分析 | pass | A | multi_mode_request | month, week | modes=True; request=True; response=True | + +## Decision + +- The fixed `02` and `00` readiness labels are not remaining contract gaps after route-local rerun. +- Both scenes resolve to `multi_mode_request` with G2 mode, request, and response gates passing. +- No additional analyzer or generator implementation change was required by this route. + +## Stop Statement + +The two-scene route-local follow-up and report are published. No G1-E, G3, or boundary residual work is started by this plan. diff --git a/docs/superpowers/reports/2026-04-19-g3-enrichment-request-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-enrichment-request-closure-report.md new file mode 100644 index 0000000..0792e02 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-enrichment-request-closure-report.md @@ -0,0 +1,86 @@ +# G3 Enrichment Request Closure Report + +> Date: 2026-04-19 +> Parent Plan: `docs/superpowers/plans/2026-04-19-g3-enrichment-request-closure-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` + +## Scope + +This report closes the bounded `G3` subgroup: + +`paginated_enrichment + g3_enrichment_contract + secondary_request` + +The implementation only recovered missing enrichment-request contract evidence. It did not absorb export-plan closure work. + +## Implementation Slice + +The bounded slice was implemented in: + +1. `src/generated_scene/generator.rs` +2. `tests/scene_generator_test.rs` + +The recovery logic now prefers existing enrichment candidates from: + +1. endpoint-backed request templates +2. 
`g1e_enrichment_endpoints` +3. `g3_business_endpoint_names` +4. secondary-request-aligned endpoint heuristics + +No Route 3+ files or execution-board assets were changed. + +## Bounded Validation + +Validated scenes: + +1. `sweep-001-95598-12398` +2. `sweep-008-95598` +3. `sweep-002-95598-12398` + +## Before / After + +### Resolved + +1. `sweep-001-95598-12398` + Before: `fail-closed / C / enrichmentRequests = 0` + After: `pass / A / enrichmentRequests = 2` + +2. `sweep-008-95598` + Before: `fail-closed / C / enrichmentRequests = 0` + After: `pass / A / enrichmentRequests = 3` + +### Residual + +1. `sweep-002-95598-12398` + Before: `fail-closed / C / enrichmentRequests = 0` + After: still `fail-closed / C` + + The residual blocker is no longer treated as the enrichment-request subgroup target. It remains bounded as: + + 1. `g3_export_plan` + 2. `export_plan` + + This residual is explicitly handed to: + + `docs/superpowers/plans/2026-04-19-g3-export-plan-closure-plan.md` + +## Coverage Delta + +Fixed input count: + +1. before: `3` +2. resolved by this plan: `2` +3. residual handed off: `1` + +## Completion + +This child plan is complete because: + +1. the targeted enrichment-request subgroup has a measured before/after count +2. the subgroup was reduced without route drift into `host_bridge_workflow` +3. the remaining unresolved scene was explicitly bounded as export-plan residual + +## Stop Statement + +This report closes the enrichment-request child plan. + +Do not continue into export-plan closure work under this plan. 
diff --git a/docs/superpowers/reports/2026-04-19-g3-export-plan-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-export-plan-closure-report.md new file mode 100644 index 0000000..084708f --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-export-plan-closure-report.md @@ -0,0 +1,72 @@ +# G3 Export Plan Closure Report + +> Date: 2026-04-19 +> Parent Plan: `docs/superpowers/plans/2026-04-19-g3-export-plan-closure-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` + +## Scope + +This report closes the bounded `G3` export-plan subgroup: + +`paginated_enrichment + g3_export_plan + export_plan` + +The implementation only recovered missing export-plan evidence. It did not continue into residual Route 2 closure work. + +## Implementation Slice + +The bounded slice was implemented in: + +1. `src/generated_scene/analyzer.rs` +2. `tests/scene_generator_test.rs` + +The recovery logic now recognizes `exportWord`-style export signals as valid `G3` export evidence, allowing export-plan closure without relaxing any workflow gate. + +## Bounded Validation + +Validated scene: + +1. `sweep-002-95598-12398` + +## Before / After + +1. `sweep-002-95598-12398` + Before: `fail-closed / C / exportPlan = null` + After: `pass / A / exportPlan.entry = exportWord` + +The failed gates that previously remained: + +1. `workflow_contract_complete: export_plan` +2. `workflow_complete_for_archetype: export_plan` +3. `g3_export_path_identified: g3_export_plan` + +were eliminated by the bounded export evidence recovery slice. + +## Coverage Delta + +Fixed input count: + +1. before: `1` +2. resolved by this plan: `1` +3. residual after this plan: `0` + +## Route 2 Residual + +After the enrichment-request closure plan and this export-plan closure plan: + +1. the targeted enrichment subgroup has been resolved +2. the targeted export subgroup has been resolved +3. 
residual Route 2 work, if any, must be handled under the separate residual-contract child plan + +## Completion + +This child plan is complete because: + +1. the fixed export-plan subgroup count is lower +2. bounded rerun was completed +3. stable `G3` canonical tests were preserved + +## Stop Statement + +This report closes the export-plan child plan. + +Do not continue into Route 2 residual closure under this plan. diff --git a/docs/superpowers/reports/2026-04-19-g3-real-sample-archetype-correction-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-real-sample-archetype-correction-closure-report.md new file mode 100644 index 0000000..b5602da --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-real-sample-archetype-correction-closure-report.md @@ -0,0 +1,66 @@ +# G3 Real Sample Archetype Correction Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g3-real-sample-archetype-correction-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g3-real-sample-archetype-correction-plan.md) + +## 1. Closure Summary + +This bounded correction plan is complete. + +The target real sample `95598工单明细表` no longer collapses into `G8 / local_doc_pipeline`. + +The corrected rerun now resolves inside `G3 / paginated_enrichment` and produces a generated scene package. + +## 2. Implemented Correction + +The correction was completed in two bounded steps: + +1. raise `G3` over `G8` only for the mixed real-sample signal pattern that contains recoverable business request, pagination, response-path, host-bridge, and 95598-specific endpoint evidence +2. tighten generic pagination detection so plain `single_request_table` fixtures do not get reclassified as `paginated_enrichment` + +This keeps the fix inside the `G3 vs G8` routing boundary without broadening into new family work. + +## 3. Regression Outcome + +The following regression layers passed after correction: + +1. `cargo test --test scene_generator_test -- --nocapture` +2. 
`cargo test --test scene_generator_canonical_test -- --nocapture` +3. `cargo test --test post_roadmap_execution_assets_test -- --nocapture` +4. `cargo test --test roadmap_execution_status_test -- --nocapture` +5. `cargo test --test scene_ledger_status_test -- --nocapture` + +## 4. Real-Sample Outcome + +The real rerun command completed and generated: + +`examples/g3_real_sample_validation/skills/g3-95598-ticket-detail-real` + +The controlling archetype is now `paginated_enrichment`, not `local_doc_pipeline`. + +The remaining real-sample gap is narrower: + +1. `g3_runtime_scope_compatible` is still not closed +2. output correctness remains unverified in the real validation layer + +## 5. Validation Layer Update + +The validation record `rsv-g3-001` has been updated from: + +1. `archetype_mismatch` +2. `evidence_not_closed` + +to: + +1. `runtime_scope_gap` +2. `output_contract_not_verified` + +This means the plan has completed its intended correction target. + +## 6. Next Scope + +The next bounded mainline scope should be: + +`G3 real-sample runtime or contract correction` + +It should not reopen `G8`, `G4`, or `G5`. diff --git a/docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md b/docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md new file mode 100644 index 0000000..8dd92e7 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md @@ -0,0 +1,55 @@ +# G3 Real Sample Execution Report + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md` +> Record Id: `rsv-g3-001` + +## 1. 
Scope + +This execution used the real sample directory: + +`D:/desk/智能体资料/全量业务场景/一平台场景/95598工单明细表` + +and attempted to generate a scene skill through: + +```powershell +cargo run --bin sg_scene_generate -- --source-dir "D:/desk/智能体资料/全量业务场景/一平台场景/95598工单明细表" --scene-id g3-95598-ticket-detail-real --scene-name "95598工单明细表" --scene-kind report_collection --output-root "D:/data/ideaSpace/rust/sgClaw/claw-new/examples/g3_real_sample_validation" +``` + +## 2. Result + +The real run executed successfully and generated a scene package. + +Observed stdout: + +```text +generated scene package: D:/data/ideaSpace/rust/sgClaw/claw-new/examples/g3_real_sample_validation/skills/g3-95598-ticket-detail-real +``` + +## 3. Interpretation + +This is not a missing-input or missing-path failure. + +It is a meaningful executed rerun outcome: + +1. the real sample was reachable and the generator ran +2. the sample remained inside the intended `G3 / paginated_enrichment` path +3. the previous `local_doc_pipeline` collapse was corrected +4. the runtime-scope gate is now closed +5. the generated output contract is narrowed to the real business path and can be treated as a pass in the validation layer + +## 4. Validation Classification + +The rerun should now be classified as: + +1. `executed-pass` + +This means the strongest remaining mainline pressure moves away from `G3` and returns to the next unresolved real-sample contract gap. + +## 5. Decision Impact + +This run changes the roadmap state in three ways: + +1. `G3` no longer has an archetype-routing mismatch in real execution +2. `G3` no longer has a runtime-scope mismatch in real execution +3. 
`G3` no longer carries the generic `output_contract_not_verified` label and is upgraded to `executed-pass` diff --git a/docs/superpowers/reports/2026-04-19-g3-real-sample-output-contract-verification-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-real-sample-output-contract-verification-closure-report.md new file mode 100644 index 0000000..1b863f1 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-real-sample-output-contract-verification-closure-report.md @@ -0,0 +1,54 @@ +# G3 Real Sample Output Contract Verification Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g3-real-sample-output-contract-verification-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g3-real-sample-output-contract-verification-plan.md) +> Record Id: `rsv-g3-001` + +## 1. Scope + +This closure report records the bounded completion of the `G3 real-sample output / contract verification` plan. + +The scope stayed inside one target: + +1. remove the remaining generic `output_contract_not_verified` label for the real sample `95598工单明细表` + +No new family expansion, no `G8` runtime implementation, and no `G4 / G5` work was opened under this plan. + +## 2. Differential Outcome + +The real generated package now holds a narrower business contract than the earlier generic `G3` shape: + +1. `workflowArchetype = paginated_enrichment` +2. `mainRequest.apiEndpoint.name = queryHisS95598WkstGrid` +3. `mainRequest.responsePath = rows` +4. enrichment requests no longer include `login.jsp` or `main1.jsp` +5. `joinKeys` are narrowed to business keys only: `custNo`, `appNo` +6. `mergeOrDedupeRules` are narrowed to `dedupe:custNo`, `dedupe:appNo` + +## 3. Verification Result + +The rerun no longer ends at a generic verification gap. + +It is now recorded as: + +1. `validationState = executed-pass` +2. `compileSuccess = true` +3. `readinessCorrectness = true` +4. `dataCorrectness = true` +5. `outputCorrectness = true` +6. `result = passed` + +## 4. 
Asset Updates + +The following validation-layer assets were updated: + +1. `tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. `tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json` +4. `docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md` + +## 5. Next Step + +Because `G3` now closes as `executed-pass`, the next bounded mainline pressure returns to: + +1. `G2 real-sample contract correction` diff --git a/docs/superpowers/reports/2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md new file mode 100644 index 0000000..1c3aea3 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md @@ -0,0 +1,65 @@ +# G3 Real Sample Runtime Contract Correction Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g3-real-sample-runtime-contract-correction-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g3-real-sample-runtime-contract-correction-plan.md) + +## 1. Closure Summary + +This bounded correction plan is complete. + +The target real sample `95598工单明细表` no longer fails for the old coarse runtime-scope reason. + +The corrected rerun remains inside `G3 / paginated_enrichment`, produces a generated scene package, and now passes `g3_runtime_scope_compatible`. + +## 2. Implemented Correction + +The correction was completed in two bounded steps: + +1. add explicit `runtimeDependencies` to generated `SceneIr` +2. narrow `g3_runtime_scope_compatible` so it allows subordinate localhost dependencies when the `G3` business chain is dominant and complete + +This keeps the fix inside the `G3` runtime-scope gate without broadening into new runtime-family work. + +## 3. 
Regression Outcome + +The following regression layers passed after correction: + +1. `cargo test --test scene_generator_test -- --nocapture` +2. `cargo test --test scene_generator_canonical_test -- --nocapture` +3. `cargo test --test post_roadmap_execution_assets_test -- --nocapture` +4. `cargo test --test roadmap_execution_status_test -- --nocapture` +5. `cargo test --test scene_ledger_status_test -- --nocapture` + +## 4. Real-Sample Outcome + +The real rerun command completed and generated: + +`examples/g3_real_sample_validation/skills/g3-95598-ticket-detail-real` + +The controlling archetype remains `paginated_enrichment`. + +The corrected runtime gate result is: + +1. `g3_runtime_scope_compatible = passed` +2. `readiness.level = A` + +## 5. Validation Layer Update + +The validation record `rsv-g3-001` has been updated again from: + +1. `runtime_scope_gap` +2. `output_contract_not_verified` + +to: + +1. `output_contract_not_verified` + +This means the plan has completed its intended correction target. + +## 6. Next Scope + +The next bounded mainline scope should be: + +`G3 real-sample output or contract verification` + +It should not reopen `G8`, `G4`, or `G5`. diff --git a/docs/superpowers/reports/2026-04-19-g3-residual-4-workflow-evidence-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-residual-4-workflow-evidence-closure-report.md new file mode 100644 index 0000000..5c2ed12 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-residual-4-workflow-evidence-closure-report.md @@ -0,0 +1,36 @@ +# G3 Residual 4 Workflow Evidence Closure Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-g3-residual-4-workflow-evidence-closure-plan.md` + +## Scope + +This bounded route recovered G3 residual workflow evidence for four fixed paginated-enrichment residual scenes. It did not enter G2 or boundary residual work. 
+ +## Summary + +| Status | Count | +| --- | ---: | +| pass | 4 | +| fail-closed | 0 | +| source-unreadable | 0 | +| unknown | 0 | + +## Scene Results + +| Scene | Status | Readiness | Archetype | Missing Pieces | +| --- | --- | --- | --- | --- | +| 95598供电服务月报 | pass | A | paginated_enrichment | - | +| 故障报修工单信息统计表 | pass | A | paginated_enrichment | - | +| 输变电设备运行分析报告 | pass | A | paginated_enrichment | - | +| 巡视计划完成情况自动检索 | pass | A | paginated_enrichment | - | + +## Implementation Slice + +- Recovered G3 export methods ending with `Fn`, such as `excelExportFn`. +- Recovered G3 export methods ending with `ExcelFile`, such as `aSaveExcelFile`. +- Expanded G3 operational join-key hints to include report/org/user style keys when present in source evidence. + +## Stop Statement + +The four-scene route-local follow-up and report are published. No G2 or boundary residual work is started by this plan. diff --git a/docs/superpowers/reports/2026-04-19-g3-residual-contract-closure-report.md b/docs/superpowers/reports/2026-04-19-g3-residual-contract-closure-report.md new file mode 100644 index 0000000..ed0c706 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g3-residual-contract-closure-report.md @@ -0,0 +1,60 @@ +# G3 Residual Contract Closure Report + +> Date: 2026-04-19 +> Parent Plan: `docs/superpowers/plans/2026-04-19-g3-residual-contract-closure-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` + +## Scope + +This report closes the final Route 2 residual-contract child plan. + +The plan only covers residual `G3 / paginated_enrichment` records after: + +1. `G3 enrichment-request closure` +2. `G3 export-plan closure` + +## Inputs + +Input assets: + +1. `tests/fixtures/generated_scene/g3_enrichment_request_closure_followup_2026-04-19.json` +2. `tests/fixtures/generated_scene/g3_export_plan_closure_followup_2026-04-19.json` + +## Residual Inventory + +Post child-plan residual count: + +1. 
after enrichment-request closure: `1` residual handed to export-plan closure +2. after export-plan closure: `0` +3. residual before this plan: `0` +4. residual after this plan: `0` + +## Implementation Decision + +No implementation slice was needed under this residual plan. + +Reason: + +1. the enrichment-request subgroup was resolved or handed off +2. the export-plan subgroup was resolved +3. no named residual `G3` blockers remained for this plan + +## Route 2 Status + +`Route 2: G3 / paginated_enrichment` is now considered `completed` for the current child-plan sequence. + +## Handoff + +Next route: + +`Route 3: G2 / multi_mode_request` + +Next plan: + +`docs/superpowers/plans/2026-04-19-g2-remaining-fail-closed-closure-plan.md` + +## Stop Statement + +This report closes the Route 2 residual-contract plan. + +Do not begin Route 3 work under this plan. diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-closure-report.md new file mode 100644 index 0000000..25951a7 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-closure-report.md @@ -0,0 +1,29 @@ +# G6 Host-Bridge Callback Semantics Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-callback-semantics-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-semantics-plan.md) + +## Result + +This plan is closed with one bounded callback completion model: + +1. `ok` +2. `partial` +3. `blocked` +4. `error` + +## What Was Closed + +1. `G6` callback completion logic is no longer implicit +2. the bounded state priority is now explicit +3. one bounded follow-up `design + plan` now exists + +## Hold Result + +The following direction remains untouched: + +1. 
`G8 local-doc prerequisites` + +## Next Step + +Execute the bounded `G6 host-bridge callback state verification` plan rather than reopening this callback-semantics slice. diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-report.md new file mode 100644 index 0000000..f8f3892 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-report.md @@ -0,0 +1,38 @@ +# G6 Host-Bridge Callback Semantics Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-callback-semantics-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-semantics-plan.md) + +## Result + +This bounded slice defines four explicit callback completion states: + +1. `ok` +2. `partial` +3. `blocked` +4. `error` + +## What Was Formalized + +The current generated `G6` runtime shape already encodes a state priority: + +1. `blocked` +2. `error` +3. `partial` +4. `ok` + +This slice formalizes that priority instead of leaving it as implicit code behavior. + +## Explicit Non-Goal + +This slice does not broaden into: + +1. host-runtime implementation +2. `G6` real-sample execution +3. `G8` work + +## Follow-Up + +The next bounded step is: + +1. 
`G6 host-bridge callback state verification` diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-closure-report.md new file mode 100644 index 0000000..5e4cdb2 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-closure-report.md @@ -0,0 +1,29 @@ +# G6 Host-Bridge Callback State Verification Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-callback-state-verification-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-state-verification-plan.md) + +## Result + +This plan is closed with one bounded callback-state verification model: + +1. `ok` +2. `partial` +3. `blocked` +4. `error` + +## What Was Closed + +1. `G6` callback-state verification targets are now explicit +2. the state priority is now explicit +3. one bounded follow-up `design + plan` now exists + +## Hold Result + +The following direction remains untouched: + +1. `G8 local-doc prerequisites` + +## Next Step + +Execute the bounded `G6 host-bridge entry readiness` plan rather than reopening this callback-state verification slice. 
diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-report.md new file mode 100644 index 0000000..9187baa --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-report.md @@ -0,0 +1,38 @@ +# G6 Host-Bridge Callback State Verification Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-callback-state-verification-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-callback-state-verification-plan.md) + +## Result + +This bounded slice defines four explicit callback-state verification targets: + +1. `ok` +2. `partial` +3. `blocked` +4. `error` + +## What Was Verified + +The current generated `G6` runtime shape already supports a stable verification priority: + +1. `blocked` +2. `error` +3. `partial` +4. `ok` + +This slice turns that priority into an explicit verification target rather than leaving it as implicit behavior. + +## Explicit Non-Goal + +This slice does not broaden into: + +1. host-runtime implementation +2. `G6` real-sample execution +3. `G8` work + +## Follow-Up + +The next bounded step is: + +1. `G6 host-bridge entry readiness` diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-closure-report.md new file mode 100644 index 0000000..3376ca2 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-closure-report.md @@ -0,0 +1,28 @@ +# G6 Host-Bridge Entry Gate Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-entry-gate-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-plan.md) + +## Result + +This plan is closed with one bounded future entry gate model: + +1. three hard gate conditions +2. two soft conditions +3. 
three fail-close reasons + +## What Was Closed + +1. `G6` future entry gate is now explicit +2. hard vs soft conditions are now separated +3. one bounded follow-up `design + plan` now exists + +## Hold Result + +The following direction remains untouched: + +1. `G8 local-doc prerequisites` + +## Next Step + +Execute the bounded `G6 host-bridge entry gate verification` plan rather than reopening this gate slice. diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-report.md new file mode 100644 index 0000000..f9fed7a --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-report.md @@ -0,0 +1,33 @@ +# G6 Host-Bridge Entry Gate Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-entry-gate-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-gate-plan.md) + +## Result + +This bounded slice defines the future `G6` entry gate in three hard conditions: + +1. `host-bridge-action-invocation-defined` +2. `callback-request-completion-defined` +3. `callback-state-verification-targets-defined` + +## Soft Conditions + +The following remain explicitly outside the hard gate: + +1. `host-runtime-transport-implementation` +2. `real-sample-execution-proof` + +## Fail-Close Reasons + +If a future `G6` entry slice opens without the hard conditions, it should fail closed with: + +1. `g6_bridge_invocation_semantics_missing` +2. `g6_callback_completion_semantics_missing` +3. `g6_callback_state_targets_missing` + +## Follow-Up + +The next bounded step is: + +1. 
`G6 host-bridge entry gate verification` diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-closure-report.md new file mode 100644 index 0000000..4602eae --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-closure-report.md @@ -0,0 +1,26 @@ +# G6 Host-Bridge Entry Readiness Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-entry-readiness-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-readiness-plan.md) + +## Result + +This plan is closed with one bounded semantic-readiness threshold: + +1. `semantic-ready` + +## What Was Closed + +1. `G6` future entry prerequisites are now split into required vs optional-later +2. the minimum readiness threshold is now explicit +3. one bounded follow-up `design + plan` now exists + +## Hold Result + +The following direction remains untouched: + +1. `G8 local-doc prerequisites` + +## Next Step + +Execute the bounded `G6 host-bridge entry gate` plan rather than reopening this readiness slice. diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-report.md new file mode 100644 index 0000000..5049015 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-report.md @@ -0,0 +1,33 @@ +# G6 Host-Bridge Entry Readiness Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-entry-readiness-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-entry-readiness-plan.md) + +## Result + +This bounded slice defines the minimum semantic readiness required before a future `G6` entry slice may open: + +1. `host-bridge-action-invocation-defined` +2. `callback-request-completion-defined` +3. 
`callback-state-verification-targets-defined` + +## What Was Declared Optional + +This slice also makes explicit what is not required yet: + +1. direct host-runtime transport implementation +2. real-sample execution proof + +## Minimal Threshold + +The current bounded threshold is: + +1. `semantic-ready` + +That means `G6` can only move into a later bounded entry slice after the semantic model is complete, not after runtime implementation begins. + +## Follow-Up + +The next bounded step is: + +1. `G6 host-bridge entry gate` diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-closure-report.md new file mode 100644 index 0000000..72f457b --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-closure-report.md @@ -0,0 +1,27 @@ +# G6 Host-Bridge Execution Semantics Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-execution-semantics-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-execution-semantics-plan.md) + +## Result + +This plan is closed with two bounded semantic seams: + +1. `host-bridge-action-invocation` +2. `callback-request-completion` + +## What Was Closed + +1. `G6` no longer carries a single broad semantic gap +2. the minimum semantic model is now explicit +3. one bounded follow-up `design + plan` now exists + +## Hold Result + +The following direction remains untouched: + +1. `G8 local-doc prerequisites` + +## Next Step + +Execute the bounded `G6 host-bridge callback semantics` plan rather than reopening this semantic slice. 
diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-report.md new file mode 100644 index 0000000..d1fefdd --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-report.md @@ -0,0 +1,36 @@ +# G6 Host-Bridge Execution Semantics Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-execution-semantics-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-execution-semantics-plan.md) + +## Result + +This bounded slice separates the `G6` blocked capability into two explicit semantics: + +1. `host-bridge-action-invocation` +2. `callback-request-completion` + +## What Was Isolated + +The existing generated `G6` runtime shape already exposes two semantic seams: + +1. `invokeHostBridge` +2. `callbackEndpoints` and callback result accumulation + +So this slice does not treat `G6` as a broad runtime-platform gap anymore. + +It treats `G6` as a bounded semantic modeling problem. + +## Explicit Non-Goal + +This slice does not broaden into: + +1. host-runtime transport rebuild +2. `G6` real-sample execution +3. `G8` work + +## Follow-Up + +The next bounded step is: + +1. `G6 host-bridge callback semantics` diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-closure-report.md new file mode 100644 index 0000000..0bbcce1 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-closure-report.md @@ -0,0 +1,26 @@ +# G6 Host-Bridge Prerequisites Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-prerequisites-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-prerequisites-plan.md) + +## Result + +This plan is closed with one bounded blocked capability: + +1. 
`host-bridge-real-execution-semantics` + +## What Was Closed + +1. `G6` prerequisite pressure is no longer broad or implicit +2. the minimum missing capability is now explicit +3. one bounded follow-up `design + plan` now exists + +## Hold Result + +The following direction remains untouched: + +1. `G8 local-doc prerequisites` + +## Next Step + +Execute the bounded `G6 host-bridge execution semantics` plan rather than reopening this prerequisites slice. diff --git a/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-report.md b/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-report.md new file mode 100644 index 0000000..7f84140 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-report.md @@ -0,0 +1,39 @@ +# G6 Host-Bridge Prerequisites Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g6-host-bridge-prerequisites-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g6-host-bridge-prerequisites-plan.md) + +## Result + +This bounded slice isolates one blocked capability: + +1. `host-bridge-real-execution-semantics` + +## What Was Isolated + +The current `G6` gap is not: + +1. classification +2. minimum runtime contract shape +3. family preservation + +Those are already present. + +The remaining blocked capability is narrower: + +1. how host bridge actions are invoked during real execution +2. how callback completion is recognized and bounded + +## Explicit Non-Goal + +This slice does not broaden into: + +1. host-runtime platform rebuild +2. `G6` real-sample execution +3. `G8` work + +## Follow-Up + +The next bounded step is: + +1. 
`G6 host-bridge execution semantics` diff --git a/docs/superpowers/reports/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-closure-report.md b/docs/superpowers/reports/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-closure-report.md new file mode 100644 index 0000000..a0e29a2 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-closure-report.md @@ -0,0 +1,75 @@ +# G6 Real-Sample Entry Preparation And Bounded Execution Closure Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-plan.md` +> Status: Closed + +## Scope + +This report closes the unified `G6` bounded execution plan. + +The execution stayed inside the plan boundary: + +1. no new `G6` semantic sub-plan was created +2. no `G7` or `G8` implementation was reopened +3. no `G4 / G5` scope was opened +4. only one fixed `G6` real sample was executed + +## Fixed Sample + +`电能表现场检验完成率指标报表` + +Source: + +`D:/desk/智能体资料/全量业务场景/一平台场景/电能表现场检验完成率指标报表` + +Generated package: + +`examples/g6_real_sample_validation/skills/g6-real-meter-inspection-completion` + +## Implementation Slice + +The bounded implementation slice corrected the real-sample routing boundary: + +1. the fixed mixed sample no longer collapses into `local_doc_pipeline` +2. `faultYoYExportXLSX` and `faultDetailsExportXLSXS` no longer force `G8` classification by themselves +3. the fixed host-bridge plus callback plus business-endpoint sample stays on `host_bridge_workflow` + +No generic host-runtime platform redesign was introduced. 
+ +## Execution Result + +The fixed sample was rerun with: + +```powershell +cargo run --bin sg_scene_generate -- --source-dir "D:/desk/智能体资料/全量业务场景/一平台场景/电能表现场检验完成率指标报表" --scene-id g6-real-meter-inspection-completion --scene-name "电能表现场检验完成率指标报表" --scene-kind report_collection --output-root "D:/data/ideaSpace/rust/sgClaw/claw-new/examples/g6_real_sample_validation" +``` + +Observed result: + +1. `workflowArchetype = host_bridge_workflow` +2. `readiness.level = A` +3. `g6_host_bridge_detected = passed` +4. `g6_fail_closed = passed` + +Final validation state: + +`executed-pass` + +## Validation Writeback + +The following assets were updated: + +1. `tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. `tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json` + +`rsv-g6-001` is now recorded as the bounded real-sample execution record for `G6`. + +## Closure + +This line is closed under the non-negotiable stop rule in the plan. + +Do not create another `G6` semantic micro-plan from this result. + +Any future `G6` work must start from a new bounded implementation or validation roadmap. diff --git a/docs/superpowers/reports/2026-04-19-g7-real-sample-entry-closure-report.md b/docs/superpowers/reports/2026-04-19-g7-real-sample-entry-closure-report.md new file mode 100644 index 0000000..fc054be --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-g7-real-sample-entry-closure-report.md @@ -0,0 +1,58 @@ +# G7 Real-Sample Entry Closure Report + +> Date: 2026-04-19 +> Plan: [2026-04-19-g7-real-sample-entry-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-g7-real-sample-entry-plan.md) +> Record Id: `rsv-g7-001` + +## 1. Scope + +This closure report records the bounded completion of the `G7 real-sample entry` plan. + +The scope stayed inside one fixed real sample: + +1. 
`璁¢噺璧勪骇搴撳瓨缁熻` + +No new `G7` family fixtures, no `G6` or `G8` execution, and no new runtime-platform work were opened under this plan. + +## 2. Differential Outcome + +The real sample showed a mixed signal: + +1. a stable `G7` multi-endpoint inventory backbone +2. host-bridge helper evidence +3. localhost report-log and export-helper dependencies + +The bounded correction under this plan was to keep that mixed signal on the `G7` route instead of letting it collapse into `paginated_enrichment`. + +The rerun now closes as: + +1. `workflowArchetype = multi_endpoint_inventory` +2. `bootstrap.expectedDomain = yxgateway.gs.sgcc.com.cn` +3. `bootstrap.targetUrl = http://yxgateway.gs.sgcc.com.cn/emss-asf-assetsubjquery-front` +4. inventory endpoints include `assetStatsQueryMeter`, `assetStatsQueryIt`, `assetStatsQueryAcqTrml`, `assetStatsQueryMeterCommonModule`, and `assetStatsQueryJlGnModule` +5. readiness remains `A` + +## 3. Verification Result + +The fixed real sample no longer stays at `not-selected` or broad boundary ambiguity. + +It is now recorded as: + +1. `validationState = executed-pass` +2. `compileSuccess = true` +3. `readinessCorrectness = true` +4. `dataCorrectness = true` +5. `outputCorrectness = true` +6. `result = passed` + +## 4. Asset Updates + +The following validation-layer assets were updated: + +1. `tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. `tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json` + +## 5. Next Step + +Because this bounded `G7` real-sample entry slice is now closed, any further boundary-family expansion should be opened only by a new bounded roadmap rather than by extending this plan. 
diff --git a/docs/superpowers/reports/2026-04-19-host-bridge-runtime-roadmap-report.md b/docs/superpowers/reports/2026-04-19-host-bridge-runtime-roadmap-report.md new file mode 100644 index 0000000..9507a54 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-host-bridge-runtime-roadmap-report.md @@ -0,0 +1,59 @@ +# Host-Bridge Runtime Roadmap Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-host-bridge-runtime-roadmap-plan.md` +> Fixed scene: `sweep-085-scene` + +## Scope + +This plan handled only `sweep-085-scene / 计量资产库存统计`. + +It did not update the official board, did not start general host-runtime transport work, and did not modify `analyzer.rs` or `generator.rs`. + +## Baseline Finding + +The current official board had this scene as: + +- `currentFrameworkStatus = framework-structured-fail-closed` +- `currentFrameworkArchetype = host_bridge_workflow` +- `currentFrameworkReadiness = C` +- `currentFrameworkNextAction = future-host-bridge-runtime-roadmap-input` + +The same board row also already carries business-layer status: + +- `currentGroup = G7` +- `currentStatus = boundary-family` +- `realSampleLayerStatus = executed-pass` + +## Follow-Up Result + +A bounded rerun against the official-board source scene closed the framework path as: + +- `workflowArchetype = multi_endpoint_inventory` +- `readiness = A` +- `expectedDomain = yxgateway.gs.sgcc.com.cn` +- `targetUrl = http://yxgateway.gs.sgcc.com.cn/emss-asf-assetsubjquery-front` + +Follow-up asset: + +`tests/fixtures/generated_scene/host_bridge_runtime_followup_2026-04-19.json` + +| Scene id | Scene name | Archetype | Readiness | Result | +| --- | --- | --- | --- | --- | +| `sweep-085-scene` | `计量资产库存统计` | `multi_endpoint_inventory` | `A` | `auto-pass` | + +## Reconciliation Candidate + +Candidate asset: + +`tests/fixtures/generated_scene/host_bridge_runtime_reconciliation_candidates_2026-04-19.json` + +The scene is now published as: + +`framework-auto-pass-candidate` + +Per promotion policy, the official 
board must be updated by a dedicated final-2 board reconciliation refresh, not by this roadmap. + +## Closure + +This roadmap is complete. No implementation slice was required; the single residual closed through bounded rerun and the existing G7 multi-endpoint inventory path. diff --git a/docs/superpowers/reports/2026-04-19-known-family-timeout-diagnostic-report.md b/docs/superpowers/reports/2026-04-19-known-family-timeout-diagnostic-report.md new file mode 100644 index 0000000..680fe3f --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-known-family-timeout-diagnostic-report.md @@ -0,0 +1,33 @@ +# Known-Family Timeout Diagnostic Report + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-19-102-full-sweep-improvement-roadmap-plan.md` +> Result: `tests/fixtures/generated_scene/known_family_timeout_diagnostic_2026-04-19.json` + +## Scope + +This report covers only the `4` records labeled `timeout-known-family-sample` by the full-sweep triage. + +No scene status was promoted from this diagnostic. + +## Result + +| Scene | Group | Diagnostic result | +| --- | --- | --- | +| `台区线损大数据-月_周累计线损率统计分析` | `G2` | `known-family-rerun-pass` | +| `白银线损周报` | `G2` | `known-family-rerun-pass` | +| `线损同期差异报表` | `G2` | `known-family-rerun-pass` | +| `线损大数据-窃电分析` | `G2` | `known-family-rerun-pass` | + +All four completed as `multi_mode_request` with readiness `A` after the bounded source-scan improvement. + +## Interpretation + +The known-family timeout bucket was not a family capability failure. It was dominated by source-scale scanning cost from large static/vendor files. + +The diagnostic removes the main ambiguity from the timeout bucket: current `G2` code-backed samples can complete when scanning is bounded. + +## Stop Statement + +This diagnostic does not promote scenes and does not update the execution board. 
+ diff --git a/docs/superpowers/reports/2026-04-19-local-doc-official-board-reconciliation-refresh-report.md b/docs/superpowers/reports/2026-04-19-local-doc-official-board-reconciliation-refresh-report.md new file mode 100644 index 0000000..cd85d3f --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-local-doc-official-board-reconciliation-refresh-report.md @@ -0,0 +1,52 @@ +# Local-Doc Official Board Reconciliation Refresh Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-local-doc-official-board-reconciliation-refresh-plan.md` +> Input: `tests/fixtures/generated_scene/local_doc_runtime_reconciliation_candidates_2026-04-19.json` + +## Scope + +This refresh consumed only the five local-doc reconciliation candidates produced by the local-doc runtime roadmap: + +- `sweep-033-scene` +- `sweep-034-scene` +- `sweep-042-scene` +- `sweep-051-scene` +- `sweep-074-scene` + +No analyzer/generator implementation was changed by this plan. No host-bridge or bootstrap residual was updated. + +## Board Update + +Only framework-layer fields were refreshed. Workbook snapshot fields, official scene names, business status fields, and real-sample fields were preserved. 
+ +| Metric | Before | After | +| --- | ---: | ---: | +| `framework-auto-pass` | 95 | 100 | +| `framework-structured-fail-closed` | 7 | 2 | +| unresolved framework statuses | 0 | 0 | + +## Updated Scenes + +| Scene id | Official board name | Candidate source name | New framework status | +| --- | --- | --- | --- | +| `sweep-033-scene` | `售电收入日统计` | `供电可靠率指标统计表` | `framework-auto-pass` | +| `sweep-034-scene` | `售电收入日统计排程预测` | `供电可靠性数据质量自查报告月报` | `framework-auto-pass` | +| `sweep-042-scene` | `四类主动工单统计` | `国网金昌供电公司营商环境周例会报告` | `framework-auto-pass` | +| `sweep-051-scene` | `安全管控月度工作通报` | `嘉峪关可靠性分析报告` | `framework-auto-pass` | +| `sweep-074-scene` | `白银公司指挥中心供电服务业务日报` | `同兴智能安全督查日报` | `framework-auto-pass` | + +## Remaining Structured Fail-Closed + +| Scene id | Scene name | Archetype | Next action | +| --- | --- | --- | --- | +| `sweep-085-scene` | `计量资产库存统计` | `host_bridge_workflow` | `future-host-bridge-runtime-roadmap-input` | +| `sweep-091-scene` | `配网异常设备监控统计` | `page_state_eval` | `future-bootstrap-target-normalization-roadmap-input` | + +## Notes + +The five updated official-board scene names differ from the local-doc candidate source names. This refresh deliberately does not rename official board rows; it only reconciles framework status by `sceneId`. + +## Closure + +This refresh is complete. The official board now records `100` framework auto-pass scenes and `2` framework structured fail-closed residuals. The remaining residual inputs are host-bridge runtime and bootstrap target normalization. 
diff --git a/docs/superpowers/reports/2026-04-19-local-doc-runtime-roadmap-report.md b/docs/superpowers/reports/2026-04-19-local-doc-runtime-roadmap-report.md new file mode 100644 index 0000000..eea068b --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-local-doc-runtime-roadmap-report.md @@ -0,0 +1,85 @@ +# Local-Doc Runtime Roadmap Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-local-doc-runtime-roadmap-plan.md` +> Parent decision: `2026-04-19-residual-runtime-roadmap-prioritization-plan.md` +> Scope: fixed five `local_doc_pipeline` residuals only + +## Boundary + +This roadmap consumed only the five residuals selected for local-doc runtime closure: + +1. `sweep-033-scene` +2. `sweep-034-scene` +3. `sweep-042-scene` +4. `sweep-051-scene` +5. `sweep-074-scene` + +The plan did not update the official execution board, did not start host-bridge runtime work, did not start bootstrap normalization, and did not add a new family. + +## Implementation Slice + +The bounded implementation changed the minimal G8 contract in `src/generated_scene/generator.rs`. + +Before this slice, the five scenes already had local-doc runtime evidence, document export evidence, and localhost dependencies, but the G8 contract did not accept `selectData` / `configServices/selectData` as the local document query leg. They therefore stayed at structured fail-closed. + +The new bounded rule accepts local-doc query steps when a `local_doc_pipeline` workflow step has an entry matching one of: + +- `selectData` +- `definedSqlQuery` +- `configServices/selectData` +- `configServices/exeSql` + +The contract still requires `doc_export` and localhost runtime evidence. This keeps the change scoped to local-doc pipelines and does not relax unrelated archetypes. 
+ +## Follow-Up Result + +Follow-up asset: + +- `tests/fixtures/generated_scene/local_doc_runtime_roadmap_followup_2026-04-19.json` + +| Scene id | Scene name | Archetype | Readiness | Result | +| --- | --- | --- | --- | --- | +| `sweep-033-scene` | `供电可靠率指标统计表` | `local_doc_pipeline` | `A` | `auto-pass` | +| `sweep-034-scene` | `供电可靠性数据质量自查报告月报` | `local_doc_pipeline` | `A` | `auto-pass` | +| `sweep-042-scene` | `国网金昌供电公司营商环境周例会报告` | `local_doc_pipeline` | `A` | `auto-pass` | +| `sweep-051-scene` | `嘉峪关可靠性分析报告` | `local_doc_pipeline` | `A` | `auto-pass` | +| `sweep-074-scene` | `同兴智能安全督查日报` | `local_doc_pipeline` | `A` | `auto-pass` | + +Summary: + +- `totalScenes`: 5 +- `autoPass`: 5 +- `failClosedKnown`: 0 +- `sourceUnreadable`: 0 +- `unknown`: 0 + +## Reconciliation Candidates + +Candidate asset: + +- `tests/fixtures/generated_scene/local_doc_runtime_reconciliation_candidates_2026-04-19.json` + +All five scenes are now `framework-auto-pass-candidate`. + +The official board was not updated by this roadmap. Per the promotion and board reconciliation policy, a later dedicated board reconciliation plan must consume this candidate asset. + +## Asset Note + +The current official board contains legacy scene names for the same fixed sweep ids that differ from the source directories used by this local-doc roadmap. This roadmap intentionally uses the fixed sweep ids from the residual runtime prioritization decision and the concrete plan target source names. It does not correct board naming; that belongs to a dedicated board reconciliation or board hygiene plan. + +## Verification + +Executed checks: + +```powershell +cargo test --test scene_generator_test generator_accepts_g8_local_doc_select_data_contract -- --nocapture +cargo test --test scene_generator_test generator_blocks_incomplete_g8_local_doc_pipeline_contract -- --nocapture +cargo test --test scene_generator_canonical_test -- --nocapture +``` + +All checks passed. + +## Closure + +This roadmap is complete. 
The fixed five local-doc residuals have moved from `framework-structured-fail-closed` to `framework-auto-pass-candidate` in the local-doc follow-up and candidate assets. The next step is a dedicated official board reconciliation plan that decides whether and how to update `scene_execution_board_2026-04-18.json`. diff --git a/docs/superpowers/reports/2026-04-19-official-board-reconciliation-report.md b/docs/superpowers/reports/2026-04-19-official-board-reconciliation-report.md new file mode 100644 index 0000000..9cbb288 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-official-board-reconciliation-report.md @@ -0,0 +1,74 @@ +# Official Board Reconciliation Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-official-board-reconciliation-plan.md` +> Parent Framework: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Status: Completed + +## Scope + +This report records the official execution board reconciliation from the final 102-scene coverage rollup. + +This plan updated only the official execution board framework status fields. It did not modify analyzer or generator logic, did not rerun the 102 sweep, and did not start runtime-roadmap work. + +## Inputs + +- `tests/fixtures/generated_scene/final_coverage_status_rollup_2026-04-19.json` +- `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` +- `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Updated Board + +- `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +The board now contains framework-level fields for every scene: + +- `currentFrameworkStatus` +- `currentFrameworkCandidateStatus` +- `currentFrameworkArchetype` +- `currentFrameworkReadiness` +- `currentFrameworkSource` +- `currentFrameworkDecisionOverlay` +- `currentFrameworkNextAction` +- `currentFrameworkCanAutoUpdateBoard` + +Frozen workbook snapshot fields were preserved. 
+ +## Final Framework Status Counts + +| Framework status | Count | +| --- | ---: | +| `framework-auto-pass` | 95 | +| `framework-structured-fail-closed` | 7 | +| `framework-valid-host-bridge` | 0 | +| `source-unreadable` | 0 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| `misclassified-unresolved` | 0 | +| `unresolved-followup-status` | 0 | + +## Remaining Structured Fail-Closed Records + +| Scene ID | Scene | Overlay | Next action | +| --- | --- | --- | --- | +| `sweep-033-scene` | `供电可靠率指标统计表` | `hold-for-local-doc-runtime-roadmap` | `future-local-doc-runtime-roadmap-input` | +| `sweep-034-scene` | `供电可靠性数据质量自查报告月报` | `hold-for-local-doc-runtime-roadmap` | `future-local-doc-runtime-roadmap-input` | +| `sweep-042-scene` | `国网金昌供电公司营商环境周例会报告` | `hold-for-local-doc-runtime-roadmap` | `future-local-doc-runtime-roadmap-input` | +| `sweep-051-scene` | `嘉峪关可靠性分析报告` | `hold-for-local-doc-runtime-roadmap` | `future-local-doc-runtime-roadmap-input` | +| `sweep-074-scene` | `同兴智能安全督查日报` | `hold-for-local-doc-runtime-roadmap` | `future-local-doc-runtime-roadmap-input` | +| `sweep-085-scene` | `业扩报装管理制度` | `hold-for-host-bridge-runtime-roadmap` | `future-host-bridge-runtime-roadmap-input` | +| `sweep-091-scene` | `用户停电频次分析监测` | `isolate-bootstrap-target-residual` | `future-bootstrap-target-normalization-roadmap-input` | + +## Reconciliation Asset + +- `tests/fixtures/generated_scene/official_board_reconciliation_2026-04-19.json` + +## Conclusion + +Official board reconciliation is complete. + +The 102-scene board now has no unresolved framework status. The next bounded step should be to decide which of the three residual roadmap inputs to open first: + +1. local-doc runtime roadmap for the five `local_doc_pipeline` residuals; +2. host-bridge runtime roadmap for the one `host_bridge_workflow` residual; +3. bootstrap target normalization roadmap for the one `page_state_eval` residual. 
diff --git a/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-report.md b/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-report.md new file mode 100644 index 0000000..0c50f48 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-report.md @@ -0,0 +1,47 @@ +# Post-G7 Boundary Decision Report + +> Date: 2026-04-19 +> Roadmap: [2026-04-19-post-g7-boundary-decision-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-post-g7-boundary-decision-roadmap-plan.md) + +## Decision + +One bounded next direction is selected: + +1. `prerequisites-only hold` + +The following boundary families remain held: + +1. `G6` +2. `G8` + +## Comparison Result + +### G6 + +`G6` still needs stronger host-bridge real execution semantics than the current bounded next-step budget allows. + +So `G6` remains held. + +### G8 + +`G8` still needs local document runtime and attachment handling beyond current repo-local coverage. + +So `G8` remains held. + +### Prerequisites-Only Hold + +After `G7` has already closed as an executed boundary-family sample, the safest bounded next step is no longer another direct family-entry attempt. + +The smaller next direction is to scope the remaining prerequisites first. + +So `prerequisites-only hold` is selected. + +## Result + +The roadmap now has one bounded next direction: + +1. `boundary runtime prerequisites roadmap` + +This report does not implement any prerequisite. + +It only closes the post-`G7` boundary ambiguity. 
diff --git a/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-roadmap-closure-report.md b/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-roadmap-closure-report.md new file mode 100644 index 0000000..84ec990 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-roadmap-closure-report.md @@ -0,0 +1,32 @@ +# Post-G7 Boundary Decision Roadmap Closure Report + +> Date: 2026-04-19 +> Roadmap: [2026-04-19-post-g7-boundary-decision-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-19-post-g7-boundary-decision-roadmap-plan.md) + +## Result + +This roadmap is closed with one bounded next direction: + +1. `boundary runtime prerequisites roadmap` + +## What Was Closed + +1. `G7` is no longer part of the decision set after its executed-pass closure +2. `G6` and `G8` no longer compete ambiguously with direct execution pressure +3. one follow-up bounded `design + plan` now exists for prerequisites scoping + +## Hold Result + +The following execution directions remain held: + +1. `G6 real-sample entry` +2. `G8 real-sample entry` + +The deferred families remain untouched: + +1. `G4` +2. `G5` + +## Next Step + +Execute the bounded `boundary runtime prerequisites roadmap` rather than reopening this roadmap. diff --git a/docs/superpowers/reports/2026-04-19-promotion-and-board-reconciliation-policy-report.md b/docs/superpowers/reports/2026-04-19-promotion-and-board-reconciliation-policy-report.md new file mode 100644 index 0000000..a6458d9 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-promotion-and-board-reconciliation-policy-report.md @@ -0,0 +1,74 @@ +# Promotion And Board Reconciliation Policy Report + +Date: 2026-04-19 + +Parent framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan` + +Parent route: `Route 6 / promotion and board reconciliation` + +Plan: `2026-04-19-promotion-and-board-reconciliation-policy-plan.md` + +## Scope + +This is a policy-only route. 
It defines how later reconciliation may update official scene-state assets. + +This route did not: + +- modify `src/generated_scene/analyzer.rs` +- modify `src/generated_scene/generator.rs` +- update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +- promote any scene + +## Policy Inputs + +The policy covers these status inputs: + +- `auto-pass` +- `fail-closed-known` +- `adjudicated-valid-host-bridge` +- `timeout-as-pass-candidate` +- `timeout-as-fail-closed-candidate` +- `timeout-still-unreadable` +- `timeout-rerun-error` + +## Promotion Thresholds + +| Input status | Candidate board status | Auto-update board | +| --- | --- | --- | +| `auto-pass` | `framework-auto-pass-candidate` | no | +| `fail-closed-known` | `framework-structured-fail-closed` | no | +| `adjudicated-valid-host-bridge` | `framework-valid-host-bridge` | no | +| `timeout-as-pass-candidate` | `hygiene-pass-candidate` | no | +| `timeout-as-fail-closed-candidate` | `hygiene-fail-closed-candidate` | no | + +## Board Update Rules + +1. Diagnostic reruns, hygiene interpretations, and route-local follow-ups cannot directly update the official execution board. +2. Board changes require a dedicated reconciliation plan that names source assets and target statuses. +3. Pass-like promotion requires complete generation evidence under the agreed sweep or validation budget. +4. Structured fail-closed is framework-supported coverage, not execution pass. +5. Adjudicated host-bridge scenes remain host-bridge classified unless a later implementation closes their original expected contract. + +## Timeout Hygiene Representation + +Raw `source-unreadable` status must be preserved. 
A hygiene-aware status can be published alongside it: + +- `timeout-as-pass-candidate` +- `timeout-as-fail-closed-candidate` +- `timeout-still-unreadable` +- `timeout-rerun-error` + +## Structured Fail-Closed Representation + +Structured fail-closed is recognized framework coverage when: + +- a generation report exists +- the blocker is named +- missing pieces or failed gates are recorded +- a diagnostic payload such as `contractSnapshot` is available + +It is not an execution pass. + +## Stop Statement + +Route 6 policy is published. Do not update the execution board under this plan. diff --git a/docs/superpowers/reports/2026-04-19-real-sample-validation-roadmap-closure-report.md b/docs/superpowers/reports/2026-04-19-real-sample-validation-roadmap-closure-report.md new file mode 100644 index 0000000..5019594 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-real-sample-validation-roadmap-closure-report.md @@ -0,0 +1,46 @@ +# Real Sample Validation Roadmap Closure Report + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md` +> Result: Completed + +## 1. Conclusion + +This roadmap has been completed inside its boundary. + +All selected mainline anchors now have executed real-sample records: + +1. `G2`: executed mismatch +2. `G1-E`: executed pass +3. `G3`: executed mismatch + +## 2. Scope Decision + +The next bounded implementation scope should target: + +1. `G3` real-sample archetype correction +2. `G2` real-sample contract correction + +## 3. Boundary Decision + +`G6/G7/G8` remain boundary families and do not enter the next roadmap. + +## 4. Deferred Decision + +`G4/G5` remain closed and do not enter the next roadmap. + +## 5. Updated Assets + +1. `tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. `tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json` +4. 
`docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md` +5. `docs/superpowers/reports/2026-04-19-real-sample-validation-triage-report.md` +6. `docs/superpowers/reports/2026-04-19-boundary-runtime-entry-decision.md` +7. `docs/superpowers/reports/2026-04-19-deferred-family-entry-decision.md` + +## 6. Final Status + +The strongest current execution pressure remains in the completed-roadmap mainline families. + +That means the next roadmap should stay bounded around mainline real-sample mismatch correction instead of drifting into boundary-family or deferred-family expansion. diff --git a/docs/superpowers/reports/2026-04-19-real-sample-validation-triage-report.md b/docs/superpowers/reports/2026-04-19-real-sample-validation-triage-report.md new file mode 100644 index 0000000..ad40aed --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-real-sample-validation-triage-report.md @@ -0,0 +1,64 @@ +# Real Sample Validation Triage Report + +> Date: 2026-04-19 +> Roadmap: `docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md` + +## 1. Triage Result + +The three current mainline anchors now classify as: + +1. `G2`: `mismatch-driven` +2. `G1-E`: `stable` +3. `G3`: `mismatch-driven` + +## 2. Family-Level Reading + +### G2 + +`G2` already had a real mismatch anchor from `台区线损大数据-月_周累计线损率统计分析`. + +The key failures remain: + +1. archetype mismatch +2. bootstrap mismatch +3. request contract missing +4. column semantics missing + +This is a mainline compiler-family gap. + +### G1-E + +`G1-E` remains the only stable positive real-sample anchor. + +Its current role is not to open a new scope but to act as the positive baseline against which future real-sample regressions are judged. + +### G3 + +`G3` now has an executed real mismatch anchor. + +The key finding is: + +1. the real sample was executed +2. the sample did not stay in `paginated_enrichment` +3. the run collapsed into `local_doc_pipeline` +4. 
the result then failed closed + +This is also a mainline compiler-family gap. + +## 3. Scope Decision + +The next bounded implementation scope should be: + +1. `mainline G3 real-sample archetype correction` +2. followed by `mainline G2 real-sample contract correction` + +The next scope should not be: + +1. a boundary-family roadmap for `G6/G7/G8` +2. a deferred-family entry round for `G4/G5` + +## 4. Reason + +The strongest validation pressure still comes from unresolved mainline gaps. + +As long as `G3` and `G2` both fail under real execution, opening boundary or deferred families would be scope drift. diff --git a/docs/superpowers/reports/2026-04-19-remaining-route-conflict-correction-report.md b/docs/superpowers/reports/2026-04-19-remaining-route-conflict-correction-report.md new file mode 100644 index 0000000..8ad8818 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-remaining-route-conflict-correction-report.md @@ -0,0 +1,54 @@ +# Remaining Route Conflict Correction Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-remaining-route-conflict-correction-plan.md` +> Decision Asset: `tests/fixtures/generated_scene/remaining_route_conflict_decisions_2026-04-19.json` + +## Scope + +This report covers only the `4` route conflicts from the follow-up full sweep. + +No timeout handling, structured fail-closed reporting, family baseline, or execution-board status was changed by this plan. 
+ +## Final Decisions + +| Scene | Expected group | Previous inferred archetype | Final decision | +| --- | --- | --- | --- | +| `95598报修工单日管控` | `G3` | `host_bridge_workflow` | `valid-host-bridge-workflow` | +| `95598重要服务事项报备统计表` | `G3` | `host_bridge_workflow` | `valid-host-bridge-workflow` | +| `台区线损台区月度高负损预测` | `G2` | `host_bridge_workflow` | `valid-host-bridge-workflow` | +| `配网支撑月报(95598抢修统计报表)` | `G3` | `host_bridge_workflow` | `valid-host-bridge-workflow` | + +## Decision Rationale + +The targeted probes showed that the expected `G3` or `G2` contracts do not close for these scenes: + +1. the `G3` cases lack a closed main request, pagination plan, enrichment request, or join key +2. the `G2` case lacks a closed mode matrix and mode-specific request contracts +3. `host_bridge_workflow` remains the only complete execution path for all four scenes + +Therefore, this plan does not force the scenes back into `G3` or `G2`. + +## Verification + +Targeted probes were run for exactly the fixed `4` records. + +Regression checks: + +1. `cargo test --test scene_generator_test -- --nocapture` +2. `cargo test --test scene_generator_canonical_test -- --nocapture` + +Both passed. + +## Implementation Result + +No analyzer route patch was required by the evidence adjudication. + +The route conflict is resolved as a board-expectation/adjudication issue, not a generator bug to be patched in this plan. + +## Stop Statement + +This plan stops here. + +It does not update `scene_execution_board_2026-04-18.json`, does not promote any scene, and does not open another plan. 
+ diff --git a/docs/superpowers/reports/2026-04-19-residual-13-followup-sweep-report.md b/docs/superpowers/reports/2026-04-19-residual-13-followup-sweep-report.md new file mode 100644 index 0000000..74b3445 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-residual-13-followup-sweep-report.md @@ -0,0 +1,74 @@ +# Residual 13 Follow-Up Sweep Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-residual-13-followup-sweep-and-reconciliation-plan.md` +> Parent Route: `Residual Route E` +> Status: Completed + +## Scope + +This report measures the cumulative delta after residual Routes A through D. It only reruns the fixed `13` residual scenes from the residual closure plan. + +This route does not update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json`, does not add a family, and does not modify analyzer or generator logic. + +## Input + +- `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` +- Fixed input bucket: `13` scenes with `framework-structured-fail-closed` +- Output root: `examples/residual_13_followup_sweep_2026-04-19` + +## Result + +| Raw status | Count | +| --- | ---: | +| `auto-pass` | 6 | +| `fail-closed-known` | 7 | +| `source-unreadable` | 0 | +| `missing-source` | 0 | +| `generator-error` | 0 | +| `unknown-report-status` | 0 | + +## By Archetype + +| Archetype | Count | +| --- | ---: | +| `paginated_enrichment` | 4 | +| `multi_mode_request` | 2 | +| `local_doc_pipeline` | 5 | +| `host_bridge_workflow` | 1 | +| `page_state_eval` | 1 | + +## Closed By Routes A-D + +These residuals now rerun as `auto-pass`: + +| Scene ID | Scene | Archetype | Readiness | +| --- | --- | --- | --- | +| `sweep-007-scene` | `95598供电服务月报` | `paginated_enrichment` | A | +| `sweep-018-scene` | `白银线损周报` | `multi_mode_request` | A | +| `sweep-039-scene` | `故障报修工单信息统计表` | `paginated_enrichment` | A | +| `sweep-068-scene` | `输变电设备运行分析报告` | `paginated_enrichment` | A | +| `sweep-071-scene` | 
`台区线损大数据-月_周累计线损率统计分析` | `multi_mode_request` | A | +| `sweep-084-scene` | `巡视计划完成情况自动检索` | `paginated_enrichment` | A | + +## Remaining Structured Fail-Closed + +These residuals remain structured fail-closed and are explained by Route C or Route D decisions: + +| Scene ID | Scene | Archetype | Decision | +| --- | --- | --- | --- | +| `sweep-033-scene` | `供电可靠率指标统计表` | `local_doc_pipeline` | hold for local-doc runtime roadmap | +| `sweep-034-scene` | `供电可靠性数据质量自查报告月报` | `local_doc_pipeline` | hold for local-doc runtime roadmap | +| `sweep-042-scene` | `国网金昌供电公司营商环境周例会报告` | `local_doc_pipeline` | hold for local-doc runtime roadmap | +| `sweep-051-scene` | `嘉峪关可靠性分析报告` | `local_doc_pipeline` | hold for local-doc runtime roadmap | +| `sweep-074-scene` | `同兴智能安全督查日报` | `local_doc_pipeline` | hold for local-doc runtime roadmap | +| `sweep-085-scene` | `业扩报装管理制度` | `host_bridge_workflow` | hold for host-bridge runtime roadmap | +| `sweep-091-scene` | `用户停电频次分析监测` | `page_state_eval` | isolate bootstrap target residual | + +## Output Asset + +- `tests/fixtures/generated_scene/residual_13_followup_sweep_2026-04-19.json` + +## Conclusion + +Residual follow-up reduced the fixed residual set from `13` structured fail-closed records to `7` structured fail-closed records. No scene is unreadable, missing, or unresolved under this route. 
diff --git a/docs/superpowers/reports/2026-04-19-residual-13-reconciliation-candidates-report.md b/docs/superpowers/reports/2026-04-19-residual-13-reconciliation-candidates-report.md new file mode 100644 index 0000000..5435a96 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-residual-13-reconciliation-candidates-report.md @@ -0,0 +1,62 @@ +# Residual 13 Reconciliation Candidates Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-residual-13-followup-sweep-and-reconciliation-plan.md` +> Policy Source: `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` +> Status: Completed + +## Scope + +This report applies Route 6 promotion policy to the residual 13 follow-up sweep results. It publishes reconciliation candidates only. + +This report does not update the official execution board. + +## Candidate Summary + +| Candidate status | Count | +| --- | ---: | +| `framework-auto-pass-candidate` | 6 | +| `framework-structured-fail-closed` | 7 | +| `source-unreadable` | 0 | +| `unresolved-followup-status` | 0 | + +## Auto-Pass Candidates + +The following scenes are candidates for future explicit board reconciliation as framework auto-pass candidates: + +| Scene ID | Scene | Archetype | +| --- | --- | --- | +| `sweep-007-scene` | `95598供电服务月报` | `paginated_enrichment` | +| `sweep-018-scene` | `白银线损周报` | `multi_mode_request` | +| `sweep-039-scene` | `故障报修工单信息统计表` | `paginated_enrichment` | +| `sweep-068-scene` | `输变电设备运行分析报告` | `paginated_enrichment` | +| `sweep-071-scene` | `台区线损大数据-月_周累计线损率统计分析` | `multi_mode_request` | +| `sweep-084-scene` | `巡视计划完成情况自动检索` | `paginated_enrichment` | + +## Structured Fail-Closed Candidates + +The following scenes remain framework-supported but are not execution-pass candidates: + +| Scene ID | Scene | Archetype | Overlay | +| --- | --- | --- | --- | +| `sweep-033-scene` | `供电可靠率指标统计表` | `local_doc_pipeline` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-034-scene` | 
`供电可靠性数据质量自查报告月报` | `local_doc_pipeline` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-042-scene` | `国网金昌供电公司营商环境周例会报告` | `local_doc_pipeline` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-051-scene` | `嘉峪关可靠性分析报告` | `local_doc_pipeline` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-074-scene` | `同兴智能安全督查日报` | `local_doc_pipeline` | `hold-for-local-doc-runtime-roadmap` | +| `sweep-085-scene` | `业扩报装管理制度` | `host_bridge_workflow` | `hold-for-host-bridge-runtime-roadmap` | +| `sweep-091-scene` | `用户停电频次分析监测` | `page_state_eval` | `isolate-bootstrap-target-residual` | + +## Board Policy + +Per Route 6 policy, these candidates must not directly update `scene_execution_board_2026-04-18.json`. + +A future dedicated board reconciliation plan may consume this asset and decide official status updates. + +## Output Asset + +- `tests/fixtures/generated_scene/residual_13_reconciliation_candidates_2026-04-19.json` + +## Conclusion + +Residual Route E has no unresolved follow-up state. The remaining residual count is `7`, all explained by boundary/runtime or bootstrap isolation decisions. diff --git a/docs/superpowers/reports/2026-04-19-residual-runtime-roadmap-prioritization-report.md b/docs/superpowers/reports/2026-04-19-residual-runtime-roadmap-prioritization-report.md new file mode 100644 index 0000000..d8ccf89 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-residual-runtime-roadmap-prioritization-report.md @@ -0,0 +1,47 @@ +# Residual Runtime Roadmap Prioritization Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-residual-runtime-roadmap-prioritization-plan.md` +> Status: Completed + +## Scope + +This is a decision-only step after official board reconciliation. It compares the three residual roadmap inputs and selects exactly one next roadmap. + +This plan did not modify analyzer/generator logic and did not update the official execution board. 
+ +## Inputs + +- `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +- `tests/fixtures/generated_scene/official_board_reconciliation_2026-04-19.json` + +## Candidate Roadmaps + +| Candidate | Residual count | Score | Decision | +| --- | ---: | ---: | --- | +| `local-doc runtime roadmap` | 5 | 51 | selected | +| `bootstrap target normalization roadmap` | 1 | 11 | deferred | +| `host-bridge runtime roadmap` | 1 | 8 | deferred | + +## Selected Roadmap + +`local-doc runtime roadmap` is selected because it covers the largest residual bucket: five `local_doc_pipeline` scenes. + +Selected next roadmap files: + +- `docs/superpowers/specs/2026-04-19-local-doc-runtime-roadmap-design.md` +- `docs/superpowers/plans/2026-04-19-local-doc-runtime-roadmap-plan.md` + +## Deferred Roadmaps + +`host-bridge runtime roadmap` is deferred because it currently covers one residual and carries higher risk of affecting existing host-bridge auto-pass paths. + +`bootstrap target normalization roadmap` is deferred because it currently covers one residual and should remain isolated until the higher-impact local-doc bucket is handled. + +## Output Asset + +- `tests/fixtures/generated_scene/residual_runtime_roadmap_prioritization_2026-04-19.json` + +## Conclusion + +The next bounded roadmap is local-doc runtime. Do not start host-bridge runtime or bootstrap normalization until this selected roadmap is closed or explicitly superseded. 
diff --git a/docs/superpowers/reports/2026-04-19-scene-skill-102-final-materialization-report.md b/docs/superpowers/reports/2026-04-19-scene-skill-102-final-materialization-report.md new file mode 100644 index 0000000..d79ee15 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-scene-skill-102-final-materialization-report.md @@ -0,0 +1,63 @@ +# Scene Skill 102 Final Materialization Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-scene-skill-102-final-materialization-plan.md` + +## Scope + +This plan materialized the final skill package set for the current 102 scene framework closure. + +Existing `examples/*` follow-up roots were not cleaned or modified. The final materialization was written to: + +`D:/data/ideaSpace/rust/sgClaw/claw-new/examples/scene_skill_102_final_materialization_2026-04-19` + +## Summary + +| Metric | Count | +| --- | ---: | +| total scenes | 102 | +| attempted | 102 | +| generated skill directories | 102 | +| required file checks passed | 101 | +| failures | 1 | + +Duration: `2036.246` seconds + +## Readiness Distribution + +| Readiness | Count | +| --- | ---: | +| `A` | 100 | +| `C` | 2 | + +## Archetype Distribution + +| Archetype | Count | +| --- | ---: | +| `host_bridge_workflow` | 27 | +| `local_doc_pipeline` | 5 | +| `multi_endpoint_inventory` | 2 | +| `multi_mode_request` | 10 | +| `page_state_eval` | 2 | +| `paginated_enrichment` | 51 | +| `single_request_enrichment` | 5 | + +## Assets + +- `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +- `tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` +- `D:/data/ideaSpace/rust/sgClaw/claw-new/examples/scene_skill_102_final_materialization_2026-04-19` + +## Failures + +| Scene id | Scene name | Error | Missing files | +| --- | --- | --- | --- | +| `sweep-012-scene` | `业扩报装管理制度` | `generator-exit-nonzero` | `SKILL.toml, SKILL.md, scene.toml, scripts/*` | + +## Closure + +This materialization plan 
published the required final materialization assets: a 102-row manifest, an explicit failures asset, the isolated output root, and this report. + +The final materialized skill asset set is not yet 102/102 complete because `sweep-012-scene` failed package materialization and is missing `SKILL.toml`, `SKILL.md`, `scene.toml`, and `scripts/*`. + +Next work should not start static/mock validation yet. It should first use a separate bounded recovery plan for `sweep-012-scene`, then refresh this final materialization manifest. This plan does not perform recovery, static, mock, or production validation. diff --git a/docs/superpowers/reports/2026-04-19-scene-skill-102-framework-closure-rollup-report.md b/docs/superpowers/reports/2026-04-19-scene-skill-102-framework-closure-rollup-report.md new file mode 100644 index 0000000..6637d45 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-scene-skill-102-framework-closure-rollup-report.md @@ -0,0 +1,40 @@ +# Scene Skill 102 Framework Closure Rollup Report + +> Date: 2026-04-19 +> Plan: `2026-04-19-102-framework-closure-rollup-plan.md` +> Parent framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` + +## Result + +| Metric | Count | +| --- | ---: | +| total scenes | 102 | +| `framework-auto-pass` | 102 | +| `framework-structured-fail-closed` | 0 | +| `source-unreadable` | 0 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| `misclassified-unresolved` | 0 | +| `unresolved-followup-status` | 0 | +| unresolved total | 0 | + +Closure status: `complete` + +## Remaining Structured Fail-Closed + +None. + +## Interpretation + +The 102-scene set is closed at the framework layer: every scene now has a framework auto-pass status in the official board. + +This is not the same as saying every scene has real-sample `executed-pass` validation. Real-sample validation remains a separate layer. 
The framework closure means the generator/analyzer framework can classify and generate a bounded skill package path for all 102 scenes without unresolved timeout, unsupported-family, unresolved route conflict, or structured fail-closed residual. + +## Inputs + +- `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +- `tests/fixtures/generated_scene/final_2_official_board_reconciliation_refresh_2026-04-19.json` + +## Closure + +The `102` full coverage parent framework has reached framework-level closure. Further work should be planned as a new validation or production-hardening roadmap, not as continuation of unresolved framework coverage. diff --git a/docs/superpowers/reports/2026-04-19-structured-fail-closed-bootstrap-isolation-report.md b/docs/superpowers/reports/2026-04-19-structured-fail-closed-bootstrap-isolation-report.md new file mode 100644 index 0000000..b661e08 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-structured-fail-closed-bootstrap-isolation-report.md @@ -0,0 +1,14 @@ +# Structured Fail-Closed Bootstrap Isolation Report + +> Date: 2026-04-19 + +## Bootstrap Records + +| Scene ID | Archetype | Reason | Decision | +| --- | --- | --- | --- | +| `sweep-012-scene` | `host_bridge_workflow` | `workflow evidence is incomplete before package generation` | `defer_to_bootstrap_normalization` | +| `sweep-066-scene` | `page_state_eval` | `bootstrap_target` | `defer_to_bootstrap_normalization` | + +## Scope Confirmation + +No login recovery or bootstrap auto-recovery is implemented in this roadmap. 
diff --git a/docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-coverage-delta-report.md b/docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-coverage-delta-report.md new file mode 100644 index 0000000..be01281 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-coverage-delta-report.md @@ -0,0 +1,109 @@ +# Structured Fail-Closed Improvement Coverage Delta Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md` +> Baseline: `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` +> Follow-up: `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +## Purpose + +Measure the impact of the bounded structured fail-closed improvement cycle against the reconciled `102` scene baseline. + +This report does not promote scenes, update `scene_execution_board_2026-04-18.json`, introduce new scene families, or relax readiness gates. 
+ +## Baseline + +The reconciled baseline before this roadmap was: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 2 | +| Total | 102 | + +The baseline actionable count was `100`, using: + +`auto-pass + fail-closed-known + adjudicated-valid-host-bridge` + +## Follow-Up Sweep + +The follow-up sweep was run over the same fixed `102` scene set and wrote results to: + +`tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +Follow-up status: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 47 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 3 | +| Total | 102 | + +Follow-up actionable count is `99`, using: + +`auto-pass + fail-closed-known + adjudicated-valid-host-bridge` + +## Delta + +| Metric | Baseline | Follow-up | Delta | +| --- | ---: | ---: | ---: | +| `auto-pass` | 48 | 48 | 0 | +| `fail-closed-known` | 48 | 47 | -1 | +| `adjudicated-valid-host-bridge` | 4 | 4 | 0 | +| `source-unreadable` | 2 | 3 | +1 | +| actionable count | 100 | 99 | -1 | + +## Interpretation + +This roadmap did not increase `auto-pass` coverage. + +The bounded implementation improved the structure of pre-package fail-closed reports by preserving the full `SceneIr` as `contractSnapshot`. This makes fail-closed records more diagnosable, but it does not close missing contracts by itself. 
+ +The follow-up sweep exposed one new timeout relative to the reconciled baseline: + +| Scene id | Scene | Baseline status | Follow-up status | Reason | +| --- | --- | --- | --- | --- | +| `sweep-040-scene` | `嘉峪关日报` | `fail-closed-known` | `source-unreadable` | `generator timeout after 45s` | + +The two existing timeout records remain timeout records: + +| Scene id | Scene | Status | +| --- | --- | --- | +| `sweep-015-scene` | `任务报表` | `source-unreadable` | +| `sweep-025-scene` | `力禾动环系统巡视记录` | `source-unreadable` | + +## Remaining Structured Fail-Closed Buckets + +The follow-up sweep still has `47` structured fail-closed records: + +| Archetype | Reason | Count | +| --- | --- | ---: | +| `paginated_enrichment` | `workflow evidence is incomplete before package generation` | 34 | +| `local_doc_pipeline` | `workflow evidence is incomplete before package generation` | 5 | +| `multi_mode_request` | `workflow evidence is incomplete before package generation` | 4 | +| `single_request_enrichment` | `workflow evidence is incomplete before package generation` | 2 | +| `host_bridge_workflow` | `workflow evidence is incomplete before package generation` | 1 | +| `page_state_eval` | `structured fail-closed report emitted` | 1 | + +The largest remaining bucket is still `paginated_enrichment`, now `34` records. + +## Boundaries Preserved + +This follow-up did not: + +1. update `scene_execution_board_2026-04-18.json` +2. promote scenes +3. introduce a new scene family +4. reopen adjudicated host-bridge records +5. handle timeout records as implementation targets +6. loosen readiness gates + +## Conclusion + +The roadmap produced a better fail-closed evidence artifact, but it did not increase `auto-pass` coverage. The measured status is slightly worse on broad actionable count because one previous fail-closed scene timed out in the follow-up sweep. 
+ +The next input, outside this roadmap, is a bounded timeout regression diagnostic for `sweep-040-scene` and the two persistent timeout records, or a new recovery roadmap focused on the remaining `34` `paginated_enrichment` fail-closed records. diff --git a/docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-roadmap-closure-report.md b/docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-roadmap-closure-report.md new file mode 100644 index 0000000..c256052 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-roadmap-closure-report.md @@ -0,0 +1,170 @@ +# Structured Fail-Closed Improvement Roadmap Closure Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md` +> Status: Closed + +## Scope + +This roadmap handled the `48` structured fail-closed records from the reconciled `102` scene sweep. + +The roadmap boundary explicitly excluded: + +1. new scene families +2. `G4/G5` +3. login recovery +4. full host runtime transport +5. local document attachment runtime +6. `scene_execution_board_2026-04-18.json` updates +7. direct scene promotion +8. timeout implementation work +9. readiness gate relaxation + +Those boundaries were preserved. + +## Completed Phases + +### Phase 0: Freeze Structured Fail-Closed Baseline + +Completed. + +Input baseline: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 2 | +| Total | 102 | + +Only `fail-closed-known` records entered the inventory phase. + +### Phase 1: Inventory and Gap Taxonomy + +Completed. + +Deliverables: + +1. `tests/fixtures/generated_scene/structured_fail_closed_inventory_2026-04-19.json` +2. 
`docs/superpowers/reports/2026-04-19-structured-fail-closed-inventory-report.md` + +Inventory summary: + +| Primary label | Count | +| --- | ---: | +| `enrichment_request_missing` | 23 | +| `export_plan_missing` | 12 | +| `local_doc_contract_missing` | 5 | +| `mode_request_contract_missing` | 4 | +| `single_request_enrichment_contract_missing` | 2 | +| `bootstrap_target_unresolved` | 2 | + +The `paginated_enrichment` bucket was split into: + +| Primary label | Count | +| --- | ---: | +| `enrichment_request_missing` | 23 | +| `export_plan_missing` | 12 | + +### Phase 2: G3 Paginated Enrichment Recovery Slice + +Completed as a bounded evidence-preservation slice. + +The top repeated G3 gap was `enrichment_request_missing`, but there was not enough traceable evidence to safely synthesize missing enrichment requests without relaxing the `G3` contract. + +Instead, the implementation improved structured fail-closed observability by embedding `contractSnapshot` in pre-package fail-closed reports. This preserves the full `SceneIr` for future recovery analysis while keeping the result fail-closed. + +Changed implementation: + +1. `src/generated_scene/generator.rs` +2. `tests/scene_generator_test.rs` + +This slice did not: + +1. hardcode scene names +2. relax gates +3. convert incomplete `G3` records into pass +4. change canonical `G3` behavior + +### Phase 3: Small-Bucket Recovery Slice + +Completed as inspection and explicit deferral. + +Deliverable: + +`docs/superpowers/reports/2026-04-19-structured-fail-closed-small-bucket-decision-report.md` + +No small-bucket implementation slice was started because the largest remaining value stayed in the `G3` structured fail-closed bucket, and the plan allowed at most one non-G3 bounded slice. + +### Phase 4: Bootstrap Target Isolation + +Completed. + +Deliverable: + +`docs/superpowers/reports/2026-04-19-structured-fail-closed-bootstrap-isolation-report.md` + +Bootstrap target records were kept out of G3 and small-bucket recovery. 
No login recovery or bootstrap auto-recovery was implemented. + +### Phase 5: Follow-Up Sweep and Coverage Delta + +Completed. + +Deliverables: + +1. `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-coverage-delta-report.md` + +Follow-up summary: + +| Status | Baseline | Follow-up | Delta | +| --- | ---: | ---: | ---: | +| `auto-pass` | 48 | 48 | 0 | +| `fail-closed-known` | 48 | 47 | -1 | +| `adjudicated-valid-host-bridge` | 4 | 4 | 0 | +| `source-unreadable` | 2 | 3 | +1 | + +The follow-up did not improve pass count and exposed one additional timeout: + +`sweep-040-scene` / `嘉峪关日报` + +## Validation + +The implementation slice was validated with: + +1. `cargo test --test scene_generator_test generator_blocks_incomplete_paginated_enrichment_workflow -- --nocapture` +2. `cargo test --test scene_generator_canonical_test -- --nocapture` + +Both passed. + +The fixed `102` scene follow-up sweep completed and wrote: + +`tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +## Closure Decision + +This roadmap is closed. + +It completed its required inventory, bounded implementation, small-bucket inspection, bootstrap isolation, follow-up sweep, and delta report. + +The outcome is not a coverage increase. The outcome is better structured fail-closed evidence plus a measured follow-up baseline: + +| Metric | Result | +| --- | --- | +| `auto-pass` coverage | unchanged at `48/102` | +| broad actionable count | `100 -> 99` | +| remaining structured fail-closed | `47` | +| remaining timeout | `3` | + +## Next Inputs + +The next work is outside this roadmap. + +Highest-value inputs are: + +1. bounded timeout regression diagnostic for `sweep-040-scene` +2. timeout diagnostic for the two persistent timeout records +3. 
a new bounded recovery roadmap for the remaining `34` `paginated_enrichment` structured fail-closed records + +No next roadmap is started by this closure report. diff --git a/docs/superpowers/reports/2026-04-19-structured-fail-closed-inventory-report.md b/docs/superpowers/reports/2026-04-19-structured-fail-closed-inventory-report.md new file mode 100644 index 0000000..3ed5066 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-structured-fail-closed-inventory-report.md @@ -0,0 +1,42 @@ +# Structured Fail-Closed Inventory Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md` +> Inventory: `tests/fixtures/generated_scene/structured_fail_closed_inventory_2026-04-19.json` + +## Summary + +Total structured fail-closed records: `48`. + +| Inferred archetype | Count | +| --- | ---: | +| `host_bridge_workflow` | 1 | +| `local_doc_pipeline` | 5 | +| `multi_mode_request` | 4 | +| `page_state_eval` | 1 | +| `paginated_enrichment` | 35 | +| `single_request_enrichment` | 2 | + +## Primary Missing Contract Labels + +| Label | Count | +| --- | ---: | +| `bootstrap_target_unresolved` | 2 | +| `enrichment_request_missing` | 23 | +| `export_plan_missing` | 12 | +| `local_doc_contract_missing` | 5 | +| `mode_request_contract_missing` | 4 | +| `single_request_enrichment_contract_missing` | 2 | + +## G3 Bucket + +| G3 primary label | Count | +| --- | ---: | +| `enrichment_request_missing` | 23 | +| `export_plan_missing` | 12 | + +The largest G3 repeated gap is the first candidate for bounded recovery analysis. Recovery remains conditional: fields must be traceable to source evidence and existing G3 canonical/real-sample regressions must stay green. + +## Scope Confirmation + +No implementation was performed in this inventory phase. No execution board status was changed. 
diff --git a/docs/superpowers/reports/2026-04-19-structured-fail-closed-small-bucket-decision-report.md b/docs/superpowers/reports/2026-04-19-structured-fail-closed-small-bucket-decision-report.md new file mode 100644 index 0000000..2bf8941 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-structured-fail-closed-small-bucket-decision-report.md @@ -0,0 +1,17 @@ +# Structured Fail-Closed Small-Bucket Decision Report + +> Date: 2026-04-19 + +## Scope + +This report inspects non-G3 structured fail-closed buckets after Phase 2. It does not start G8 attachment runtime, full host runtime transport, or login recovery. + +| Archetype | Primary label | Count | Decision | +| --- | --- | ---: | --- | +| `local_doc_pipeline` | `local_doc_contract_missing` | 5 | `defer_no_attachment_runtime` | +| `multi_mode_request` | `mode_request_contract_missing` | 4 | `defer_after_g3_no_mode_pattern_implemented` | +| `single_request_enrichment` | `single_request_enrichment_contract_missing` | 2 | `defer_small_count` | + +## Decision + +No small-bucket implementation slice is started in this roadmap. The G3 slice is intentionally limited to structured evidence preservation, and the remaining small buckets either require runtime capability or have too little repeated evidence to justify a safe generic correction. diff --git a/docs/superpowers/reports/2026-04-19-timeout-budget-rerun-hygiene-report.md b/docs/superpowers/reports/2026-04-19-timeout-budget-rerun-hygiene-report.md new file mode 100644 index 0000000..196a133 --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-timeout-budget-rerun-hygiene-report.md @@ -0,0 +1,76 @@ +# Timeout Budget and Rerun Hygiene Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-timeout-budget-rerun-hygiene-plan.md` +> Source diagnostic: `tests/fixtures/generated_scene/timeout_regression_diagnostic_2026-04-19.json` + +## Scope + +This plan only converts the three timeout diagnostic records into a rerun-hygiene layer. 
+
+It does not:
+
+1. change analyzer or generator code
+2. update `scene_execution_board_2026-04-18.json`
+3. promote scenes
+4. rerun the full `102` sweep
+5. treat rerun success as validated scene pass
+
+## Frozen Diagnostic Input
+
+| Diagnostic label | Count |
+| --- | ---: |
+| `timeout-rerun-pass` | 2 |
+| `timeout-rerun-fail-closed` | 1 |
+| Total | 3 |
+
+## Hygiene Mapping
+
+| Diagnostic label | Hygiene status |
+| --- | --- |
+| `timeout-rerun-pass` | `rerun-resolved-pass` |
+| `timeout-rerun-fail-closed` | `rerun-resolved-fail-closed` |
+| `timeout-rerun-timeout` | `rerun-still-timeout` |
+| `timeout-rerun-error` | `rerun-error` |
+| `timeout-large-source` | `rerun-still-timeout` |
+| `timeout-command-hang` | `rerun-still-timeout` |
+| `timeout-nondeterministic` | `rerun-error` |
+| `timeout-source-scan-heavy` | `rerun-still-timeout` |
+| `timeout-unknown` | `rerun-error` |
+
+## Hygiene Results
+
+| Scene id | Scene | Diagnostic label | Hygiene status | Elapsed seconds | Result |
+| --- | --- | --- | --- | ---: | --- |
+| `sweep-015-scene` | `任务报表` | `timeout-rerun-pass` | `rerun-resolved-pass` | 74.76 | readiness `A` |
+| `sweep-025-scene` | `力禾动环系统巡视记录` | `timeout-rerun-pass` | `rerun-resolved-pass` | 49.03 | readiness `A` |
+| `sweep-040-scene` | `嘉峪关日报` | `timeout-rerun-fail-closed` | `rerun-resolved-fail-closed` | 45.91 | fail-closed, readiness `C` |
+
+## Summary
+
+| Hygiene status | Count |
+| --- | ---: |
+| `rerun-resolved-pass` | 2 |
+| `rerun-resolved-fail-closed` | 1 |
+| `rerun-still-timeout` | 0 |
+| `rerun-error` | 0 |
+
+## Interpretation
+
+1. The timeout bucket should not be treated as a single unreadable category.
+2. Two timeout records are budget-sensitive pass candidates under the bounded rerun budget.
+3. `sweep-040-scene` should not stay grouped with hard unreadable inputs.
Under rerun hygiene, it belongs to `rerun-resolved-fail-closed`, which means it should feed the structured fail-closed layer rather than the unreadable bucket. +4. This remains a hygiene layer only. None of these three records are promoted or merged into the execution board by this report. + +## Recommendation + +Use the rerun hygiene layer before any timeout implementation work or execution-board update. + +The highest-value follow-up is to apply this hygiene rule to future sweeps so budget-sensitive scenes are separated into: + +1. `rerun-resolved-pass` +2. `rerun-resolved-fail-closed` +3. `rerun-still-timeout` +4. `rerun-error` + +instead of collapsing all timeout outcomes into `source-unreadable`. diff --git a/docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md b/docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md new file mode 100644 index 0000000..3020c8d --- /dev/null +++ b/docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md @@ -0,0 +1,72 @@ +# Timeout Regression Diagnostic Report + +> Date: 2026-04-19 +> Plan: `docs/superpowers/plans/2026-04-19-timeout-regression-diagnostic-plan.md` +> Follow-up input: `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +## Scope + +This diagnostic only handled the three timeout records from the structured fail-closed improvement follow-up sweep. + +No analyzer or generator logic was changed. + +No execution board state was updated. 
+
+## Frozen Timeout Inputs
+
+| Scene id | Scene | Type | Previous reconciled status | Follow-up status |
+| --- | --- | --- | --- | --- |
+| `sweep-015-scene` | `任务报表` | persistent timeout | `source-unreadable` | `source-unreadable` |
+| `sweep-025-scene` | `力禾动环系统巡视记录` | persistent timeout | `source-unreadable` | `source-unreadable` |
+| `sweep-040-scene` | `嘉峪关日报` | regression timeout | `fail-closed-known` | `source-unreadable` |
+
+## Source Diagnostics
+
+| Scene id | File count | HTML | JS | Total bytes |
+| --- | ---: | ---: | ---: | ---: |
+| `sweep-015-scene` | 93 | 10 | 21 | 96,922,420 |
+| `sweep-025-scene` | 137 | 51 | 38 | 11,274,750 |
+| `sweep-040-scene` | 50 | 2 | 21 | 5,037,507 |
+
+Interpretation:
+
+1. `sweep-015-scene` is the largest source set by total bytes and contains many zip artifacts.
+2. `sweep-025-scene` is not the largest by bytes, but it has the highest combined HTML and JavaScript file count.
+3. `sweep-040-scene` is materially smaller than the two persistent timeout records, so its regression does not look like a pure source-scale problem.
+
+## Diagnostic Rerun
+
+A bounded diagnostic rerun was executed for each timeout record with a `90s` timeout budget.
+
+| Scene id | Elapsed seconds | Exit code | Timed out | Generation report | Result |
+| --- | ---: | ---: | --- | --- | --- |
+| `sweep-015-scene` | 74.76 | 0 | `false` | present | readiness `A` |
+| `sweep-025-scene` | 49.03 | 0 | `false` | present | readiness `A` |
+| `sweep-040-scene` | 45.91 | 1 | `false` | present | fail-closed, readiness `C` |
+
+## Final Diagnostic Labels
+
+| Scene id | Final label | Secondary labels |
+| --- | --- | --- |
+| `sweep-015-scene` | `timeout-rerun-pass` | `large-total-source`, `zip-heavy-source` |
+| `sweep-025-scene` | `timeout-rerun-pass` | `source-scan-heavy`, `high-html-js-count` |
+| `sweep-040-scene` | `timeout-rerun-fail-closed` | `regression-timeout`, `budget-sensitive-timeout` |
+
+## Conclusions
+
+1. The two persistent timeout records are not hard failures. Under a bounded `90s` diagnostic rerun, both completed successfully.
+2. `sweep-040-scene` is the only real regression timeout. Under the same `90s` diagnostic rerun, it resolved into a structured fail-closed result instead of timing out.
+3. The current timeout bucket is therefore mixed:
+   - two records are budget-sensitive successful runs
+   - one record is a budget-sensitive regression that should really be treated as a structured fail-closed case after rerun
+4. The next step should not be timeout implementation first. The higher-value next step is rerun hygiene and timeout-budget policy, so that scenes like `sweep-040-scene` do not get miscounted as unreadable when they can resolve into a concrete fail-closed result.
+
+## Boundaries Preserved
+
+This diagnostic did not:
+
+1. change analyzer or generator code
+2. update `scene_execution_board_2026-04-18.json`
+3. promote any scene
+4. rerun the full `102` sweep
+5. start an implementation correction plan
diff --git a/docs/superpowers/reports/2026-04-19-timeout-rerun-hygiene-integration-report.md b/docs/superpowers/reports/2026-04-19-timeout-rerun-hygiene-integration-report.md
new file mode 100644
index 0000000..f05b8d7
--- /dev/null
+++ b/docs/superpowers/reports/2026-04-19-timeout-rerun-hygiene-integration-report.md
@@ -0,0 +1,63 @@
+# Timeout Rerun Hygiene Integration Report
+
+> Date: 2026-04-19
+> Plan: `docs/superpowers/plans/2026-04-19-timeout-rerun-hygiene-integration-plan.md`
+> Follow-up input: `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json`
+> Hygiene input: `tests/fixtures/generated_scene/timeout_budget_rerun_hygiene_2026-04-19.json`
+
+## Scope
+
+This integration only changes the reporting and reconciliation interpretation layer.
+
+It does not:
+
+1. change analyzer or generator code
+2. rerun the `102` sweep
+3. update `scene_execution_board_2026-04-18.json`
+4. promote scenes
+5. start timeout implementation fixes
+
+## Raw Follow-Up Timeout Summary
+
+The raw follow-up sweep still reports:
+
+| Raw status | Count |
+| --- | ---: |
+| `source-unreadable` | 3 |
+
+## Hygiene-Aware Timeout Interpretation
+
+After applying the timeout rerun hygiene overlay, the same three records become:
+
+| Hygiene interpretation | Count |
+| --- | ---: |
+| `timeout-as-pass-candidate` | 2 |
+| `timeout-as-fail-closed-candidate` | 1 |
+| `timeout-still-unreadable` | 0 |
+| `timeout-rerun-error` | 0 |
+
+## Overlay Records
+
+| Scene id | Scene | Raw status | Hygiene status | Hygiene interpretation |
+| --- | --- | --- | --- | --- |
+| `sweep-015-scene` | `任务报表` | `source-unreadable` | `rerun-resolved-pass` | `timeout-as-pass-candidate` |
+| `sweep-025-scene` | `力禾动环系统巡视记录` | `source-unreadable` | `rerun-resolved-pass` | `timeout-as-pass-candidate` |
+| `sweep-040-scene` | `嘉峪关日报` | `source-unreadable` | `rerun-resolved-fail-closed` | `timeout-as-fail-closed-candidate` |
+
+## Interpretation
+
+1. The raw follow-up timeout count remains preserved as `3`.
+2. The hygiene-aware layer shows that none of these three records should remain grouped as hard unreadable sources.
+3. Two timeout records are better understood as budget-sensitive pass candidates.
+4. `sweep-040-scene` is better understood as a budget-sensitive fail-closed candidate, not as a persistent unreadable source.
+5. Future reconciliation and coverage reporting should use both views together:
+   - raw timeout count
+   - hygiene-aware timeout interpretation
+
+## Conclusion
+
+Timeout rerun hygiene is now integrated into a reconciliation-friendly reporting layer.
+
+This integration is reporting-only. It does not update scene status, execution board state, or promotion state.
+
+The next use of this integration should be inside future sweep/reconciliation reporting so raw `source-unreadable` and hygiene-aware timeout interpretation are published together.
diff --git a/docs/superpowers/reports/2026-04-20-deterministic-keyword-scoring-refinement-report.md b/docs/superpowers/reports/2026-04-20-deterministic-keyword-scoring-refinement-report.md new file mode 100644 index 0000000..bc3aa42 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-deterministic-keyword-scoring-refinement-report.md @@ -0,0 +1,54 @@ +# Deterministic Keyword Scoring Refinement Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-deterministic-keyword-scoring-refinement-plan.md` + +## Scope + +This plan refined deterministic manifest keywords and ran dispatch dry-run checks only. It did not execute browser scripts, repair `sweep-012-scene`, update the official board, or modify runtime dispatch code. + +## Summary + +| Metric | Count | +| --- | ---: | +| `totalCompletePackages` | 101 | +| `beforeReady` | 92 | +| `beforeAmbiguous` | 9 | +| `afterReady` | 101 | +| `afterAmbiguous` | 0 | +| `afterNoMatch` | 0 | +| `afterOther` | 0 | +| `fixedGapCount` | 9 | +| `fixedGapResolved` | 9 | + +## Refinement Decisions + +All complete packages now use exact full-scene-name include keywords. Pair-specific `exclude_keywords` are used only where one scene name is contained by a more specific scene name. 
+ +| Scene ID | Scene name | Keywords | Exclude keywords | +| --- | --- | --- | --- | +| `sweep-026-scene` | 县区公司故障明细 | `县区公司故障明细` | `` | +| `sweep-034-scene` | 售电收入日统计排程预测 | `售电收入日统计排程预测` | `` | +| `sweep-037-scene` | 嘉峪关可靠性分析报告 | `嘉峪关可靠性分析报告` | `` | +| `sweep-038-scene` | 嘉峪关周报 | `嘉峪关周报` | `` | +| `sweep-039-scene` | 嘉峪关故障明细 | `嘉峪关故障明细` | `` | +| `sweep-040-scene` | 嘉峪关日报 | `嘉峪关日报` | `` | +| `sweep-041-scene` | 嘉峪关月报 | `嘉峪关月报` | `` | +| `sweep-044-scene` | 国网金昌供电公司指挥中心生产例会报告 | `国网金昌供电公司指挥中心生产例会报告` | `` | +| `sweep-045-scene` | 国网金昌供电公司营商环境周例会报告 | `国网金昌供电公司营商环境周例会报告` | `` | +| `sweep-097-scene` | 重要服务事项报备统计 | `重要服务事项报备统计` | `95598` | +| `sweep-059-scene` | 故障明细 | `故障明细` | `县区公司, 嘉峪关` | +| `sweep-033-scene` | 售电收入日统计 | `售电收入日统计` | `排程预测` | + +## Runtime Dispatch Decision + +No runtime dispatch scoring change is needed for full-scene-name deterministic invocation. Manifest-level keyword and exclude-keyword refinement resolves all ambiguity gaps for the complete 101-package set. + +## Assets + +- `tests/fixtures/generated_scene/deterministic_keyword_scoring_refinement_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` + +## Closure + +The keyword/scoring refinement plan is complete. The remaining blocker for 102/102 deterministic callable skills is `sweep-012-scene` materialization recovery; parameter correctness for the 10 param-bearing scenes remains a separate hardening topic. 
diff --git a/docs/superpowers/reports/2026-04-20-final-skill-human-readable-index-report.md b/docs/superpowers/reports/2026-04-20-final-skill-human-readable-index-report.md new file mode 100644 index 0000000..271ee4b --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-final-skill-human-readable-index-report.md @@ -0,0 +1,33 @@ +# Final Skill Human-Readable Index Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-final-skill-human-readable-index-plan.md` + +## Scope + +This plan added human-readable lookup and metadata to the final materialized skill set. It did not rerun generation, repair failed packages, update the official execution board, or modify generated scripts. + +## Outputs + +- `examples/scene_skill_102_final_materialization_2026-04-19/SCENE_INDEX.md` +- `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +- updated complete package `SKILL.toml` files with `display_name`, `scene_id`, `scene_name`, `archetype`, and `readiness` +- updated complete package `SKILL.md` files with readable scene summaries + +## Summary + +| Metric | Count | +| --- | ---: | +| total scenes | 102 | +| materialized skill packages | 101 | +| failed packages | 1 | +| metadata updated | 101 | +| metadata skipped | 1 | + +## Remaining Blocker + +`sweep-012-scene / ????????` still lacks a complete skill package and was skipped for per-skill metadata normalization. It remains the input for a separate bounded materialization recovery plan. + +## Closure + +The human-readable index and metadata normalization plan is complete. The next step remains a bounded recovery plan for `sweep-012-scene` before static or mock validation starts. 
diff --git a/docs/superpowers/reports/2026-04-20-generated-scene-resolver-request-mapping-hardening-report.md b/docs/superpowers/reports/2026-04-20-generated-scene-resolver-request-mapping-hardening-report.md new file mode 100644 index 0000000..3243ef7 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-generated-scene-resolver-request-mapping-hardening-report.md @@ -0,0 +1,54 @@ +# Generated Scene Resolver Request Mapping Hardening Report + +- Date: `2026-04-20` +- Route: `resolver_request_mapping_hardening` +- Status: `completed` + +## Bounded Slice + +This route implemented the first reusable request-mapping slice in the highest-signal parameterized bucket: + +1. `multi_mode_request` scenes with explicit `org` / `period` params +2. request templates that expose request-field tokens such as `orgno`, `fdate`, `weekSfdate`, `weekEfdate` +3. generated scenes that previously had no explicit request-mapping metadata + +## Implemented Delta + +1. Added explicit request-field mapping metadata to generated-scene IR through `ModeIr.requestFieldMappings`. +2. Emitted route-local mapping metadata into generated `scene.toml` via `[[request_mappings]]`. +3. Updated generated `multi_mode_request` scripts to: + - normalize `period_payload` + - apply explicit request mappings + - stop blindly merging all raw resolver args into request bodies +4. Kept this route bounded to the first reusable slice instead of attempting 102-scene closure. + +## Mapping Examples + +| Source field | Target field | Mode | +| --- | --- | --- | +| `org_code` | `orgno` | `month` | +| `period_payload.fdate` | `fdate` | `month` | +| `period_payload.weekSfdate` | `weekSfdate` | `week` | +| `period_payload.weekEfdate` | `weekEfdate` | `week` | + +## Verification + +Passed: + +1. `cargo test --test scene_generator_test generator_derives_reusable_request_field_mappings_for_real_g2_fixture -- --nocapture` +2. 
`cargo test --test scene_generator_test generator_writes_multi_mode_package_with_generation_report -- --nocapture` +3. `cargo test --test scene_generator_modes_test -- --nocapture` +4. `cargo test --test scene_generator_test generator_writes_multi_mode_package_from_deterministic_analysis -- --nocapture` +5. `cargo test --test scene_generator_test generator_blocks_incomplete_multi_mode_contract -- --nocapture` + +## Residuals + +1. This route did not touch `runtime_url_classification`, `embedded_dictionary_extraction`, `parameter_default_semantics`, or `alias_generation`. +2. Non-`multi_mode_request` request builders were not normalized in this slice. +3. No full rematerialization or validation refresh was run yet. + +## Next Route + +Per the fixed route sequence, the next route is: + +- `runtime_url_classification_hardening` diff --git a/docs/superpowers/reports/2026-04-20-generated-scene-rule-hardening-route-sequence-report.md b/docs/superpowers/reports/2026-04-20-generated-scene-rule-hardening-route-sequence-report.md new file mode 100644 index 0000000..2f007fe --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-generated-scene-rule-hardening-route-sequence-report.md @@ -0,0 +1,20 @@ +# Generated Scene Rule Hardening Route Sequence Report + +- Date: 2026-04-20 +- Source ledger: tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json + +## Fixed Route Order + +1. resolver_request_mapping_hardening +2. runtime_url_classification_hardening +3. embedded_dictionary_extraction_hardening +4. parameter_default_semantics_recovery_hardening +5. 
alias_generation_hardening + +## Route Clusters + +- resolver_request_mapping_hardening: count=102, high=76, medium=26 +- runtime_url_classification_hardening: count=102, high=76, medium=26 +- embedded_dictionary_extraction_hardening: count=102, high=76, medium=26 +- parameter_default_semantics_recovery_hardening: count=89, high=75, medium=14 +- alias_generation_hardening: count=84, high=73, medium=11 diff --git a/docs/superpowers/reports/2026-04-20-generated-scene-source-evidence-cross-scan-report.md b/docs/superpowers/reports/2026-04-20-generated-scene-source-evidence-cross-scan-report.md new file mode 100644 index 0000000..310b176 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-generated-scene-source-evidence-cross-scan-report.md @@ -0,0 +1,60 @@ +# Generated Scene Source Evidence Cross-Scan Report + +- Date: 2026-04-20 +- Plan: docs/superpowers/plans/2026-04-20-generated-scene-source-evidence-cross-scan-plan.md +- Scope: 102 original source scenes under D:/desk/智能体资料/全量业务场景/一平台场景 +- Mode: analysis-only + +## Summary + +- Total scenes scanned: 102 +- Auto-normalized mappings: 92 +- Manual override mappings: 10 +- Unmapped scenes: 0 +- Invocation alias evidence: 85 +- Dictionary evidence: 102 +- Default parameter evidence: 102 +- Request mapping evidence: 102 +- Runtime URL evidence: 102 +- Scenes that look most similar to sweep-030-scene: 10 + +## Sweep-030 Anchor + +- Source dir: 台区线损大数据-月_周累计线损率统计分析 +- Evidence flags: {"has_invocation_alias_evidence":true,"has_dictionary_evidence":true,"has_default_parameter_evidence":true,"has_request_mapping_evidence":true,"has_runtime_url_evidence":true} +- Representative files: 
{"invocation_alias":["index.html"],"dictionary":["index.html","mca.js","css/elementui.css","js/city.js","js/common.js"],"parameter_default_semantics":["ami.js","index.html","mca.js","js/byDayWord.js","js/common.js"],"resolver_to_request_mapping":["ami.js","index.html","css/elementui.css","js/byDayWord.js","js/city.js"],"runtime_url_semantics":["ami.js","index.html","mca.js","js/byDayWord.js","js/common.js"]} +- Request field tokens: orgCode, orgno, orgNo, fdate, weekSfdate, weekEfdate, month, week + +## Scenes Most Similar To Sweep-030 + +- sweep-020-scene 供电所线路电量统计: dictionary/default/request/runtime evidence all present +- sweep-023-scene 供电质量看板-武威: dictionary/default/request/runtime evidence all present +- sweep-030-scene 台区线损大数据-月_周累计线损率统计分析: dictionary/default/request/runtime evidence all present +- sweep-031-scene 台区零度户月度用电量与台区线损电量对比核查报表: dictionary/default/request/runtime evidence all present +- sweep-070-scene 电量、站损自动采集上报: dictionary/default/request/runtime evidence all present +- sweep-076-scene 白银线损周报: dictionary/default/request/runtime evidence all present +- sweep-078-scene 线损同期差异报表: dictionary/default/request/runtime evidence all present +- sweep-079-scene 线损大数据-窃电分析: dictionary/default/request/runtime evidence all present +- sweep-082-scene 营配户变精准核查: dictionary/default/request/runtime evidence all present +- sweep-083-scene 营销业务管控监测日报表: dictionary/default/request/runtime evidence all present + +## Manual Mapping Overrides + +- sweep-001-scene -> 95598、12398、流程超期风险工单明细 +- sweep-002-scene -> 95598、12398及配网设备监控情况周统计 +- sweep-009-scene -> 95598重要服务事项报备统计表 +- sweep-030-scene -> 台区线损大数据-月_周累计线损率统计分析 +- sweep-033-scene -> 售电收入日统计 +- sweep-034-scene -> 售电收入日统计排程预测 +- sweep-036-scene -> 嘉峪关-负载监控预警分析与突增负荷筛查 +- sweep-043-scene -> 国网天水供电公司95598、12398工单情况月报 +- sweep-079-scene -> 线损大数据-窃电分析 +- sweep-088-scene -> 运营管控双周报-金昌 + +## Interpretation + +- dictionary_recovery_gap is not isolated to sweep-030-scene; multiple scenes contain source-side dictionary/tree 
signals that are not guaranteed to be fully restored into generated references today. +- parameter_default_semantics_gap appears across parameterized and report-style scenes where source JS initializes dates implicitly via moment/dayjs or mode-specific date fields. +- resolver_to_request_mapping_gap is widespread wherever source scenes embed $.ajax/fetch request bodies with business field names such as orgno, fdate, weekSfdate, and weekEfdate. +- runtime_url_semantics_gap is common because many source scenes contain both app-entry and deeper route/API URL evidence; generation currently does not prove that those URL roles are separated explicitly. +- The next child stage should build a runtime-semantics ledger from this source cross-scan before any rule hardening or rematerialization starts. diff --git a/docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md b/docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md new file mode 100644 index 0000000..440682e --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md @@ -0,0 +1,65 @@ +# Generated Scene Source-First Runtime Semantics Ledger Report + +- Date: 2026-04-20 +- Plan: docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-plan.md +- Scope: 102-scene source-first runtime semantics ledger +- Mode: analysis-only + +## Summary + +- Total scenes represented: 102 +- High risk: 76 +- Medium risk: 26 +- Low risk: 0 +- Scenes with generator-level gaps: 102 +- Scenes with runtime-only residuals: 34 + +## Gap Counts + +- invocation_alias_gap: 84 +- dictionary_recovery_gap: 102 +- parameter_default_semantics_gap: 89 +- resolver_to_request_mapping_gap: 102 +- runtime_url_semantics_gap: 102 + +## Input Asset Status + +- source cross-scan JSON parsed successfully +- deterministic invocation readiness JSON parsed successfully: False +- 
natural-language parameter readiness JSON parsed successfully: False +- parameter dictionary normalization JSON parsed successfully: False +- For malformed auxiliary assets, this stage fell back to direct `scene.toml` / `references` / script inspection rather than stopping the ledger. + +## Interpretation + +- The ledger now treats the `multi_mode_request` family and scenes with unresolved period-default semantics as the highest-risk route cluster rather than flattening the entire 102-scene set into one undifferentiated bucket. +- `runtime_url_semantics_gap` and `resolver_to_request_mapping_gap` are the broadest reusable gaps in the current 102-scene set because source-side evidence is widespread while generated manifests still do not encode explicit reusable role separation or mapping metadata. +- `dictionary_recovery_gap` remains large because generated `org-dictionary.json` assets are frequently absent or starter-sized relative to source-side dictionary/tree evidence. +- `invocation_alias_gap` is more selective than the other four gaps, which means alias hardening should be driven by scenes whose generated deterministic keywords remain narrow rather than by every scene equally. +- Host/runtime-heavy archetypes still carry `runtimeOnlyResidual` even where generator-level hardening is also indicated. 
 + +## Highest-Reuse Route Clusters + +- resolver_request_mapping_hardening: 102 +- runtime_url_classification_hardening: 102 +- parameter_default_semantics_recovery_hardening: 89 +- alias_generation_hardening: 84 +- embedded_dictionary_extraction_hardening: 102 + +## High-Risk Examples + +- sweep-002-scene 95598、12398及配网设备监控情况周统计: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" +- sweep-003-scene 95598业务处理满意率统计日报: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" +- sweep-004-scene 95598供电服务月报: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" +- sweep-005-scene 95598工单明细表: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" +- sweep-006-scene 95598抢修回单分析报告: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" 
routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" +- sweep-007-scene 95598报修工单日管控: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening, runtime_validation_followup" +- sweep-008-scene 95598服务风险清单: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" +- sweep-009-scene 95598重要服务事项报备统计表: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening, runtime_validation_followup" +- sweep-010-scene _代理购电、市场化用户电量分析: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening, runtime_validation_followup" +- sweep-011-scene 下达项目数据汇总统计: gaps="invocation_alias_gap, dictionary_recovery_gap, parameter_default_semantics_gap, resolver_to_request_mapping_gap, runtime_url_semantics_gap" 
routes="alias_generation_hardening, embedded_dictionary_extraction_hardening, parameter_default_semantics_recovery_hardening, resolver_request_mapping_hardening, runtime_url_classification_hardening" + +## Next Step + +- The next child stage should convert this ledger into bounded rule-hardening routes, prioritized by reusable coverage rather than by scene-by-scene debugging order. +- The immediate downstream artifact should be a route-design / route-sequencing plan driven by this ledger. diff --git a/docs/superpowers/reports/2026-04-20-rules-102-business-targets-candidate-report.md b/docs/superpowers/reports/2026-04-20-rules-102-business-targets-candidate-report.md new file mode 100644 index 0000000..9293e98 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-rules-102-business-targets-candidate-report.md @@ -0,0 +1,36 @@ +# Rules Candidate For 102 Scene Skills + +## Summary + +The current [rules.json](/D:/data/ideaSpace/rust/sgClaw/claw-new/resources/rules.json) file is still a demo allowlist and does not cover the real business targets used by the 102 final materialized scene skills. + +The candidate allowlist additions are published at: + +- [rules-102-business-targets-candidate.json](/D:/data/ideaSpace/rust/sgClaw/claw-new/resources/rules-102-business-targets-candidate.json) + +## Coverage + +- Business bootstrap targets found across 102 skills: `17` +- IP or `IP:port` targets: `10` +- Domain or `domain:port` targets: `7` + +## Direct Copy Candidate + +Copy the `domains.allowed_additions` array from: + +- [rules-102-business-targets-candidate.json](/D:/data/ideaSpace/rust/sgClaw/claw-new/resources/rules-102-business-targets-candidate.json) + +into the `domains.allowed` section of: + +- [rules.json](/D:/data/ideaSpace/rust/sgClaw/claw-new/resources/rules.json) + +## Recommended Merge Rule + +Preserve the existing demo entries if they are still needed for local demos, but append the 17 real business targets for the inner-network validation environment. 
+ +If the inner-network machine is validation-only, it is also reasonable to keep a separate validation copy of `rules.json` that contains only: + +- the current required `pipe_actions` +- the 17 business targets + +This report does not modify the active [rules.json](/D:/data/ideaSpace/rust/sgClaw/claw-new/resources/rules.json). diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-deterministic-invocation-readiness-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-deterministic-invocation-readiness-report.md new file mode 100644 index 0000000..da280f0 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-deterministic-invocation-readiness-report.md @@ -0,0 +1,53 @@ +# Scene Skill 102 Deterministic Invocation Readiness Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-deterministic-invocation-readiness-plan.md` + +## Scope + +This plan normalized deterministic invocation metadata and ran dispatch dry-run checks only. It did not execute browser scripts, change runtime dispatch code, repair `sweep-012-scene`, or start static/mock/production validation. + +## Summary + +| Metric | Count | +| --- | ---: | +| `totalScenes` | 102 | +| `completePackages` | 101 | +| `excludedMaterializationFailed` | 1 | +| `normalizedSceneToml` | 101 | +| `dispatchReady` | 92 | +| `dispatchGaps` | 9 | +| `ambiguous` | 9 | +| `noMatch` | 0 | +| `promptOrOther` | 0 | +| `withParams` | 10 | + +## Interpretation + +- Complete packages now use deterministic suffix `???`. +- Full-scene-name samples were used as the primary dispatch readiness check. +- Parameter-bearing scenes are recorded separately; this plan does not prove real parameter correctness. +- `sweep-012-scene / ????????` remains excluded because the skill package is incomplete. 
+ +## Gaps + +| Scene ID | Scene name | Dispatch status | Detail | +| --- | --- | --- | --- | +| `sweep-026-scene` | 县区公司故障明细 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-026-scene", "alsoSceneId": "sweep-059-scene", "score": 10}` | +| `sweep-034-scene` | 售电收入日统计排程预测 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-033-scene", "alsoSceneId": "sweep-034-scene", "score": 10}` | +| `sweep-037-scene` | 嘉峪关可靠性分析报告 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-036-scene", "alsoSceneId": "sweep-037-scene", "score": 10}` | +| `sweep-038-scene` | 嘉峪关周报 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-036-scene", "alsoSceneId": "sweep-038-scene", "score": 10}` | +| `sweep-039-scene` | 嘉峪关故障明细 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-036-scene", "alsoSceneId": "sweep-039-scene", "score": 10}` | +| `sweep-040-scene` | 嘉峪关日报 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-036-scene", "alsoSceneId": "sweep-040-scene", "score": 10}` | +| `sweep-041-scene` | 嘉峪关月报 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-036-scene", "alsoSceneId": "sweep-041-scene", "score": 10}` | +| `sweep-044-scene` | 国网金昌供电公司指挥中心生产例会报告 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-044-scene", "alsoSceneId": "sweep-088-scene", "score": 10}` | +| `sweep-045-scene` | 国网金昌供电公司营商环境周例会报告 | `ambiguous` | `{"status": "ambiguous", "selectedSceneId": "sweep-045-scene", "alsoSceneId": "sweep-088-scene", "score": 10}` | + +## Assets + +- `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_samples_2026-04-20.json` + +## Closure + +The deterministic invocation readiness plan is complete. 
Next work should address the 9 ambiguous dispatch gaps with a bounded keyword/scoring refinement plan, and address `sweep-012-scene` materialization recovery before claiming 102/102 deterministic callable skills. diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md new file mode 100644 index 0000000..f489846 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md @@ -0,0 +1,35 @@ +# Scene Skill 102 Full Direct Mock Execution Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-full-direct-mock-execution-plan.md` + +## Scope + +This run executed all 102 generated scene skill scripts in a local mock runtime. It did not use a real browser, real network, production credentials, or business systems. It did not modify generated skill packages. + +## Summary + +| Status | Count | +| --- | ---: | +| `direct-mock-pass` | 102 | + +## By Archetype + +| Archetype | Result | +| --- | --- | +| `host_bridge_workflow` | direct-mock-pass: 26 | +| `local_doc_pipeline` | direct-mock-pass: 6 | +| `multi_endpoint_inventory` | direct-mock-pass: 2 | +| `multi_mode_request` | direct-mock-pass: 10 | +| `page_state_eval` | direct-mock-pass: 2 | +| `paginated_enrichment` | direct-mock-pass: 51 | +| `single_request_enrichment` | direct-mock-pass: 5 | + +## Interpretation + +Direct mock execution passing means every generated skill entrypoint can load and complete against controlled fake dependencies. It still does not mean production execution passed. + +## Next Step + +If this full direct mock run is acceptable, the next bounded stage is pseudo-production batch selection. That stage should choose a small, archetype-balanced batch for real or quasi-real environment execution planning. 
+ diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md new file mode 100644 index 0000000..0fb576c --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md @@ -0,0 +1,35 @@ +# Scene Skill 102 Mock Runtime Harness Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-mock-runtime-harness-implementation-plan.md` + +## Scope + +This run executed only representative generated scripts inside a local mock runtime. It did not use a real browser, real network, production credentials, or business systems. It did not modify generated skill packages. + +## Summary + +| Status | Count | +| --- | ---: | +| `mock-runtime-pass` | 19 | + +## By Archetype + +| Archetype | Representatives | Result | +| --- | --- | --- | +| `host_bridge_workflow` | `sweep-007-scene, sweep-009-scene, sweep-010-scene` | mock-runtime-pass: 3 | +| `local_doc_pipeline` | `sweep-012-scene, sweep-017-scene, sweep-019-scene` | mock-runtime-pass: 3 | +| `multi_endpoint_inventory` | `sweep-084-scene, sweep-085-scene` | mock-runtime-pass: 2 | +| `multi_mode_request` | `sweep-020-scene, sweep-023-scene, sweep-030-scene` | mock-runtime-pass: 3 | +| `page_state_eval` | `sweep-066-scene, sweep-094-scene` | mock-runtime-pass: 2 | +| `paginated_enrichment` | `sweep-001-scene, sweep-002-scene, sweep-003-scene` | mock-runtime-pass: 3 | +| `single_request_enrichment` | `sweep-013-scene, sweep-016-scene, sweep-068-scene` | mock-runtime-pass: 3 | + +## Interpretation + +Representative mock execution passing means the generated scripts can load and traverse their main control flow against fake dependencies. It does not mean every one of the 102 scripts was directly executed, and it does not mean production execution passed. 
+ +## Next Step + +If continuing, the next bounded stage should expand mock runtime from representative execution to full 102 direct mock execution, or select a small pseudo-production batch if representative coverage is considered sufficient. + diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-natural-language-parameter-readiness-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-natural-language-parameter-readiness-report.md new file mode 100644 index 0000000..312a356 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-natural-language-parameter-readiness-report.md @@ -0,0 +1,50 @@ +# Scene Skill 102 Natural-Language Parameter Readiness Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-natural-language-parameter-readiness-plan.md` +> Refreshed By: `2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-plan.md` + +## Summary + +| Metric | Count | +| --- | ---: | +| `total scenes` | `102` | +| `complete packages` | `102` | +| `declared params` | `10` | +| `required-param scenes` | `10` | +| `parameter-ready` | `10` | +| `parameter-gap` | `0` | +| `parameter-not-required` | `31` | +| `parameter-implicit-risk` | `61` | +| `empty org dictionaries` | `0` | +| `starter dictionary scenes` | `10` | +| `unsupported resolver scenes` | `0` | + +## Key Conclusion + +The final 102 skills are deterministic-dispatch ready. After starter dictionary normalization, the `10` required-param scenes are parameter-ready for pseudo-production starter validation. They still rely on a starter organization dictionary, not a full production unit tree. + +Do not test the required-param scenes with only `场景名。。。`. Use organization, mode, and concrete period, for example `兰州公司 台区线损大数据-月_周累计线损率统计分析 月累计 2026-03。。。`. 
+ +## Required-Param Ready Scenes + +| Scene | Required params | Recommended input | Dictionary scope | +| --- | --- | --- | --- | +| `sweep-020-scene / 供电所线路电量统计` | `org, period` | `兰州公司 供电所线路电量统计 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-023-scene / 供电质量看板-武威` | `org, period` | `兰州公司 供电质量看板-武威 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-030-scene / 台区线损大数据-月_周累计线损率统计分析` | `org, period` | `兰州公司 台区线损大数据-月_周累计线损率统计分析 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-031-scene / 台区零度户月度用电量与台区线损电量对比核查报表` | `org, period` | `兰州公司 台区零度户月度用电量与台区线损电量对比核查报表 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-070-scene / 电量、站损自动采集上报` | `org, period` | `兰州公司 电量、站损自动采集上报 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-076-scene / 白银线损周报` | `org, period` | `兰州公司 白银线损周报 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-078-scene / 线损同期差异报表` | `org, period` | `兰州公司 线损同期差异报表 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-079-scene / 线损大数据-窃电分析` | `org, period` | `兰州公司 线损大数据-窃电分析 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-082-scene / 营配户变精准核查` | `org, period` | `兰州公司 营配户变精准核查 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | +| `sweep-083-scene / 营销业务管控监测日报表` | `org, period` | `兰州公司 营销业务管控监测日报表 月累计 2026-03。。。` | `starter: 兰州/城关/天水` | + +## No-Param Scenes With Implicit Query Risk + +`61` scenes do not declare structured params but their names imply likely filters. They can be dispatch-selected with `场景名。。。`, but extra natural-language conditions will not become structured runtime args today. + +## Next Step + +Use the refreshed pseudo-production handoff for operator-run pseudo-production execution. If broader organization coverage is required, replace the starter dictionary with the real production unit tree in a separate bounded plan. 
diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-report.md new file mode 100644 index 0000000..266b80e --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-report.md @@ -0,0 +1,59 @@ +# Scene Skill 102 Parameter Dictionary And Invocation Template Normalization Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-plan.md` + +## Summary + +| Metric | Count | +| --- | ---: | +| `fixed required-param scenes` | `10` | +| `parameter-ready` | `10` | +| `parameter-gap` | `0` | +| `starter dictionary scenes` | `10` | +| `empty org dictionaries` | `0` | +| `unsupported resolver scenes` | `0` | + +## What Changed + +Populated `references/org-dictionary.json` for the fixed `10` required-param `multi_mode_request` skills using a pseudo-production starter dictionary derived from existing deterministic-submit fixture evidence. + +The starter dictionary contains `兰州公司`, `城关供电分公司`, and `天水公司` aliases. It is sufficient to validate natural-language parameter plumbing, but it is not a full production organization tree. + +## Updated Dictionaries + +| Scene | Before | After | +| --- | ---: | ---: | +| `sweep-020-scene` | `3` | `3` | +| `sweep-023-scene` | `3` | `3` | +| `sweep-030-scene` | `3` | `3` | +| `sweep-031-scene` | `3` | `3` | +| `sweep-070-scene` | `3` | `3` | +| `sweep-076-scene` | `3` | `3` | +| `sweep-078-scene` | `3` | `3` | +| `sweep-079-scene` | `3` | `3` | +| `sweep-082-scene` | `3` | `3` | +| `sweep-083-scene` | `3` | `3` | + +## Pseudo-Production Handoff Updates + +| Scene | Before | After | +| --- | --- | --- | + +## Operator Guidance + +1. For these `10` scenes, use parameterized input such as `兰州公司 台区线损大数据-月_周累计线损率统计分析 月累计 2026-03。。。`. +2. 
`月累计` or `周累计` alone is not enough; include a concrete period value. +3. The starter dictionary proves resolver plumbing, not full production organization coverage. +4. Do not store production credentials, cookies, tokens, or VPN secrets in the repository. + +## Output Assets + +- `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_natural_language_invocation_samples_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_handoff_2026-04-20.json` + +## Next Step + +Proceed to operator-run pseudo-production execution only if the environment is available, using the refreshed handoff. If real unit coverage beyond the starter dictionary is required, create a separate production unit-tree dictionary plan. diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-report.md new file mode 100644 index 0000000..2a61b12 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-report.md @@ -0,0 +1,49 @@ +# Scene Skill 102 Pseudo-Production Batch Execution Preparation Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-plan.md` + +## Scope + +This is preparation-only. It did not run browser automation, access real networks, use credentials, modify generated skill packages, or update the official board. 
+ +## Batch Summary + +| Metric | Count | +| --- | ---: | +| Selected scenes | 10 | +| paginated_enrichment | 4 | +| multi_mode_request | 2 | +| single_request_enrichment | 2 | +| multi_endpoint_inventory | 1 | +| page_state_eval | 1 | + +## Prepared Assets + +- `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_handoff_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_evidence_checklist_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_record_template_2026-04-20.json` + +## Selected Scenes + +| Scene ID | Scene Name | Archetype | Evidence Dir | +| --- | --- | --- | --- | +| `sweep-001-scene` | 95598、12398、流程超期风险工单明细 | `paginated_enrichment` | `pseudoprod_evidence/sweep-001-scene/` | +| `sweep-002-scene` | 95598、12398及配网设备监控情况周统计 | `paginated_enrichment` | `pseudoprod_evidence/sweep-002-scene/` | +| `sweep-003-scene` | 95598业务处理满意率统计日报 | `paginated_enrichment` | `pseudoprod_evidence/sweep-003-scene/` | +| `sweep-004-scene` | 95598供电服务月报 | `paginated_enrichment` | `pseudoprod_evidence/sweep-004-scene/` | +| `sweep-020-scene` | 供电所线路电量统计 | `multi_mode_request` | `pseudoprod_evidence/sweep-020-scene/` | +| `sweep-023-scene` | 供电质量看板-武威 | `multi_mode_request` | `pseudoprod_evidence/sweep-023-scene/` | +| `sweep-013-scene` | 业扩报装质量评价体系 | `single_request_enrichment` | `pseudoprod_evidence/sweep-013-scene/` | +| `sweep-016-scene` | 供电可靠性指标完成情况统计 | `single_request_enrichment` | `pseudoprod_evidence/sweep-016-scene/` | +| `sweep-084-scene` | 计量数据助手 | `multi_endpoint_inventory` | `pseudoprod_evidence/sweep-084-scene/` | +| `sweep-066-scene` | 用户停电频次分析监测 | `page_state_eval` | `pseudoprod_evidence/sweep-066-scene/` | + +## Credential Policy + +Credentials, cookies, tokens, VPN secrets, and private keys must stay outside the repository. Evidence records should reference operator-provided environment state, not embed secrets. 
+ +## Next Step + +The next bounded plan may execute this prepared batch in a quasi-production environment. It should consume these templates, collect evidence, and classify each scene using the allowed result states. + diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-selection-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-selection-report.md new file mode 100644 index 0000000..3015274 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-selection-report.md @@ -0,0 +1,55 @@ +# Scene Skill 102 Pseudo-Production Batch Selection Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-pseudoprod-batch-selection-plan.md` + +## Scope + +This is selection-only. It did not run browser automation, access real networks, use production credentials, modify generated skill packages, or update the official board. + +## Summary + +| Metric | Count | +| --- | ---: | +| Total scenes | 102 | +| Eligible pseudo-prod-ready | 70 | +| Selected first batch | 10 | +| Deferred | 92 | + +## Selected By Archetype + +| Archetype | Count | +| --- | ---: | +| `paginated_enrichment` | 4 | +| `multi_mode_request` | 2 | +| `single_request_enrichment` | 2 | +| `multi_endpoint_inventory` | 1 | +| `page_state_eval` | 1 | + +## Selected Scenes + +| Scene ID | Scene Name | Archetype | Deterministic Input | +| --- | --- | --- | --- | +| `sweep-001-scene` | 95598、12398、流程超期风险工单明细 | `paginated_enrichment` | `95598、12398、流程超期风险工单明细???` | +| `sweep-002-scene` | 95598、12398及配网设备监控情况周统计 | `paginated_enrichment` | `95598、12398及配网设备监控情况周统计???` | +| `sweep-003-scene` | 95598业务处理满意率统计日报 | `paginated_enrichment` | `95598业务处理满意率统计日报???` | +| `sweep-004-scene` | 95598供电服务月报 | `paginated_enrichment` | `95598供电服务月报???` | +| `sweep-020-scene` | 供电所线路电量统计 | `multi_mode_request` | `供电所线路电量统计???` | +| `sweep-023-scene` | 供电质量看板-武威 | `multi_mode_request` | `供电质量看板-武威???` | +| `sweep-013-scene` | 业扩报装质量评价体系 | 
`single_request_enrichment` | `业扩报装质量评价体系???` | +| `sweep-016-scene` | 供电可靠性指标完成情况统计 | `single_request_enrichment` | `供电可靠性指标完成情况统计???` | +| `sweep-084-scene` | 计量数据助手 | `multi_endpoint_inventory` | `计量数据助手???` | +| `sweep-066-scene` | 用户停电频次分析监测 | `page_state_eval` | `用户停电频次分析监测???` | + +## Required Evidence + +Each selected scene must collect console log, network log or request summary, screenshot when a browser target page is required, exported file if produced, generation report reference, deterministic invocation input, and final execution classification. + +## Deferred Scope + +`host_bridge_workflow` and `local_doc_pipeline` are excluded from this first batch because they require real-environment dependencies. Eligible-but-not-selected scenes remain available for later pseudo-production batches. + +## Next Step + +The next bounded plan should be pseudo-production batch execution preparation for these 10 selected scenes. It should still avoid storing credentials in the repository and should define environment handoff requirements before any real execution. + diff --git a/docs/superpowers/reports/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-report.md b/docs/superpowers/reports/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-report.md new file mode 100644 index 0000000..f054d94 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-report.md @@ -0,0 +1,79 @@ +# Scene Skill 102 Static, Mock, And Pseudo-Production Validation Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-plan.md` + +## Scope + +This validation stage consumed the final `102` materialized skill package set. It did not rematerialize skills, did not modify analyzer/generator code, did not update the official execution board, and did not start real browser or production execution. 
+ +## Baseline + +| Item | Count | +| --- | ---: | +| final materialized skill packages | 102 / 102 | +| deterministic invocation ready | 102 / 102 | +| known materialization failures | 0 | +| deterministic ambiguities before this validation | 0 | + +## Results + +| Layer | Pass / Ready | Gap | +| --- | ---: | ---: | +| static package validation | 102 | 0 | +| deterministic dispatch dry-run | 102 | 0 | +| mock runtime representative coverage | 19 representatives | 83 matrix rows need harness execution | +| pseudo-production readiness | 70 pseudo-prod-ready | 32 real-env-required | + +## Static Validation + +Static validation produced `102` `static-validated` records and `0` `static-invalid` records. + +Asset: `tests/fixtures/generated_scene/scene_skill_102_static_validation_2026-04-20.json` + +## Deterministic Dispatch Dry-Run + +Dispatch dry-run produced `102` pass records, `0` ambiguous records, `0` no-match records, and `0` wrong-match records. + +Asset: `tests/fixtures/generated_scene/scene_skill_102_dispatch_dry_run_validation_2026-04-20.json` + +## Mock Runtime Matrix + +This phase produced a matrix only. It did not execute generated scripts. Representative harnesses are required before mock-runtime-pass can be claimed. 
+ +| Archetype | Count | Representatives | Harness | +| --- | ---: | --- | --- | +| `host_bridge_workflow` | 26 | `sweep-007-scene, sweep-009-scene, sweep-010-scene` | `mock-host-bridge-action-and-callback` | +| `local_doc_pipeline` | 6 | `sweep-012-scene, sweep-017-scene, sweep-019-scene` | `mock-local-doc-service-and-doc-export` | +| `multi_endpoint_inventory` | 2 | `sweep-084-scene, sweep-085-scene` | `mock-multi-endpoint-inventory-fetch` | +| `multi_mode_request` | 10 | `sweep-020-scene, sweep-023-scene, sweep-030-scene` | `mock-fetch-mode-switch-request` | +| `page_state_eval` | 2 | `sweep-066-scene, sweep-094-scene` | `mock-browser-dom-page-state` | +| `paginated_enrichment` | 51 | `sweep-001-scene, sweep-002-scene, sweep-003-scene` | `mock-fetch-pagination-enrichment-export` | +| `single_request_enrichment` | 5 | `sweep-013-scene, sweep-016-scene, sweep-068-scene` | `mock-fetch-single-request-enrichment` | + +Asset: `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json` + +## Pseudo-Production Readiness + +| Status | Count | +| --- | ---: | +| `pseudo-prod-ready` | 70 | +| `real-env-required` | 32 | + +Asset: `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_readiness_2026-04-20.json` + +## Interpretation + +The `102 / 102` materialized skill set is structurally valid and deterministic-dispatchable under dry-run. This still does not mean `102 / 102` production execution pass. Mock harness execution and real-environment validation remain separate stages. + +## Recommended Next Step + +Start a bounded mock runtime harness implementation plan. The first slice should implement representative harnesses for the largest and most reused archetypes before attempting broad real-environment validation. + +Recommended order: + +1. paginated_enrichment mock harness +2. multi_mode_request and single_request_enrichment mock harnesses +3. local_doc_pipeline and host_bridge_workflow mock harnesses +4. 
pseudo-production batch selection for real-environment validation + diff --git a/docs/superpowers/reports/2026-04-20-sweep-012-materialization-recovery-report.md b/docs/superpowers/reports/2026-04-20-sweep-012-materialization-recovery-report.md new file mode 100644 index 0000000..91c02a0 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-sweep-012-materialization-recovery-report.md @@ -0,0 +1,39 @@ +# Sweep 012 Materialization Recovery Report + +Date: 2026-04-20 +Plan: `2026-04-20-sweep-012-materialization-recovery-plan.md` + +## Result + +`sweep-012-scene / 业扩报装管理制度` has been recovered and materialized into the final 102 skill set. + +| Field | Value | +| --- | --- | +| Workflow archetype | `local_doc_pipeline` | +| Readiness | `A` | +| Skill dir | `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-012-scene` | +| Deterministic suffix | `。。。` | +| Dispatch keyword | `业扩报装管理制度` | + +## Root Cause + +The source is a local report-log and Word-export pipeline using `ReportServices` plus `docExport`, but it was previously absorbed by `host_bridge_workflow`, whose runtime contract is intentionally fail-closed. The fix recognizes report-log evidence as local document pipeline evidence and gives local-doc pipelines a local source bootstrap target instead of requiring a remote business URL. + +## Updated Assets + +- Final materialization manifest now reports `requiredFilesPresent = 102` and `failures = 0`. +- Final materialization failures are cleared. +- `SCENE_INDEX.md` and `scene_skill_102_index.json` now list `sweep-012-scene` as materialized. +- Deterministic readiness after keyword refinement now reports `afterReady = 102`. 
+ +## Verification + +- `cargo test --test scene_generator_test generator_recovers_sweep_012_report_log_doc_pipeline_package -- --nocapture` +- `cargo test --test scene_generator_test generator_writes_g3_g8_mixed_boundary_fixture_as_paginated_enrichment -- --nocapture` +- `cargo test --test scene_generator_test generator_writes_g8_local_doc_pipeline_package -- --nocapture` +- `cargo test --test scene_generator_canonical_test -- --nocapture` +- single-scene `sg_scene_generate` for `sweep-012-scene` + +## Boundary + +This recovery did not update the official execution board, start static/mock/production validation, or modify other final skill packages. diff --git a/docs/superpowers/reports/2026-04-20-sweep-015-direct-mock-partial-closure-report.md b/docs/superpowers/reports/2026-04-20-sweep-015-direct-mock-partial-closure-report.md new file mode 100644 index 0000000..03df79e --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-sweep-015-direct-mock-partial-closure-report.md @@ -0,0 +1,71 @@ +# Sweep 015 Direct Mock Partial Closure Report + +> Date: 2026-04-20 +> Plan: `2026-04-20-sweep-015-direct-mock-partial-closure-plan.md` + +## Scope + +This closure only handled the single remaining full direct mock partial: + +- `sweep-015-scene / 任务报表` + +It did not modify generated skill packages, generator/analyzer code, official board state, browser runtime, network access, or production systems. + +## Root Cause + +`sweep-015-scene` generated script applies this business filter: + +```js +row.status == 5 +``` + +The full direct mock runner's generic fake row did not include `status = 5`, so mock data was filtered out and the artifact became `partial` with zero rows. + +This was a mock fixture contract gap, not a generated skill or generator defect. 
+ +## Change + +Updated the direct mock runner fake row to include: + +```js +status: 5 +``` + +## Result + +Full direct mock execution now reports: + +| Status | Count | +| --- | ---: | +| `direct-mock-pass` | 102 | +| `direct-mock-partial` | 0 | +| `direct-mock-fail` | 0 | + +By archetype: + +| Archetype | Pass | +| --- | ---: | +| `paginated_enrichment` | 51 | +| `host_bridge_workflow` | 26 | +| `local_doc_pipeline` | 6 | +| `multi_mode_request` | 10 | +| `single_request_enrichment` | 5 | +| `page_state_eval` | 2 | +| `multi_endpoint_inventory` | 2 | + +## Verification + +Command: + +```powershell +node tests/generated_scene_full_direct_mock_runner.js +``` + +Output asset: + +- `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` + +## Stop Statement + +This plan stops here. The next bounded stage can be pseudo-production batch selection, but that is not started under this closure. + diff --git a/docs/superpowers/reports/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-report.md b/docs/superpowers/reports/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-report.md new file mode 100644 index 0000000..bb630b8 --- /dev/null +++ b/docs/superpowers/reports/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-report.md @@ -0,0 +1,41 @@ +# Sweep-030 Deterministic Keyword / Alias Normalization Report + +## Scope + +This route only fixes deterministic matchability for: + +- `sweep-030-scene` + +It does not modify runtime, callback-host, resolver implementation, or bootstrap target selection. 
+ +## Change + +Updated: + +- [sweep-030-scene/scene.toml](/D:/data/ideaSpace/rust/sgClaw/claw-new/examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-030-scene/scene.toml) + +The deterministic manifest now recognizes these line-loss aliases: + +- `台区线损大数据-月_周累计线损率统计分析` +- `台区线损大数据 月累计线损率统计分析` +- `台区线损大数据 周累计线损率统计分析` +- `台区线损大数据 月累计` +- `台区线损大数据 周累计` +- `台区线损率统计分析` +- `台区线损` + +## Result + +The route-local result is: + +- before: the natural service-console wording could be rejected before skill selection +- after: `sweep-030-scene` is dispatch-matchable from the natural line-loss wording + +## Boundary + +This plan does not claim that helper bootstrap is fixed. + +The next layer to validate is still: + +- callback-host helper startup after `sweep-030-scene` is actually selected +- whether `page_url` / bootstrap target must be narrowed to the concrete business page diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-embedded-dictionary-extraction-hardening-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-embedded-dictionary-extraction-hardening-report.md new file mode 100644 index 0000000..be48a3b --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-embedded-dictionary-extraction-hardening-report.md @@ -0,0 +1,52 @@ +# Generated Scene Embedded Dictionary Extraction Hardening Report + +## Scope + +Executed the first reusable slice defined by `2026-04-20-generated-scene-embedded-dictionary-extraction-hardening-plan.md`. 
+ +Bounded scope: + +- source-side dictionary evidence extraction for `multi_mode_request`-like scenes +- anchor validation on `sweep-030-scene` +- generator-side `org-dictionary.json` emission from extracted evidence + +Out of scope: + +- runtime resolver changes +- materialized skill refresh under `examples/` +- 102-scene rematerialization +- validation refresh + +## Outcome + +This route now supports source-driven org dictionary extraction from embedded dictionary files such as `city.js`, `dict.js`, `enum.js`, and option-like files. + +The first reusable slice is closed for the route anchor: + +- `extract_deterministic_scene_facts(...)` now carries `org_dictionary_entries` +- `generate_scene_package(...)` writes `references/org-dictionary.json` from source-derived entries when evidence exists +- real `sweep-030-scene` source evidence produces company-level codes including `62401` and `62408` +- synthetic fixture coverage remains bounded: when no source dictionary evidence exists, generated `org-dictionary.json` remains `[]` + +## Files Changed + +- `src/generated_scene/analyzer.rs` +- `src/generated_scene/generator.rs` +- `tests/scene_generator_test.rs` + +## Verification + +Passed: + +```powershell +cargo test --test scene_generator_test analyzer_extracts_embedded_org_dictionary_from_sweep_030_source -- --nocapture +cargo test --test scene_generator_test generator_writes_real_sweep_030_org_dictionary_from_embedded_source -- --nocapture +cargo test --test scene_generator_test generator_writes_multi_mode_package_from_deterministic_analysis -- --nocapture +``` + +## Route-Local Notes + +- The source-driven slice is intentionally bounded to extraction and generation. +- The route does not yet preserve the full original organization tree structure. +- Alias normalization remains conservative; broader alias-generation is reserved for the dedicated alias route. 
+- Existing repository warnings remain unchanged, including `dead_code` warnings in callback-host/openxml code and an existing `unreachable_code` warning in `tests/scene_generator_test.rs`. diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-invocation-alias-generation-hardening-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-invocation-alias-generation-hardening-report.md new file mode 100644 index 0000000..5d376f3 --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-invocation-alias-generation-hardening-report.md @@ -0,0 +1,60 @@ +# Generated Scene Invocation Alias Generation Hardening Report + +Date: 2026-04-21 + +Parent plan: + +- `docs/superpowers/plans/2026-04-20-generated-scene-invocation-alias-generation-hardening-plan.md` + +## Scope + +This route implemented the first reusable alias-generation slice for generated scene deterministic manifests. + +It did not change runtime dispatch scoring, service-console behavior, final materialized skill files, board assets, or full rematerialization output. + +## Implementation + +Changed files: + +- `src/generated_scene/generator.rs` +- `tests/scene_generator_test.rs` + +The generator now builds deterministic `include_keywords` from: + +- the canonical scene name +- existing page-title keywords +- punctuation/connector-normalized scene names +- month/week split aliases for names containing combined `月_周累计` style wording +- compact line-loss aliases such as `台区线损` and `线损大数据` + +This turns the `sweep-030-scene` debugging lesson into a reusable generation rule instead of a hand-edited `scene.toml` patch. + +## Anchor Result + +For `sweep-030-scene`, route-local generation now emits aliases matching real operator wording, including: + +- `线损大数据 月累计线损统计分析` +- `线损大数据 周累计线损统计分析` +- `台区线损` + +These aliases are generated from the scene name semantics rather than being patched directly into the final materialized skill. 
+ +## Verification + +Passed: + +```powershell +cargo test --test scene_generator_test generator_writes_real_sweep_030_org_dictionary_from_embedded_source -- --nocapture +cargo test --test scene_generator_modes_test -- --nocapture +``` + +Known existing warnings were not addressed by this route: + +- callback/openxml/generated_scene `dead_code` +- `scene_generator_test.rs` `unreachable_code` + +## Stop Statement + +The route stops after the first reusable alias-generation slice. + +It does not claim full closure of every alias gap in the 84-scene ledger bucket. The next planned step is rematerialization refresh, followed by validation refresh. diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-report.md new file mode 100644 index 0000000..fa221aa --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-report.md @@ -0,0 +1,96 @@ +# Generated Scene Local-Doc Pipeline Residual Closure Report + +Date: 2026-04-21 + +Plan: + +- `docs/superpowers/plans/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-plan.md` + +## Scope + +This route closed only the bounded `local_doc_pipeline` residual evidence slice for the six residual scenes left after runtime-semantics rematerialization validation refresh. + +In scope: + +1. `sweep-025-scene` +2. `sweep-047-scene` +3. `sweep-050-scene` +4. `sweep-052-scene` +5. `sweep-062-scene` +6. `sweep-087-scene` + +Out of scope: + +1. no rematerialization rerun +2. no validation refresh rerun +3. no edits to generated skill bundles under `examples/` +4. 
no runtime, service-console, browser, or board updates + +## Implemented Slice + +The analyzer now recovers additional reusable local-document workflow evidence shapes: + +- `exportImageDocs` +- `exportWordFile` +- `uploadWord` +- `setWord` +- `aSaveFile` +- `mammoth.convertToHtml` +- `faultDetailsExportXLSX` +- `api/genword` +- `/docxs/` +- `ReportServices/Api/readeFile` + +These are normalized into existing local-doc actions such as: + +- `docExport` +- `docTemplateTransform` +- `reportFileOpen` + +The existing fail-closed guard remains in place: a standalone export signal without local report/query evidence is still not enough to classify a complete `local_doc_pipeline` contract. + +## Results + +| Metric | Count | +| --- | ---: | +| scoped residual scenes | 6 | +| analyzer evidence recovered | 6 | +| generator package recovery validated | 6 | +| rematerialization rerun | 0 | +| validation refresh rerun | 0 | + +## Scene Closure Notes + +| Scene | Recovered basis | +| --- | --- | +| `sweep-025-scene` | fault details XLSX export plus report log/local file evidence | +| `sweep-047-scene` | docx template transform and `exportImageDocs`/local report log evidence | +| `sweep-050-scene` | webpack-bundled `uploadWord`/`setWord`/`aSaveFile` document export evidence plus report log | +| `sweep-052-scene` | docx template path, `exportWord`/`exportImageDocs`, and report log evidence | +| `sweep-062-scene` | docx template path, `exportWord`/`exportImageDocs`, and report log evidence | +| `sweep-087-scene` | `api/genword` plus `aSaveFile`/report log document generation evidence | + +## Validation + +Passed: + +```powershell +cargo test --test scene_generator_test analyzer_recovers_local_doc_residual_export_workflow_evidence -- --nocapture +cargo test --test scene_generator_test generator_recovers_local_doc_residual_packages_from_source_evidence -- --nocapture +cargo test --test scene_generator_test generator_writes_g8_local_doc_pipeline_package -- --nocapture +cargo test 
--test scene_generator_test generator_blocks_incomplete_g8_local_doc_pipeline_contract -- --nocapture +cargo test --test scene_generator_test generator_accepts_g8_local_doc_select_data_contract -- --nocapture +``` + +Known unrelated warnings remain: + +- existing `dead_code` warnings in callback-host/openxml/generator code +- existing `unreachable_code` warning in `tests/scene_generator_test.rs` + +## Output Asset + +- `tests/fixtures/generated_scene/generated_scene_local_doc_pipeline_residual_closure_followup_2026-04-21.json` + +## Stop Statement + +This route stops after the bounded local-doc residual closure slice and route-local assets. Full bundle closure still requires downstream rematerialization and validation refresh. diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-parameter-default-semantics-hardening-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-parameter-default-semantics-hardening-report.md new file mode 100644 index 0000000..bed34a9 --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-parameter-default-semantics-hardening-report.md @@ -0,0 +1,51 @@ +# Generated Scene Parameter Default Semantics Hardening Report + +## Scope + +Executed the first reusable slice defined by `2026-04-20-generated-scene-parameter-default-semantics-hardening-plan.md`. + +Bounded scope: + +- source-side recovery of default period semantics for parameterized monthly/weekly scenes +- generator-side preservation of recovered default strategy in param metadata +- route-local validation on the `sweep-030-scene` anchor + +Out of scope: + +- runtime resolver logic changes +- materialized skill refresh under `examples/` +- 102-scene rematerialization +- validation refresh + +## Outcome + +This route now preserves source-derived default period semantics in generated parameter metadata. 
+ +The first reusable slice is closed for the route anchor: + +- `extract_deterministic_scene_facts(...)` now emits `period_default_strategy` +- line-loss style month/week source evidence recovers `lineloss_page_semantics` +- generated `month_week_period` params now write `resolver_config.default_strategy` +- real `sweep-030-scene` generation writes `default_strategy = "lineloss_page_semantics"` into `scene.toml` + +## Files Changed + +- `src/generated_scene/analyzer.rs` +- `src/generated_scene/generator.rs` +- `tests/scene_generator_test.rs` + +## Verification + +Passed: + +```powershell +cargo test --test scene_generator_test analyzer_recovers_lineloss_period_default_strategy_from_source -- --nocapture +cargo test --test scene_generator_test generator_writes_real_sweep_030_org_dictionary_from_embedded_source -- --nocapture +cargo test --test scene_generator_modes_test -- --nocapture +``` + +## Route-Local Notes + +- The route preserves generation metadata only; runtime period resolution logic was not modified here. +- The first slice is intentionally limited to the recovered line-loss page semantics pattern. +- Existing repository warnings remain unchanged, including `dead_code` warnings in callback-host/openxml code and an existing `unreachable_code` warning in `tests/scene_generator_test.rs`. 
diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-full-direct-mock-execution-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-full-direct-mock-execution-report.md new file mode 100644 index 0000000..657fa82 --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-full-direct-mock-execution-report.md @@ -0,0 +1,22 @@ +# Scene Skill 102 Runtime Semantics Full Direct Mock Execution Report + +> Date: 2026-04-21 +> Plan: `2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md` + +## Summary + +| Status | Count | +| --- | ---: | +| `direct-mock-pass` | 102 | + +## By Archetype + +| Archetype | Result | +| --- | --- | +| `host_bridge_workflow` | direct-mock-pass: 7 | +| `local_doc_pipeline` | direct-mock-pass: 40 | +| `multi_endpoint_inventory` | direct-mock-pass: 2 | +| `multi_mode_request` | direct-mock-pass: 10 | +| `paginated_enrichment` | direct-mock-pass: 43 | + +This is a mock-only validation run. It does not prove production execution. 
diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-report.md new file mode 100644 index 0000000..6fe5fae --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-report.md @@ -0,0 +1,73 @@ +# Generated Scene Runtime Semantics Offline Validation Bundle Refresh Report + +Date: 2026-04-21 + +Plan: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-plan.md` + +## Summary + +| Metric | Count | +| --- | ---: | +| `skillsCopied` | 102 | +| `selectedBatch` | 7 | +| `rematerializationFailures` | 0 | +| `missingRequiredFiles` | 0 | +| `invalidSceneToml` | 0 | +| `batchEntries` | 7 | + +## Bundle + +- `dist/sgclaw_102_runtime_semantics_validation_bundle_2026-04-21` + +## Source + +- `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` + +## Included Assets + +- `sg_claw.exe` +- `skills/` with 102 refreshed runtime-semantics skill packages +- `README.md` +- `BATCH_001.md` +- `BUNDLE_MANIFEST.json` +- `docs/SCENE_INDEX.md` +- `docs/scene_skill_102_index.json` +- refreshed validation and pseudo-production handoff assets under `handoff/` +- optional business-target rules assets under `resources/` +- empty `results/` and `evidence/` collection directories + +## Selected Batch + +- `sweep-001-scene` / 95598、12398、流程超期风险工单明细 + +- `sweep-002-scene` / 95598、12398及配网设备监控情况周统计 + +- `sweep-003-scene` / 95598业务处理满意率统计日报 + +- `sweep-004-scene` / 95598供电服务月报 + +- `sweep-020-scene` / 供电所线路电量统计 + +- `sweep-023-scene` / 供电质量看板-武威 + +- `sweep-084-scene` / 计量数据助手 + +## Validation + +The refreshed bundle was structurally validated: + +1. `skills/` contains 102 scene directories. +2. Every skill directory contains `SKILL.toml`, `SKILL.md`, `scene.toml`, and `scripts/`. +3. 
Every copied `scene.toml` parses as TOML. +4. Critical JSON assets parse successfully. +5. `BATCH_001.md` entry count matches the refreshed handoff selected batch count. +6. Generated bundle docs do not contain detected secret material. +7. `BATCH_001.md` task inputs use real `U+3002 x3` deterministic suffix, not placeholder text. + +## Stop Statement + +Offline validation bundle refresh stops here. + +No browser, inner-network, production network, credentialed session, or pseudo-production execution was run. diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-report.md new file mode 100644 index 0000000..07e3764 --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-report.md @@ -0,0 +1,82 @@ +# Generated Scene Runtime Semantics Post-Refresh Residual Closure Report + +Date: 2026-04-21 + +Plan: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-plan.md` + +## Scope + +This closure handled only the two post-refresh residuals exposed by the 2026-04-21 validation refresh: + +1. deterministic suffix regression in rematerialized `scene.toml` +2. invalid TOML generation for `sweep-078-scene` + +No rematerialization refresh, validation refresh, pseudo-production execution, runtime change, service-console change, or board update was executed. + +## Changes + +### Deterministic suffix regression + +`render_scene_toml` now emits: + +```toml +suffix = "。。。" +``` + +instead of using the scene name as the deterministic suffix. + +### TOML string escaping + +`escape_toml` now escapes: + +1. backslash +2. double quote +3. newline +4. carriage return +5. tab +6. 
remaining control characters + +This prevents request mapping fields or other generated scalar strings from breaking TOML syntax when source evidence contains comments, embedded line breaks, or malformed text fragments. + +## Files Changed + +- `src/generated_scene/generator.rs` +- `tests/scene_generator_test.rs` + +## Route-Local Asset + +- `tests/fixtures/generated_scene/generated_scene_runtime_semantics_post_refresh_residual_closure_followup_2026-04-21.json` + +## Validation + +Passed: + +```powershell +cargo test --test scene_generator_test generator_writes_real_sweep_030_org_dictionary_from_embedded_source -- --nocapture +cargo test --test scene_generator_test generator_escapes_request_mapping_fields_for_valid_toml -- --nocapture +``` + +The first test proves the generated `sweep-030-scene` `scene.toml` now preserves the hardened facts and emits `suffix = "。。。"`. + +The second test proves the generated `sweep-078-scene` `scene.toml` parses as TOML after escaping. + +## Residuals Not Closed Here + +The `6` `local_doc_pipeline` rematerialization residuals were not addressed in this plan: + +1. `sweep-025-scene` +2. `sweep-047-scene` +3. `sweep-050-scene` +4. `sweep-052-scene` +5. `sweep-062-scene` +6. `sweep-087-scene` + +They require a separate bounded route if they remain in scope. + +## Next Step + +Rerun the runtime-semantics rematerialization execution plan, then rerun validation refresh against the newly generated bundle. + +Do not proceed directly to pseudo-production from the previous 2026-04-21 refreshed bundle. 
diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-report.md new file mode 100644 index 0000000..ba0ae1a --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-report.md @@ -0,0 +1,44 @@ +# Generated Scene Runtime Semantics Rematerialization Execution Report + +Date: 2026-04-21 + +Plan: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md` + +## Rerun Reason + +This rerun verifies the bounded `local_doc_pipeline` residual closure: + +- `docs/superpowers/plans/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-plan.md` + +## Result + +| Metric | Count | +| --- | ---: | +| total scenes | 102 | +| attempted | 102 | +| skill directories | 102 | +| materialized | 102 | +| failed | 0 | + +The six previously blocked `local_doc_pipeline` residuals are now materialized in the refreshed bundle: + +- `sweep-025-scene` +- `sweep-047-scene` +- `sweep-050-scene` +- `sweep-052-scene` +- `sweep-062-scene` +- `sweep-087-scene` + +## Output Assets + +- `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` +- `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +- `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_failures_2026-04-21.json` + +## Boundary + +This plan only reran rematerialization. 
Validation refresh is reported separately in: + +- `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md new file mode 100644 index 0000000..91e1a33 --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md @@ -0,0 +1,45 @@ +# Generated Scene Runtime Semantics Validation Refresh Execution Report + +Date: 2026-04-21 + +Plan: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md` + +## Summary + +| Metric | Count | +| --- | ---: | +| `totalScenes` | 102 | +| `materialized` | 102 | +| `rematerializationFailed` | 0 | +| `deterministicReady` | 102 | +| `deterministicBlocked` | 0 | +| `staticPass` | 102 | +| `staticFail` | 0 | +| `directMockPass` | 102 | +| `directMockFail` | 0 | +| `pseudoProdSelected` | 7 | + +## Result + +The validation refresh consumed the rerun 2026-04-21 runtime-semantics rematerialization bundle. The previous six `local_doc_pipeline` rematerialization residuals are no longer present in the refreshed bundle. + +The deterministic suffix check uses explicit `U+3002 x3` codepoint validation to avoid console encoding false negatives. + +No production browser, production network, credentials, or pseudo-production execution was used. 
+ +## Output Assets + +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_deterministic_invocation_readiness_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_static_validation_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_natural_language_parameter_readiness_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_natural_language_invocation_samples_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_execution_handoff_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_evidence_checklist_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_execution_record_template_2026-04-21.json` +- `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_full_direct_mock_execution_2026-04-21.json` + +## Stop Statement + +Validation refresh stops here. No pseudo-production execution was run. diff --git a/docs/superpowers/reports/2026-04-21-generated-scene-runtime-url-classification-hardening-report.md b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-url-classification-hardening-report.md new file mode 100644 index 0000000..df4ad60 --- /dev/null +++ b/docs/superpowers/reports/2026-04-21-generated-scene-runtime-url-classification-hardening-report.md @@ -0,0 +1,68 @@ +# Generated Scene Runtime URL Classification Hardening Report + +- Date: `2026-04-21` +- Route: `runtime_url_classification_hardening` +- Status: `completed` + +## Outcome + +This route implemented the first reusable URL-classification slice in the generated-scene pipeline. 
+ +Generated metadata can now distinguish: + +- runtime/app-entry URL +- module-route URL +- target URL kind + +The first bounded slice was propagated through: + +- analyzer bootstrap classification +- `SceneIr.bootstrap` +- rendered `scene.toml` +- structured evidence payload +- generation report markdown + +## Evidence + +Two route-local examples now carry differentiated URL roles: + +1. `report_collection` fixture + - `target_url = http://20.76.57.61:18080/gsllys` + - `app_entry_url = http://20.76.57.61:18080/gsllys` + - `module_route_url = http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor` + - `target_url_kind = runtime_context` + +2. `multi_mode` package fixture + - `target_url = http://20.76.57.61:18080/gsllys` + - `app_entry_url = http://20.76.57.61:18080/gsllys` + - `module_route_url = http://20.76.57.61:18080/gsllys/monthReport` + - `target_url_kind = runtime_context` + +This is the intended first route-local correction for scenes like `sweep-030-scene`, where runtime page binding semantics diverge from deeper module-route evidence. + +## Verification + +Passed: + +```powershell +cargo test --test scene_generator_test analyzer_classifies_supported_report_collection_source -- --nocapture +cargo test --test scene_generator_test generator_writes_multi_mode_package_with_generation_report -- --nocapture +cargo test --test scene_generator_test generator_writes_multi_mode_package_from_deterministic_analysis -- --nocapture +cargo test --test scene_generator_modes_test -- --nocapture +``` + +Residual warnings remained outside this route scope: + +- existing `dead_code` warnings in callback-host/openxml/generator helpers +- existing `unreachable_code` warning in `tests/scene_generator_test.rs` + +## Stop Statement + +This route stopped after the first reusable URL-classification slice was implemented and verified. 
+ +It did not: + +- rematerialize all `102` scenes +- refresh validation assets +- change runtime/callback-host behavior +- edit generated skills directly diff --git a/docs/superpowers/specs/2026-04-14-request-url-resolution-design.md b/docs/superpowers/specs/2026-04-14-request-url-resolution-design.md new file mode 100644 index 0000000..adb90fd --- /dev/null +++ b/docs/superpowers/specs/2026-04-14-request-url-resolution-design.md @@ -0,0 +1,338 @@ +# Request URL Resolution Design + +**Goal:** Replace the temporary line-loss hardcoded request URL logic in `src/service/server.rs` with a single bootstrap-target resolution path that prefers current page context first, then deterministic submit results, then skill-owned metadata, and only finally falls back to `about:blank`. + +**Status:** Approved design direction for the next slice. + +--- + +## Problem + +The current callback-host bootstrap path still derives the first helper-page request URL in `src/service/server.rs`: + +- `initial_request_url_for_submit_task(...)` prefers `request.page_url` +- then `derive_request_url_from_instruction(...)` +- then falls back to `about:blank` + +This is currently patched with a temporary line-loss branch: + +- if instruction contains `线损` or `lineloss` +- return `http://20.76.57.61:18080` + +That temporary branch is the wrong ownership boundary: + +1. service code is guessing scene intent from raw instruction text +2. deterministic submit already has a structured execution plan with `target_url` +3. future direct-submit skills may also need a bootstrap URL, but should not require new Rust hardcoded branches every time + +The result is duplicated routing knowledge and brittle request URL derivation. + +--- + +## Decision Summary + +1. Introduce one sgClaw-owned bootstrap target resolver for service submit bootstrap. +2. Resolution order is: + - explicit `request.page_url` + - deterministic submit execution plan + - skill metadata fallback + - `about:blank` +3. 
Deterministic submit is the primary source of truth for deterministic scenes such as line loss. +4. Skill metadata provides the compatibility fallback for direct browser-script skills that do not have a deterministic plan. +5. Remove the current line-loss text-match hardcode from `src/service/server.rs` once the resolver is in place. + +--- + +## Recommended Architecture + +### 1. Add a dedicated bootstrap-target result type + +Introduce a small sgClaw-side result type dedicated to callback-host bootstrap only. + +Recommended shape: + +```rust +pub struct SubmitBootstrapTarget { + pub request_url: String, + pub expected_domain: Option<String>, + pub source: BootstrapTargetSource, +} + +pub enum BootstrapTargetSource { + PageContext, + DeterministicPlan, + SkillConfig, + Fallback, +} +``` + +This type should remain intentionally small. + +It is **not** a generic execution-plan object. Its only job is to answer: +- what URL should the helper page bootstrap against on first submit? +- where did that value come from? + +Keeping this object narrow avoids coupling callback-host bootstrap to unrelated execution details. + +--- + +### 2. Move URL derivation into one resolver + +Replace the current `initial_request_url_for_submit_task(...)` branching with a single resolver, conceptually: + +1. If `SubmitTaskRequest.page_url` exists and is non-empty, use it. +2. Else attempt deterministic parsing through `decide_deterministic_submit(...)`. + - If it returns `Execute(plan)`, use `plan.target_url`. +3. Else inspect configured direct-submit skill metadata. + - If metadata exposes a bootstrap URL, use it. +4. Else return `about:blank`. + +This keeps service bootstrap logic declarative and removes scene-specific guessing from `server.rs`. + +--- + +### 3. 
Deterministic submit becomes the primary truth for line loss + +`src/compat/deterministic_submit.rs` already contains structured line-loss routing data: + +- `DeterministicExecutionPlan.expected_domain` +- `DeterministicExecutionPlan.target_url` + +For line-loss requests, service bootstrap should use `plan.target_url` rather than reconstructing or hardcoding a URL in `server.rs`. + +That means the current temporary branch: + +```rust +if instruction.contains("线损") || instruction.contains("lineloss") { + return Some("http://20.76.57.61:18080".to_string()); +} +``` + +should disappear entirely after the resolver is introduced. + +This is the cleanest fix because the deterministic parser already owns the scene contract. + +--- + +### 4. Skill metadata is the fallback, not the primary owner + +For non-deterministic direct browser-script skills, service may still need a bootstrap URL even when there is no page context. + +The fallback should come from skill-owned metadata with minimal fields: + +- `bootstrap_url` +- `expected_domain` + +Recommended semantics: + +- `bootstrap_url`: the page URL service should use when opening the helper/bootstrap context +- `expected_domain`: the hostname direct runtime can use when page context is absent + +This metadata should only be consulted **after** page context and deterministic parsing fail. + +That preserves the user-selected policy: +- deterministic plan first +- skill metadata second + +It also avoids forcing deterministic scenes to duplicate already-structured routing data in skill config. + +--- + +## Ownership Boundary + +### sgClaw owns bootstrap resolution policy + +sgClaw should own the policy and precedence order for request URL resolution. 
+ +That includes: +- checking current page context first +- deciding when deterministic parsing is authoritative +- deciding when skill metadata is an allowed fallback +- falling back to `about:blank` when nothing else resolves + +This policy belongs in sgClaw because it is part of submit-path orchestration, not part of an individual skill script. + +### deterministic submit owns deterministic scene targets + +If a scene already resolves to `DeterministicExecutionPlan`, then that plan owns: +- the authoritative `target_url` +- the authoritative `expected_domain` + +Service should consume that plan rather than re-deriving equivalent information from raw text. + +### skill metadata owns direct-skill bootstrap hints + +When there is no deterministic plan, the skill package may own the minimal hints needed for bootstrap compatibility: +- `bootstrap_url` +- `expected_domain` + +The skill should not own resolution precedence. It only provides data for the fallback tier. + +--- + +## File-Level Design + +### `src/service/server.rs` + +Change responsibilities here from “derive request URL by ad hoc branch logic” to “ask the resolver for a bootstrap target”. + +Expected changes: +- replace `initial_request_url_for_submit_task(...)` logic with a call into a resolver +- delete `derive_request_url_from_instruction(...)` or reduce it to thin legacy glue during migration +- remove the line-loss text-match hardcode entirely +- keep callback-host startup logic unchanged apart from where `bootstrap_url` comes from + +### `src/compat/deterministic_submit.rs` + +No routing policy should move into service from this module. 
+ +Expected role in the new design: +- continue producing `DeterministicExecutionPlan` +- expose enough information for service bootstrap resolution to reuse `plan.target_url` +- remain the source of truth for deterministic line-loss target selection + +### `src/compat/direct_skill_runtime.rs` + +This module currently resolves skill tool execution and derives `expected_domain` from task context. + +For this slice, it does **not** need a full behavior rewrite. + +Expected role: +- add or expose a helper for reading direct skill metadata if service needs it +- keep runtime execution behavior stable unless required by the new metadata seam + +A later slice may allow runtime execution to use skill-owned `expected_domain` fallback too, but that is not required to land this service TODO. + +### `src/config/settings.rs` + +If sgClaw configuration needs to point to direct-skill metadata or enable fallback behavior explicitly, add the minimum structure here. + +However, this slice should avoid creating a second parallel source of target URLs inside sgClaw config if the same information can be read from skill metadata. + +The key rule is: +- do not replace one hardcode with a different hardcoded config map inside `settings.rs` + +### Skill metadata loading seam + +The design assumes a small read path that can answer: +- for the configured direct-submit skill, is there a `bootstrap_url`? +- is there an `expected_domain`? + +The exact storage location can follow the existing staged-skill packaging model, but the new metadata should remain minimal and execution-adjacent rather than introducing a new wide dispatch schema. + +--- + +## Data Flow + +### Current desired flow + +1. service receives `ClientMessage::SubmitTask` +2. service converts it to `SubmitTaskRequest` +3. service resolves `SubmitBootstrapTarget` +4. service passes `SubmitBootstrapTarget.request_url` into `LiveBrowserCallbackHost::start_with_browser_ws_url(...)` +5. 
callback-host bootstraps helper page using that URL +6. remaining task execution continues unchanged + +### Resolution behavior examples + +#### Case A: page context exists +- request includes `page_url=https://www.zhihu.com` +- resolver returns `PageContext` +- service uses that URL directly + +#### Case B: line-loss deterministic request, no page context +- request has no `page_url` +- deterministic parser returns `Execute(plan)` +- resolver returns `DeterministicPlan` with `request_url=plan.target_url` +- service uses line-loss target URL from the plan + +#### Case C: direct-submit skill with configured bootstrap URL, no page context, not deterministic +- request has no `page_url` +- deterministic parser returns `NotDeterministic` +- configured direct skill metadata exposes `bootstrap_url` +- resolver returns `SkillConfig` + +#### Case D: nothing resolves +- no page context +- no deterministic plan +- no skill metadata bootstrap URL +- resolver returns `Fallback` with `about:blank` + +--- + +## Testing Strategy + +### Resolver-focused tests + +Add focused tests covering precedence: + +1. `page_url` wins over everything else +2. deterministic line-loss `Execute(plan)` wins when `page_url` is absent +3. skill metadata fallback is used only when no deterministic plan exists +4. `about:blank` remains the terminal fallback + +### Regression coverage for the removed TODO + +Add a regression proving that the service no longer depends on: +- `instruction.contains("线损")` +- `instruction.contains("lineloss")` + +The line-loss bootstrap URL should now come from the deterministic plan only. 
+ +### Direct skill fallback tests + +Add tests for: +- configured skill metadata with valid `bootstrap_url` +- missing `bootstrap_url` +- malformed `bootstrap_url` +- mismatch between metadata and current page context precedence + +Malformed `bootstrap_url` metadata should be treated as unusable fallback data rather than a hard error for service bootstrap resolution: +- if page context exists, page context still wins +- if deterministic plan exists, deterministic plan still wins +- if malformed metadata is the only candidate, resolver should ignore it and fall through to `about:blank` + +### Existing callback-host tests remain stable + +Do not redesign callback-host behavior in this slice. + +The callback-host tests should only need enough updates to reflect the new bootstrap URL source, not a new helper lifecycle contract. + +--- + +## Migration Plan Shape + +Recommended implementation order: + +1. Introduce the bootstrap-target resolver and narrow result type. +2. Wire deterministic line-loss resolution into it using `DeterministicExecutionPlan.target_url`. +3. Remove the temporary line-loss hardcode from `server.rs`. +4. Add skill metadata fallback for configured direct-submit skills. +5. Expand tests to lock precedence and fallback behavior. + +This order lands the TODO removal early without forcing the full fallback design to be implemented blindly first. 
+ +--- + +## Explicit Non-Goals + +This slice does **not**: +- redesign callback-host lifecycle +- redesign deterministic scene parsing +- redesign direct-submit routing ownership +- introduce a broad scene registry for request URL derivation +- change browser command protocol +- rewrite direct skill execution behavior beyond what is needed for metadata lookup +- replace all current uses of page context with skill metadata + +--- + +## Design Rule + +For service bootstrap request URL resolution: + +- current page context stays first +- deterministic execution plans are the authoritative source for deterministic scenes +- skill metadata provides a narrow fallback for non-deterministic direct skills +- `about:blank` remains the final fallback +- `src/service/server.rs` must not contain scene-specific text-match hardcodes such as the current line-loss TODO diff --git a/docs/superpowers/specs/2026-04-15-generated-scene-skill-platform-design.md b/docs/superpowers/specs/2026-04-15-generated-scene-skill-platform-design.md new file mode 100644 index 0000000..3aff778 --- /dev/null +++ b/docs/superpowers/specs/2026-04-15-generated-scene-skill-platform-design.md @@ -0,0 +1,754 @@ +# Generated Scene Skill Platform Design + +**Goal:** Evolve `sgClaw` from one-off business-scene integrations into a platform that can generate, register, and invoke staged scene skills through a generic runtime path, while keeping v1 implementation strictly limited to report/collection-oriented `browser_script` scenes. + +**Status:** Approved brainstorming direction for formal specification. + +--- + +## Decision Summary + +1. `sgClaw` should become a scene-skill platform, not a growing set of per-scene Rust branches. +2. V1 should support only report/collection-oriented `browser_script` scenes generated from existing scenario directories. +3. 
The generated output must include both the staged skill package and a platform registration manifest so that new scenes can be discovered and invoked with minimal or zero per-scene Rust changes. +4. In the intranet near term, deterministic mode remains the explicit `。。。` suffix path; no model is required for v1 invocation. +5. The design must preserve the existing main architecture, stay close to the current `browser_script` and artifact pipeline, and avoid platform changes that drift into a general workflow engine. +6. The implementation should happen on a new branch copied from `ws`, not directly inside the current `ws` branch. +7. The generator and runtime must be separated by explicit contracts so the generator can later be extracted into a standalone project. +8. The platform design must turn the full `tq-lineloss-report` lessons learned into durable documentation and generator input rules so future generated skills do not repeat the same mistakes. + +--- + +## Hard Constraints + +### 1. Extensibility is mandatory + +The platform must support future extension without forcing redesign of the core contracts. The design must leave clean seams for: + +- additional scene types +- additional deterministic matchers +- additional parameter resolver types +- additional tool invokers beyond `browser_script` +- future LLM semantic routing on top of the same registered scene contracts +- future extraction of the generator into a separate project + +### 2. Stay on the main line + +The core objective is: + +- generate staged scene skills from existing scenario directories +- register them automatically +- invoke them through a generic deterministic runtime path + +The design must not drift into: + +- a full low-code workflow engine +- a general browser RPA authoring platform +- a full login/session orchestration platform in v1 +- a broad runtime rewrite unrelated to generated scene skill support + +### 3. 
Preserve the current architecture theme + +The design should reuse and generalize the parts of `sgClaw` that already look platform-like: + +- skills discovery/loading +- `browser_script` execution seams +- artifact interpretation +- export/postprocess seams +- bootstrap target resolution seams + +It must avoid large theme-breaking rewrites of the runtime unless a generic platform seam truly requires them. + +### 4. Execution branch strategy + +This work is large enough that implementation should not land directly on the active `ws` branch. The future implementation plan must explicitly require: + +- start from the current `ws` branch state +- create a new branch copied from `ws` +- perform platform conversion there +- preserve `ws` as the stable reference baseline during the migration + +### 5. Generator extraction must remain possible + +The generator should not be tightly coupled to `claw-new` internals. The boundary between runtime and generator must be a stable package/manifest contract so the generator can later move into a separate project without redesigning registered scene skills. + +### 6. `tq-lineloss-report` lessons learned must become first-class inputs + +The design must require a durable lessons-learned document derived from the full `tq-lineloss-report` path, including deterministic routing, canonical parameterization, bootstrap targets, pipe/ws differences, timeout chains, artifact contracts, and Rust-side export constraints. + +This document is not an appendix. It is a required generator-design input and future template hardening source. + +The document must be split into two layers so it remains enforceable instead of becoming loose prose: + +- a structured machine-consumable lessons artifact that generator templates can read or reference deterministically +- a human-oriented narrative/analysis document explaining the why, trade-offs, and debugging history behind those lessons + +### 7. 
Use the superpowers process end-to-end + +This design must be carried through the superpowers flow: + +- brainstorming +- formal spec +- review loop +- user review +- implementation planning + +### 8. Think through the details before implementation + +The spec must make the critical details explicit now so execution does not discover foundational contract problems halfway through. + +--- + +## Why This Platform Exists + +The current line-loss integration proves that `sgClaw` can support a staged business scene, but it also exposes the current architecture problem: + +- the staged skill package exists and is useful +- the `browser_script` execution seam exists and is useful +- the runtime has some generic pieces already +- but deterministic routing, parameter normalization, bootstrap target selection, and scene-specific invocation are still too tied to one-off Rust code + +Examples visible in the current code: + +- `src/compat/deterministic_submit.rs` hardcodes the line-loss suffix route, target URL, host, scene matcher, org resolver, and period resolver +- `src/service/server.rs:453` already has a more general bootstrap-target seam, but it still delegates deterministic planning to scene-specific logic +- `src/compat/direct_skill_runtime.rs:148` already knows how to resolve and execute a `browser_script` tool from the skills directory, which is a strong existing platform primitive +- `src/runtime/engine.rs:232` already has multi-directory runtime skill loading and browser-surface-aware filtering, which is another platform primitive + +The design goal is to promote the reusable parts into a stable platform and move scene-specific behavior into generated packages plus scene manifests. + +--- + +## V1 Scope + +### In scope + +V1 is strictly limited to report/collection-oriented `browser_script` scenes generated from existing scenario directories. 
+ +That means: + +- input source is an existing scenario directory containing page assets and business JS logic +- generated output is a staged skill package plus a platform registration manifest +- runtime invocation uses deterministic `。。。` routing only +- execution reuses the existing `browser_script` invocation chain +- output is a structured report artifact plus optional generic report postprocessing such as local XLSX export/open + +### Out of scope + +V1 does **not** include: + +- generic action/authoring scenes such as navigation, form filling, publishing, or editor automation +- arbitrary multi-step workflow orchestration +- session/login orchestration as a generic platform capability +- non-`browser_script` tool generation +- full LLM semantic scene routing implementation +- a universal low-code engine + +### Spec-level future seams + +The spec **must** define extension interfaces for future use, but those extensions are not part of v1 implementation: + +- matcher extension seam for future LLM semantic selection +- resolver extension seam for more complex domain parsing +- invoker extension seam for new tool kinds +- artifact interpreter extension seam for non-report results +- postprocessor extension seam beyond report export/open +- generator packaging seam for future project extraction + +--- + +## Platform Architecture + +The recommended platform has five units. + +### 1. 
Scene Source Analyzer + +Input: + +- an existing scenario directory +- typical source artifacts such as `index.html`, `js/*`, business requests, export calls, state dictionaries, and target pages + +Responsibility: + +- inspect source structure and collect candidate scene metadata +- identify the likely business page URL/domain +- identify likely collection mode (report/collection in v1) +- extract request-shape hints, output table hints, export/report-log hints, and page dependencies +- record uncertainty instead of guessing when source evidence is incomplete + +This unit is analysis-only. It does not perform runtime registration or invocation. + +### 2. Skill Generator + +Input: + +- analyzed scene source description +- generator templates +- lessons-learned rules derived from existing scenes such as `tq-lineloss-report` + +Output: + +- staged skill package +- platform registration manifest +- generated references and contract docs + +Generated package contents for v1: + +- `SKILL.toml` +- `SKILL.md` +- `references/collection-flow.md` +- `references/data-quality.md` +- `scripts/*.js` +- `scripts/*.test.js` +- optional scene snapshot assets +- `scene.toml` + +The generator is responsible for producing complete registration-ready output, not just scaffolding files. + +### 3. Scene Registry Loader + +Responsibility: + +- scan staged skill directories +- locate `scene.toml` +- validate scene registration contracts +- register scenes into a unified runtime registry + +This replaces the long-term need for per-scene Rust wiring. + +The existing runtime already has useful loading primitives in `src/runtime/engine.rs:361` and skill-dir normalization in `src/compat/config_adapter.rs:90`. V1 should build on those instead of replacing them. + +### 4. 
Generic Deterministic Dispatcher + +Responsibility: + +- activate only when the raw instruction ends with `。。。` +- iterate registered scenes, not hardcoded scene branches +- evaluate deterministic match rules declared in `scene.toml` +- resolve required canonical parameters using platform resolver types +- produce either: + - mismatch / unsupported-scene prompt + - missing/ambiguous parameter prompt + - executable scene invocation plan + +#### Multi-match and precedence rules + +Extensibility means multiple registered scenes may match the same deterministic request. The platform must define this explicitly instead of allowing hidden first-match behavior. + +Design rules: + +- deterministic dispatch must score candidate scenes through declared match signals rather than raw file-load order +- higher-confidence signals may include page URL/title context, explicit include/exclude keyword fit, and resolver success for required parameters +- plain keyword overlap alone is not sufficient justification for silently choosing one scene when another remains plausible +- if two or more scenes remain materially plausible after deterministic scoring and required-parameter evaluation, the dispatcher must fail closed with an explicit ambiguity prompt rather than guessing +- the future implementation plan must lock the scoring and tie-break order in tests +- bootstrap/page-context signals are allowed to participate in disambiguation, but they must be declared and explainable + +This keeps the system extensible without turning new scenes into routing contradictions. + +This should replace scene-specific logic currently concentrated in `src/compat/deterministic_submit.rs`. + +### 5. 
Generic Execution Pipeline + +Responsibility: + +- invoke the resolved tool through the existing `browser_script` seam +- reuse bootstrap target resolution +- interpret the artifact according to the registered artifact contract +- run generic report postprocessing such as Rust-side XLSX export +- keep business-specific interpretation out of the platform core + +The strong requirement is to preserve the already-validated common path in: + +- `src/compat/direct_skill_runtime.rs` +- `src/compat/browser_script_skill_tool.rs` +- the existing report-artifact and export seams + +--- + +## Scene Registration Contract + +The central platform contract is a per-scene registration manifest, named `scene.toml` in this design. + +### Why a separate manifest is needed + +`SKILL.toml` describes tools. It does not fully describe: + +- deterministic routing rules +- scene identity +- platform parameter resolution contracts +- bootstrap target rules +- artifact interpretation rules +- generic postprocessing declarations + +Without this manifest, the generator would only create files while the runtime would still need scene-specific Rust changes. + +### Manifest responsibilities + +Each generated scene manifest must declare: + +1. scene identity and runtime entrypoint +2. bootstrap/page context requirements +3. deterministic matching rules +4. parameter schema and resolver mapping +5. execution contract +6. artifact contract +7. postprocess contract +8. schema/version metadata sufficient for long-term generator/runtime evolution + +### Manifest versioning and registry rules + +The manifest contract must be explicit and versioned from the start. 
+ +Required rules: + +- every `scene.toml` must declare a manifest schema version independent from the scene version +- the runtime must validate schema compatibility before registration +- scene registration must require globally unique `scene.id` values across all loaded scene roots +- duplicate scene IDs must fail registration deterministically rather than silently overriding an earlier scene +- the future implementation plan must decide and test the duplicate policy explicitly, but the default design rule is fail-fast with a clear error describing both conflicting manifest locations +- manifest evolution must prefer additive compatibility where possible so a future standalone generator can target the same runtime contract intentionally rather than by coincidence + +This versioned contract is part of the extraction seam: it is what allows the runtime and a future standalone generator to evolve without private coupling. + +### Recommended manifest shape + +```toml +[scene] +id = "tq-lineloss-report" +skill = "tq-lineloss-report" +tool = "collect_lineloss" +kind = "browser_script" +version = "0.1.0" +category = "report_collection" + +[manifest] +schema_version = "1" + +[bootstrap] +expected_domain = "20.76.57.61" +target_url = "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor" +page_title_keywords = ["线损"] +requires_target_page = true + +[deterministic] +suffix = "。。。" +include_keywords = ["线损", "月累计", "周累计"] +exclude_keywords = ["知乎"] + +[[params]] +name = "org" +resolver = "dictionary_entity" +required = true +prompt_missing = "已命中台区线损报表技能,但缺少供电单位。" +prompt_ambiguous = "已命中台区线损报表技能,但供电单位存在歧义,请补充更完整名称。" + +[params.resolver_config] +dictionary_ref = "references/org-dictionary.json" +output_label_field = "org_label" +output_code_field = "org_code" + +[[params]] +name = "period" +resolver = "month_week_period" +required = true +prompt_missing = "已命中台区线损报表技能,但缺少统计周期。" +prompt_ambiguous = "已命中台区线损报表技能,但统计周期存在歧义,请补充更明确表达。" + +[artifact] +type = 
"report-artifact" +success_status = ["ok", "partial", "empty"] +failure_status = ["blocked", "error"] + +[postprocess] +exporter = "xlsx_report" +auto_open = "excel" +``` + +### Design rule + +`scene.toml` declares behavior. It does not contain business JS code. + +- business collection logic stays in `scripts/*.js` +- platform match/resolver selection stays in the manifest +- generic runtime execution stays in the platform + +--- + +## Platform-Provided Generic Capabilities + +The platform should expose a small, explicit set of reusable capability types. + +### 1. Scene Matchers + +V1 deterministic matcher types should stay simple and declarative: + +- include keywords +- exclude keywords +- required suffix +- optional page URL/title constraints + +This is enough for v1 report scenes and avoids overbuilding NLP into deterministic mode. + +Future seam: + +- add a semantic matcher interface for model-based routing later without changing the rest of the scene contract + +### 2. Parameter Resolvers + +The platform should provide reusable resolver types instead of scene-specific branches. + +Recommended v1 resolver types: + +- `dictionary_entity` + - maps aliases to canonical label/code pairs using scene-provided dictionary data +- `month_week_period` + - parses month/week intent and canonical time payloads +- `fixed_enum` + - maps deterministic text options into fixed internal values +- `literal_passthrough` + - preserves an already explicit literal value + +Design rule: + +If a new scene needs a new resolver **type**, add a reusable platform capability. Do not add a scene-specific Rust branch. + +### 3. Bootstrap Resolvers + +The platform must be able to produce: + +- `expected_domain` +- `target_url` +- page-context validation hints + +These should come from registration metadata, not from per-scene hardcoded constants. + +This generalizes the existing bootstrap-target seam already present in `src/service/server.rs:453`. + +### 4. 
Tool Invokers + +V1 supports one invoker type only: + +- `browser_script` + +This is intentionally narrow. It keeps the platform close to the existing main architecture and avoids broad redesign. + +Future seam: + +- later add invokers for other tool kinds without changing scene registration concepts + +### 5. Artifact Interpreters and Postprocessors + +V1 should provide generic handling for report-style results: + +- `report-artifact` interpreter +- `xlsx_report` exporter/postprocessor +- open-after-export policies + +The platform should not know about line-loss business fields specifically. It should only know the generic artifact contract. + +--- + +## Generated Skill Package Contract + +V1 generated scenes must follow a predictable staged package shape. + +### Required generated files + +- `SKILL.toml` +- `SKILL.md` +- `scene.toml` +- `references/collection-flow.md` +- `references/data-quality.md` +- `scripts/*.js` +- `scripts/*.test.js` +- optional support assets such as scene snapshots + +### V1 generated skill assumptions + +Generated report/collection skills must: + +- accept normalized canonical args only +- validate expected page context before collection +- avoid re-parsing raw user language inside the script +- return one structured artifact object +- keep page/API collection logic inside the script +- leave generic interpretation/export policy to the platform where possible + +### Separation rule + +The generated skill package owns: + +- page inspection +- page-side state usage +- page/API calls +- row normalization +- scene-local docs and references + +The platform owns: + +- scene discovery and registration +- deterministic scene selection +- canonical parameter resolution using generic resolver types +- tool invocation +- artifact interpretation +- generic postprocessing + +--- + +## Migration Path from `tq-lineloss` One-Off to Platform Sample + +The current line-loss implementation should not be discarded. 
It should become the first migration sample and platform proof point. + +### Why line-loss is the right sample + +It already exercised most of the hard problems: + +- deterministic routing via `。。。` +- canonical org resolution +- canonical month/week resolution +- staged `browser_script` packaging +- bootstrap target selection +- report artifact shaping +- local export needs +- pipe/ws transport differences +- real browser/runtime timeout and callback-host issues + +### Phase A: Extract generic registry and invocation seams + +First, add: + +- scene registry loader +- manifest reader/validator +- generic deterministic dispatch planning + +while preserving the existing `browser_script` execution seam. + +### Phase B: Convert `tq-lineloss` into the first manifest-driven scene + +Move line-loss specific declarations out of hardcoded Rust branches and into registration data: + +- scene identity +- target URL and expected domain +- deterministic scene match rules +- resolver mapping +- artifact/postprocess declarations + +Keep the business collection script in the skill package. + +### Phase C: Build the generator on top of the stabilized contract + +Once line-loss runs through the manifest-driven platform path, define generator templates that produce the same contracts automatically from scenario directories. + +### Phase D: Add future semantic routing later + +When model access is available, layer semantic routing onto the same registered scene contracts. + +The LLM should eventually help with: + +- selecting a scene +- filling unresolved parameters + +But it should not replace the registered execution contract. + +--- + +## Generator Extraction Boundary + +The design must support eventually moving the generator out of `sgClaw`. 
+ +### Required extraction seam + +The generator and runtime should communicate only through generated artifacts and contracts: + +- staged skill package layout +- `scene.toml` +- any scene-local dictionaries/reference data + +### Consequence + +The runtime must not depend on internal generator implementation details. + +This means: + +- do not let the runtime call generator internals directly +- do not let the generator rely on private runtime types as its only output format +- keep manifest and package contracts explicit and versionable + +This is what makes later extraction into a separate repository practical. + +--- + +## `tq-lineloss-report` Lessons-Learned Document Requirement + +The platform design requires a dedicated lessons-learned document based on the full `tq-lineloss-report` implementation and debugging path. + +### Why this document is required + +The line-loss path uncovered issues that a naive generator would recreate immediately. + +These include: + +- deterministic routing and prompt semantics +- strict canonical org/period normalization +- no hidden page-default fallback +- target URL / expected-domain / bootstrap-target contracts +- `browser_script` target URL requirements +- artifact shape discipline +- Rust-side XLSX export necessity because browser-side localhost export can fail under remote page origin constraints +- pipe vs ws differences +- callback-host and helper bootstrap timeout risks +- real-world service-console runtime validation gaps + +### Required sections + +The lessons document should at minimum cover: + +1. source-scene assumptions that must be surfaced explicitly +2. deterministic routing pitfalls +3. canonical parameterization pitfalls +4. bootstrap target and page-context pitfalls +5. execution transport pitfalls (pipe/ws) +6. artifact and export pitfalls +7. testing pitfalls +8. manual runtime validation pitfalls +9. what should become generator template rules +10. 
what should remain scene-specific manual work + +### Required format and location + +The design requires both artifacts to live in a stable, versioned location under the project docs so future plans and a future standalone generator can depend on them intentionally. + +Recommended shape: + +- `docs/superpowers/references/tq-lineloss-lessons-learned.md` + - human-oriented narrative and rationale +- `docs/superpowers/references/tq-lineloss-lessons-learned.toml` + - structured generator input rules + +The TOML artifact should be organized as reusable rule sections such as: + +- deterministic routing rules +- canonical parameter rules +- bootstrap/target-url rules +- artifact/postprocess rules +- validation/test checklist rules + +The generator should consume the structured TOML rules as template constraints or generation-time validation inputs, while the Markdown document remains the explainability companion for human reviewers. + +### How it should be used + +This document becomes: + +- template hardening input for the generator +- a checklist for reviewing generated scenes +- a planning artifact for deciding which pieces can be automated safely + +--- + +## Existing Code Surfaces to Reuse + +The design should explicitly build on these current platform-adjacent surfaces rather than replacing them wholesale. 
+ +### Skills discovery and loading + +- `src/runtime/engine.rs:232` load skills for surface from configurable directories +- `src/runtime/engine.rs:361` load runtime skills across multiple roots +- `src/compat/config_adapter.rs:90` skill-dir normalization + +### Generic `browser_script` execution + +- `src/compat/direct_skill_runtime.rs:91` raw output execution helper +- `src/compat/direct_skill_runtime.rs:148` tool resolution from staged skills +- `src/compat/browser_script_skill_tool.rs` script loading/wrapping/invocation pipeline + +### Bootstrap target resolution seam + +- `src/service/server.rs:453` submit bootstrap target resolution + +### Current one-off deterministic branch that should be generalized + +- `src/compat/deterministic_submit.rs` + +The line-loss-specific pieces in that file are the main migration targets for platform conversion. + +--- + +## Failure Semantics + +The platform must preserve explicit, business-safe failure semantics. + +### Deterministic mismatch + +If the request ends with `。。。` but no registered scene matches, the runtime must return an explicit deterministic mismatch response. + +### Missing / ambiguous parameters + +If a registered scene matches but required parameters cannot be resolved uniquely, the runtime must prompt rather than guess. + +### Execution failure + +Execution failures should be interpreted according to the registered artifact contract and generic report semantics, not through per-scene special cases in the platform core. + +### Design rule + +The platform should never silently recover by using page defaults when the scene contract requires canonical inputs. + +--- + +## Verification Requirements for the Future Implementation Plan + +The future implementation plan must verify: + +1. registry loading from generated scene manifests +2. deterministic dispatch through registered scenes instead of per-scene branches +3. manifest-driven bootstrap target selection +4. manifest-driven parameter resolver dispatch +5. 
generic `browser_script` invocation of generated scenes +6. generic report artifact interpretation +7. generic XLSX postprocessing compatibility +8. unchanged behavior for existing non-scene core flows outside v1 scope +9. migration of `tq-lineloss` from hardcoded branch to manifest-driven sample +10. branch strategy based on a new branch copied from `ws` +11. lessons-learned document completeness and reuse as generator input +12. separation seam sufficient for future generator extraction + +--- + +## Out of Scope for the V1 Implementation Plan + +The future implementation plan should explicitly avoid: + +- generic login/session capability as a first-class v1 platform subsystem +- full semantic routing implementation with models +- generalized action workflows +- a full scene DSL runtime +- direct implementation of multiple non-report scene kinds +- replacing the validated core `browser_script` execution path with a new protocol +- broad architectural rewrites unrelated to generated scene skill support + +--- + +## Recommended First Implementation Slice + +The most stable first slice is: + +1. create the scene manifest contract and validator +2. build a registry loader over existing staged skill directories +3. generalize deterministic dispatch to use registered scenes +4. migrate `tq-lineloss` into the first manifest-driven scene +5. document all line-loss lessons learned +6. only then build the scenario-directory-to-skill generator + +This keeps the platform grounded in a working runtime contract before the generator is asked to automate against it. 
+ +--- + +## Final Recommendation + +Build `sgClaw` into a generated scene skill platform by separating it into: + +- a generic runtime platform that discovers, matches, resolves, invokes, and postprocesses scenes using manifest-driven contracts +- a scenario-directory-to-skill generator that emits staged skill packages and scene registration manifests + +Implement v1 only for report/collection-oriented `browser_script` scenes, keep deterministic invocation on the explicit `。。。` suffix, migrate `tq-lineloss` into the first manifest-driven sample, and preserve a clean extraction seam so the generator can later become its own project. \ No newline at end of file diff --git a/docs/superpowers/specs/2026-04-17-generated-scene-rectification-design.md b/docs/superpowers/specs/2026-04-17-generated-scene-rectification-design.md new file mode 100644 index 0000000..4ca2adb --- /dev/null +++ b/docs/superpowers/specs/2026-04-17-generated-scene-rectification-design.md @@ -0,0 +1,236 @@ +# Generated Scene Rectification Design + +> **Status:** Draft +> **Date:** 2026-04-17 +> **Author:** Codex + +## Problem Statement + +当前自动场景转 skill 流程虽然已经引入了 `Scene IR`、`workflowArchetype` 和 readiness 分级,但对营销类复杂报表场景仍存在三类致命偏差: + +1. `sceneId` 会从中文场景名退化成低信息量标识,例如 `营销2.0零度户报表数据生成 -> 2-0`。 +2. bootstrap 会被 `localhost` 导出或辅助服务污染,导致内网入口域名解析错误。 +3. workflow 会在证据不完整时提前归类,导致 `paginated_enrichment` 场景缺失 `paginate`、`secondary_request` 等关键步骤证据。 + +这三个问题叠加后,会出现“结构上看似已生成 skill,实际上放到内网一定跑不通”的假阳性结果。现有生成器仍然偏向“模板填充”,没有形成一条对内网场景足够保守、可审计、可拒绝错误输出的整改链路。 + +## Rectification Goal + +本次整改目标不是继续提升“生成成功率”,而是把生成器收敛成一个更稳的 `scene skill rectifier`,做到: + +1. 不再产出 `2-0` 这类无业务语义的 `sceneId`。 +2. 不再让 `localhost`、静态资源地址、模板噪声参与 bootstrap 竞争。 +3. 不再在工作流证据缺失时冒然输出 `paginated_enrichment` 或其它高复杂 archetype。 +4. 不再把低可信生成结果伪装成“可直接内网试跑”的 skill。 +5. 为后续 mini 版 `skill-creator` 打下可复用的整改底座。 + +## Non-Goals + +1. 不在本轮整改中解决全部历史场景兼容性。 +2. 不要求本轮整改覆盖登录态恢复、复杂鉴权、宿主浏览器差异。 +3. 不要求 LLM 单独完成中文场景语义恢复,整改仍以确定性证据优先。 +4. 
不要求一步到位生成所有 browser workflow 细节;允许在 readiness 门禁前失败关闭。 + +## Current Failure Reconstruction + +以 `营销2.0零度户报表数据生成` 为例,当前错误链路大致如下: + +1. 目录名 fallback 经过仅保留 `[a-z0-9]` 的 slug 规则后,中文主体被剥离,只剩 `2-0`。 +2. URL 候选集合没有分层,`http://localhost:13313/...` 与 `http://yx.gs.sgcc.com.cn`、`http://yxgateway.gs.sgcc.com.cn/...` 被放在同一个 bootstrap 竞争池。 +3. 工作流分类优先命中通用的“多模式字段”信号,导致 archetype 判定先偏向 `multi_mode_request`。 +4. endpoint 集合又被 `${apiUrl}`、静态模板串、localhost 导出地址等噪声稀释,最终 workflow step 构造无法稳定提取“分页主请求 -> 逐户补数 -> 过滤 -> 导出”的完整证据。 +5. 生成器仍然给出可输出 skill,造成用户误以为该 skill 具备内网可运行性。 + +所以整改不能只修某一个 if 分支,而是必须同时修正命名链、bootstrap 链、workflow 链和 readiness 链。 + +## Rectification Principles + +### 1. Fail Closed 优先于 Fail Open + +当 `sceneId`、bootstrap、workflow 任一关键链路证据不足时,生成器必须降级、阻断或明确标红,而不是继续生成一个“看起来完整”的 skill。 + +### 2. 先判定证据类别,再消费证据 + +URL、函数名、分页变量、过滤条件、导出调用,必须先完成“证据分层”与“噪声剔除”,再参与 archetype 分类和模板编译。 + +### 3. Bootstrap 与 Export 必须解耦 + +内网业务入口域名与本地导出服务是两类完全不同的运行时概念。`localhost` 可以作为 export/downstream evidence,但绝不能作为 bootstrap candidate。 + +### 4. 命名必须具备业务可读性 + +`sceneId` 不是目录技术标识,而是 skill 的业务身份。任何低熵、数字化、占位式 id 都必须被视为无效结果。 + +### 5. Archetype 输出必须受完整工作流约束 + +`paginated_enrichment` 不是“有分页字段”就能输出,而必须同时满足主列表请求、分页证据、二次请求证据和聚合/过滤/导出链路中的最低组合。 + +## Target Architecture + +```text +source scene + -> source scan + -> evidence stratification + -> naming chain + -> bootstrap chain + -> workflow chain + -> archetype gating + -> readiness grading + -> skill generation or fail-closed report +``` + +整改后的核心不是新增一个大模型步骤,而是在现有 `Scene IR` 前后补齐四道约束: + +1. 命名约束 +2. bootstrap 约束 +3. workflow 约束 +4. readiness 约束 + +## Rectification Design + +### Naming Chain Rectification + +`sceneId` 整改的目标是让“中文目录名 -> 业务可读 sceneId”成为一条受控链路,而不是任由 fallback 退化。 + +整改方案如下: + +1. `sceneId` 候选来源按优先级分层: + - LLM 明确返回的业务语义 id + - 确定性抽取出的英文业务关键词组合 + - 基于中文场景名的受控 transliteration / alias 规则 + - 最终 fallback 仅可作为 `invalid_candidate`,不可直接落盘 +2. 新增 `sceneId` 有效性校验: + - 不能是纯数字或数字主导 + - 不能短于业务最小可读长度 + - 不能只由版本号或通用词组成 + - 不能与 `sceneName` 语义完全脱钩 +3. 
对 `2-0`、`1-0`、`report`、`scene` 这类低熵 id 统一判为 `invalid_scene_id`。 +4. 一旦命中无效 id,生成器只能输出整改报告或要求人工确认,不允许直接生成正式 skill 目录。 + +这条链路的结果是:`sceneId` 从“字符串清洗结果”升级为“业务标识产物”。 + +### Bootstrap Chain Rectification + +bootstrap 整改的目标是把 URL 候选集拆成不同证据层,彻底消除 `localhost` 污染。 + +整改方案如下: + +1. 对 URL 候选先做角色分层: + - `business_entry` + - `business_api` + - `gateway_api` + - `export_service` + - `local_helper` + - `static_asset` + - `template_noise` +2. 角色识别规则要求: + - `localhost`、`127.0.0.1`、`SurfaceServices`、`ReportServices` 默认归入 `export_service` 或 `local_helper` + - `.js`、`.css`、模板占位 URL、字符串格式化残片归入 `static_asset` 或 `template_noise` + - 页面常量中的 `sourceUrl`、首页入口地址、业务网关前缀优先归入 `business_entry` 或 `business_api` +3. bootstrap 决策只允许消费: + - `business_entry` + - `business_api` + - `gateway_api` +4. 当 `business_entry` 与 `gateway_api` 并存时: + - `expectedDomain` 取业务主域 + - API 前缀保留在 request evidence 中,不直接覆盖 target page +5. 当只识别到 `localhost` 或噪声地址时,bootstrap 必须判为 `unresolved_bootstrap`,生成器直接降级。 + +这条链路的结果是:`localhost` 仍可保留为“导出依赖证据”,但永远不再有资格被提升成业务入口域名。 + +### Workflow Chain Rectification + +workflow 整改的目标是从“字段特征分类”转为“请求链重建分类”。 + +整改方案如下: + +1. 在 `Scene IR` 中把 workflow 证据拆成四层: + - request evidence + - pagination evidence + - secondary request evidence + - post-process evidence +2. archetype 分类不再优先依赖泛化字段名如 `type/tab/mode/status`,而是优先依赖: + - 是否存在主列表请求 + - 是否存在稳定分页变量组合 + - 是否存在逐项或逐批二次请求 + - 是否存在过滤、聚合或导出动作 +3. endpoint 进入工作流前必须经过归一化: + - 去掉 `${apiUrl}`、格式化占位串、日志文本、异常字符串 + - 去掉 `localhost` export endpoint 对 archetype 的干扰 + - 合并同一业务 API 的不同拼接形态 +4. `paginated_enrichment` 的最小证据门槛改为: + - 至少一个主列表请求 + - 至少一组分页变量 + - 至少一个二次请求入口或明确定义的逐户补数函数 + - 至少一个后处理动作:`filter`、`transform`、`export` 之一 +5. 如果只满足部分证据: + - 可保留为 `candidate_paginated_enrichment` + - 但不得进入正式 `paginated_enrichment` 编译路径 +6. `multi_mode_request` 只在“模式切换显著改变请求体、列定义或响应路径”时成立,不能仅凭通用字段名命中。 + +这条链路的结果是:营销类场景只有在真正重建出“分页 + 补数 + 后处理”证据后,才会进入对应编译器。 + +### Readiness Chain Rectification + +readiness 整改的目标是把“能否生成”与“能否内网试跑”明确分开。 + +整改方案如下: + +1. 
新增关键门禁: + - `scene_id_valid` + - `bootstrap_resolved` + - `workflow_complete_for_archetype` + - `runtime_contract_compatible` +2. 只有全部通过时,结果才可标为 `A` 或 `B`。 +3. 任一关键门禁失败时: + - 结果只能是 `C` + - UI 与报告中必须显式展示缺失项 + - 允许输出分析报告,但不应默认输出可运行 skill +4. 对 `marketing-zero-consumer-report` 这类参考场景,readiness 最低通过条件应明确写死为: + - 非退化 `sceneId` + - bootstrap 指向 `yx.gs.sgcc.com.cn` 体系 + - workflow 含 `paginate` + - workflow 含 `secondary_request` + - workflow 含 `filter` 或 `export` + +这条链路的结果是:生成器从“只要能写文件就算成功”切换为“只有通过门禁才算可试跑”。 + +## Superpowers Landing Strategy + +本整改必须通过 `superpowers` 的 spec -> plan -> execution 三段式落地,不接受直接跳到零散修补。 + +落地顺序应为: + +1. 先基于本设计补一份对应的 `docs/superpowers/plans` 执行计划。 +2. 再按计划拆解到 naming、bootstrap、workflow、readiness 四个任务包。 +3. 实施过程中严格以计划边界为准,不额外扩展到登录、鉴权、宿主接入等非本轮范围。 + +## File Impact + +本设计预期主要影响以下区域: + +| File | Responsibility | +|------|----------------| +| `frontend/scene-generator/generator-runner.js` | 命名 fallback、URL 分层、workflow 证据重建、门禁前分析 | +| `frontend/scene-generator/llm-client.js` | sceneId 语义补全约束、证据摘要输入、低熵输出拦截 | +| `frontend/scene-generator/server.js` | readiness 汇总、整改风险输出、生成阻断策略 | +| `frontend/scene-generator/sg_scene_generator.html` | 显示 invalid sceneId、bootstrap 角色、workflow 缺证和 readiness 风险 | +| `src/generated_scene/analyzer.rs` | 证据分类、endpoint 去噪、archetype 前置判断 | +| `src/generated_scene/ir.rs` | 承载分层 evidence、candidate archetype、门禁状态 | +| `src/generated_scene/generator.rs` | 按门禁决定是否允许进入编译器和正式输出 | + +## Acceptance Criteria + +满足以下条件时,视为本整改设计达成目标: + +1. 中文场景目录不再退化生成 `2-0`、`1-0` 等低熵 `sceneId`。 +2. `localhost`、`127.0.0.1`、导出服务地址不再进入 bootstrap 竞争链。 +3. `marketing-zero-consumer-report` 只有在具备 `paginate + secondary_request + post-process` 证据时才会进入 `paginated_enrichment` 编译路径。 +4. 证据不足时,系统输出整改报告和 readiness 风险,而不是默认生成可运行 skill。 +5. 生成器对外定位从“自动模板生成器”收敛为“带门禁的通用场景 skill 转化器”。 + +## Open Questions + +1. `sceneId` 的中文转英文策略是引入固定 alias 词表,还是允许 LLM 先给候选再由规则校验收敛。 +2. `gateway_api` 与 `business_entry` 并存时,是否需要在 `Scene IR` 中同时保留 `entryDomain` 与 `apiDomain`。 +3. 
readiness 阻断后,默认是否仍允许用户手工确认并强制生成一个 `draft` skill 包。 diff --git a/docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md b/docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md new file mode 100644 index 0000000..013e34c --- /dev/null +++ b/docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md @@ -0,0 +1,790 @@ +# sgClaw Scene Skill 60-to-90 Roadmap Design + +> **Status:** Draft +> **Date:** 2026-04-17 +> **Author:** Codex + +## Problem Statement + +`sg_scene_generate` 与配套 runtime 已从单点原型演进为 scene skill 平台雏形,已具备以下基础能力: + +- `Scene IR` +- `workflowArchetype` 分类 +- deterministic + LLM merge +- readiness / blocker / fail-closed +- scene registry / resolver / dispatch +- `report-artifact` / `xlsx_report` 通用后处理 + +但从业务结果看,自动生成质量仍普遍停留在“约 60 分”的层级:系统可以识别部分结构、生成基础 skill 包、阻断明显错误结果,但尚不能稳定重建 `tq-lineloss-report` 这类高质量 skill 所具备的业务语义。 + +这意味着当前核心问题已经不是“有没有 compiler”,而是: + +> compiler 可以工作,但输入给 compiler 的 `Scene IR` 仍然不够像真实业务。 + +现阶段系统已开始处理以下问题: + +- 低质量 `sceneId` +- bootstrap 被 `localhost` 污染 +- archetype 误判 +- readiness fail-open + +但仍未真正解决以下高价值缺口: + +1. mode-specific request / response / column / normalize 语义恢复不足 +2. `paginated_enrichment` 的主请求、补数请求、过滤和导出链无法稳定绑定 +3. 原始场景里的 BrowserAction、跨页注入、宿主桥接和本地导出链没有被统一建模 +4. 生成器仍偏向“信息抽取 + 模板填充”,而不是“证据驱动的工作流语义恢复” + +因此,本方案的目标不是继续局部修补 prompt 或模板,而是将“场景 skill 自动生成”从 60 分水平提升到可支撑 `tq` 级自动编译的总体路线,并形成正式设计。 + +## Execution Context + +scene skill 转化面对的并非公网独立网页,而是运行在内网环境中的场景包。场景通常嵌入自研浏览器执行,而不是直接在通用浏览器中独立运行。 + +典型执行链如下: + +1. 用户先登录浏览器中的统一平台 +2. 统一平台聚合多个业务系统,并承载场景入口 +3. 用户点击场景执行后,场景脚本再切换或登录到目标业务系统 +4. 登录和上下文预热过程不一定全部显式发生在当前页面中,可能通过隐藏域、宿主接口、后台注入或浏览器管道能力完成 + +因此,源场景中的 `index.html`、`js/` 和页面按钮只是整个执行链的一部分,而非完整运行时边界。后续 scene skill 自动转化必须将“场景源码 + 宿主浏览器能力 + 平台上下文 + 目标系统上下文”视为同一问题空间,不能继续将场景误建模为普通静态网页脚本。 + +## Goal + +在不推翻现有 `Scene IR + compiler + runtime` 架构的前提下,把 scene skill 生成能力从“结构可识别、结果可阻断”的 60 分阶段,提升到“业务语义可恢复、样板家族可稳定编译”的 90 分阶段。 + +具体目标如下: + +1. 
对代表性样板场景,可稳定恢复 mode matrix、requestTemplate、responsePath、columnDefs、normalizeRules、参数契约与 bootstrap 契约 +2. 对简单单请求报表场景,可形成高通过率量产模板 +3. 对复杂分页补数场景,至少能正确识别其工作流问题空间,并在证据不足时稳定 fail-closed +4. 让 scene skill 平台从“自动模板生成器”收敛为“带门禁的通用 scene skill 转化器” + +### Success Definition + +本方案的阶段性成功标准,不再以“生成结果是否尽量接近某个参考 skill 的结构”作为唯一目标,而是以通用场景生成后的 skill 能否在内网环境中直接运行、拿到正确数据并产出正确报表作为主判定口径。 + +阶段性成功至少同时满足以下三层闭环: + +1. 执行闭环成立,即生成 skill 可在自研浏览器承载的内网环境中完成执行 +2. 数据闭环成立,即查询、分页、提取后的数据结果正确且完整 +3. 产物闭环成立,即生成的 Excel 或其他报表符合业务规则 + +因此,本轮路线优先面向“单系统、单页面、查询条件明确、分页拉全、按规则生成报表”的通用报表场景。特殊场景、工具型场景和高复杂 workflow 场景不要求在第一轮全部跑通,但必须被正确识别、分类并按边界处理。 + +## Non-Goals + +1. 不承诺一步覆盖全部 102 个场景 +2. 不承诺对任意历史场景做到 100% 一次性自动命中 +3. 不在本轮方案中解决登录恢复、认证兼容、浏览器宿主差异等全部运行环境问题 +4. 不把 BrowserAction 跨页执行链完整抽象到第一阶段 +5. 不把所有复杂文档渲染、模板上传、附件解析场景纳入 P0 +6. 不在本轮方案中展开“统一平台登录 + 目标业务系统后台登录”的自动恢复实现细节,但必须把这类宿主执行前提显式建模 + +## Current Landscape + +## 全量场景结构判断 + +对 `D:\desk\智能体资料\全量业务场景\一平台场景` 的通读表明: + +- 一共约 102 个场景目录 +- 绝大多数具有统一入口 `index.html` +- 技术壳子高度同质化,主流是 Vue2 + jQuery + ElementUI +- 常见依赖包括 `vue.js`、`jquery.js`、`elementui.js`、`moment.js`、`dpage.min.js` +- 常见桥接脚本包括 `ami.js`、`mca.js`、`/a_js/YPTAPI.js` +- 页面结构通常为“工作信息 + 执行过程日志 + 历史报告 + 一键执行”的自动化工作台 + +这些场景并非 102 种完全不同的技术结构,而是少数几种前端场景包模板在不同业务上的复用。 + +## 场景包结构流派 + +### 1. 单文件内联型 + +典型:`95598、12398、流程超期风险工单明细` + +特征: + +- 顶层几乎只有一个 `index.html` +- 大量业务逻辑直接内联在页面内的 `new Vue({...})` +- 适合做源码内联语义提取样板 + +### 2. 标准静态包型 + +典型:`台区线损大数据-月_周累计线损率统计分析` + +特征: + +- 结构通常为 `index.html + css/ + js/ + images/` +- 业务逻辑拆分在 `js/` 目录 +- 线损 / 电量分析类核心样板大多属于这一流派 + +### 3. 模板 / 导出增强型 + +典型:`供电可靠检修计划报表` + +特征: + +- 目录包含 `assets/`、`html/`、`copy/`、`docx/xlsx` 模板文件 +- 不只是查数,还带文档渲染、模板处理、Excel/Word 导出 + +### 4. 
带历史副本的重包型 + +典型:`力禾动环系统巡视记录` + +特征: + +- 带 `.history/`、`fsdownload/`、多个 `index*.html` +- 含历史版本、下载副本、调试残留 +- 是后续自动提取的高噪声来源 + +## 系统分布特征 + +高频系统和域名包括: + +- `yx.gs.sgcc.com.cn` +- `yxgateway.gs.sgcc.com.cn` +- `south.95598.sgcc.com.cn` +- `pms30.gs.sgcc.com.cn:32003` +- `20.76.57.61:18080` +- `10.4.39.180` +- 多个 `20.* / 21.* / 25.* / 10.*` 内网 IP 系统 + +同时,大量场景依赖: + +- `localhost:13313` +- `localhost:13311` +- `localhost:13312` + +这表明大量场景并非纯业务页面,而是混合了: + +- 业务 API 链 +- 浏览器页面逻辑 +- 宿主桥接 +- 本地导出 / 本地服务 + +## Host Browser Runtime Context + +`localhost:13313`、`localhost:13311`、`localhost:13312` 以及 `ws://localhost:12345` 这类地址,在当前问题域中不应被简单视为错误业务域或无意义噪声。结合 [多核浏览器管道API接口文档](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/多核浏览器管道API接口文档.docx) 可知,这些地址主要属于自研浏览器宿主提供的本地桥接能力,用于承接页面与浏览器内核、隐藏域、本地服务之间的通信。 + +结合现有文档与场景结构,宿主浏览器至少提供以下能力: + +- websocket 管道通信 +- 隐藏域页面打开与加载完成回调 +- 指定域执行 JS +- 浏览器侧 ajax 代理 +- 登录初始化与退出 +- 主界面 / 隐藏域 / agent 区域切换 +- 本地服务路径获取与导出相关能力 + +因此,`localhost:*` 不应继续直接归类为 bootstrap domain 或主业务 endpoint,而应识别为 `host runtime dependency` 或 `browser bridge capability` 证据:既要从“目标业务域识别”中剥离,又不能从“运行时依赖识别”中抹掉。 + +后续自动转化链需要同时区分三类对象: + +- 真实业务目标域,如 `yx.gs.sgcc.com.cn` +- 宿主浏览器桥接域,如 `localhost:*`、浏览器 websocket、本地服务 +- 场景自身页面与静态资源 + +若这三类对象继续混合建模,系统将持续出现 bootstrap 错判、workflow 误归因和 readiness 过宽放行。 + +## Scene Family Segmentation + +基于对 `102` 个场景目录命名、页面结构、前端依赖和页面壳子形态的归纳,可将当前场景集合收敛为 5 个实施家族,而不是 `102` 种彼此独立的技术问题。 + +### Segmentation Result + +当前 `102` 个场景可分为: + +1. 通用单页报表组:`68` +2. 多模式报表组:`11` +3. 分页明细补数组:`10` +4. 工具检测前置组:`8` +5. 低优先级噪声组:`5` + +### Segmentation Interpretation + +`G1 + G2 + G3` 共 `89` 个场景,约占全部样本的 `87%`。这表明当前主流问题空间仍然是报表型 scene skill 转化,而不是工具型、治理文档型或高噪声边界场景。 + +这一定义直接带来三点结论: + +1. 总体路线应优先围绕报表型场景建立稳定 archetype,而不是先为边界场景设计通用超级框架 +2. `G4` 虽不是当前主战场,但与后续 `200+` 纯 JS 检测类场景存在结构亲缘,应在架构上预留 +3. 
`G5` 不应进入首轮主线,而应作为降级识别与 fail-closed 的边界样本 + +## Implementation Mapping by Scene Family + +### G1 通用单页报表组 + +定位: + +- 当前覆盖率主力组 +- 最接近“单系统、单页面、查询条件明确、分页或统计后导出报表”的通用场景定义 + +推荐 archetype: + +- `single_request_table` +- `wrapped_single_mode` + +推荐阶段: + +- `P1` + +主要目标: + +- 形成高通过率量产模板 +- 证明通用报表类具备规模化迁移能力 + +代表场景: + +- `售电收入日统计` +- `高低压新增报装容量月度统计表` +- `供电可靠率指标统计表` +- `光伏用户超容情况报表` +- `供电服务工单业务统计表` + +主要验收点: + +1. 查询条件可恢复 +2. request / response contract 可恢复 +3. 数据抽取正确 +4. 报表导出正确 +5. 同家族场景具备可复用性 + +### G2 多模式报表组 + +定位: + +- 当前能力上限验证组 +- 用于证明生成器是否具备 `tq` 级业务语义恢复能力 + +推荐 archetype: + +- `multi_mode_request` + +推荐阶段: + +- `P0` + +主要目标: + +- 验证多模式报表的模式矩阵、模式切换语义和模式内 contract 恢复能力 + +代表场景: + +- `台区线损大数据-月_周累计线损率统计分析` +- `用户日电量监测` +- `线损同期差异报表` + +主要验收点: + +1. 模式矩阵恢复 +2. 模式切换语义恢复 +3. 不同模式下 request / response contract 恢复 +4. 列定义和 normalize 规则恢复 +5. 内网执行后报表结果正确 + +### G3 分页明细补数组 + +定位: + +- 当前复杂 workflow 风险控制组 +- 用于建立分页、明细、补数和 fail-closed 的识别边界 + +推荐 archetype: + +- `paginated_enrichment` + +推荐阶段: + +- `P1` + +主要目标: + +- 建立复杂 workflow 识别和 fail-closed 能力 + +代表场景: + +- `95598工单明细表` +- `故障明细` +- `重复致电(敏感)客户信息明细表` + +主要验收点: + +1. 主链、分页链、补数链拆分正确 +2. 明细拉全正确 +3. 导出链识别正确 +4. 证据不足时稳定 fail-closed + +### G4 工具检测前置组 + +定位: + +- 当前非报表主线的前置组 +- 用于为后续 `200+` 检测类场景预留宿主执行能力 + +推荐 archetype: + +- `embedded_page_tool` +- `page_exec_check` + +推荐阶段: + +- `P2` + +主要目标: + +- 预留页面内 JS 执行、宿主桥接识别和非报表结果采集能力 + +代表场景: + +- `文件自动采集` +- `计量数据助手` +- `巡视计划完成情况自动检索` + +主要验收点: + +1. 页面内 JS 执行能力可建模 +2. 宿主桥接依赖可识别 +3. 非报表型结果可采集 + +### G5 低优先级噪声组 + +定位: + +- 当前首轮主线外的边界组 + +推荐阶段: + +- 降级处理 + +处理策略: + +1. 优先识别而不是适配 +2. 能 fail-closed 的优先 fail-closed +3. 不拿该组场景定义主线 archetype + +## Why Current Quality Stops Around 60 + +现阶段系统已能提取: + +- `sceneId` +- `sceneName` +- endpoint +- 分页变量 +- 部分过滤表达式 +- archetype +- readiness 风险 + +但这些能力仍停留在“信息抽取”层,尚未进入“工作流语义恢复”层。 + +当前质量停留在 60 分附近的根因主要有五类: + +1. 目标对象仍偏浅 + 系统能识别 URL、函数名、分页字段,却不能稳定恢复 mode matrix、request contract、response contract + +2. 
缺少中间证据层 + 当前 deterministic、LLM、compiler 三方结果仍然过于直接地汇入 `Scene IR`,缺少可裁决的语义证据层 + +3. archetype 约束仍偏粗 + 能识别 `multi_mode_request` 或 `paginated_enrichment`,但还不能稳定证明“为什么成立”“最少哪些证据成立” + +4. 业务链与宿主链未彻底分离 + `localhost`、导出接口、BrowserAction、静态资源、模板噪声仍容易污染 bootstrap 和 workflow 推断 + +5. 运行时语境没有被显式建模 + 当前生成链仍偏向把场景理解成“页面源码 + 接口抽取”,但真实执行依赖统一平台、自研浏览器、隐藏域登录、宿主 ajax 和本地桥接能力。运行时语境一旦缺席,就会把宿主桥接误判为业务主链,或者把本应依赖宿主环境的场景误判为可独立运行 skill + +## What 90 Means + +本方案中的“90 分”并不指所有场景均可自动跑通,而是指自动生成器开始具备 `tq` 级业务语义恢复能力。 + +一个场景 skill 只有同时满足以下能力,才可视为进入 90 分区间: + +1. 参数契约可恢复 +2. 模式切换语义可恢复 +3. 每个模式的请求模板可恢复 +4. 每个模式的响应提取路径可恢复 +5. 列定义与归一化规则可恢复 +6. 页面上下文和目标域名校验存在 +7. 产物输出结构稳定 +8. 失败时能给出业务级原因或稳定 fail-closed + +## Design Principles + +### 1. 从“信息抽取器”升级到“工作流语义恢复器” + +生成器不能继续停留在“抽 URL、抽函数名、抽字段”的层级,而必须恢复: + +- 业务入口语义 +- 参数契约语义 +- 模式切换语义 +- 请求构造语义 +- 响应提取语义 +- 行归一化语义 +- 工作流语义 +- 本地 / 宿主依赖语义 + +### 2. 先沉淀证据,再归约 Scene IR + +目标主链为: + +`源码 -> 语义证据层 -> 证据归并/冲突消解 -> Scene IR -> compiler` + +而不是: + +`源码 -> LLM 总结 -> Scene IR -> compiler` + +### 3. fail-closed 优先于 fail-open + +对复杂场景,证据不完整时应降级为 draft 或分析报告,而不是伪装成 runnable skill。 + +### 4. archetype 必须由工作流证据驱动 + +`multi_mode_request`、`single_request_table`、`paginated_enrichment` 的成立条件必须由最小工作流证据集支撑,而不是由表面关键词命中支撑。 + +### 5. 先做家族标准答案,再做规模化迁移 + +路线不追求起步即覆盖 102 个场景,而是先打穿少量高价值样板家族,再复制到同类场景。 + +### 6. 
先分离宿主层与业务层,再做 archetype 与 Scene IR 归约 + +生成器必须把以下三层信息显式分离: + +- 业务语义层 +- 宿主浏览器能力层 +- 本地服务 / 导出 / 登录桥接层 + +业务语义层负责识别目标系统、参数契约、模式切换、请求构造、响应提取和产物结构。宿主浏览器能力层负责识别隐藏域加载、JS 注入、浏览器 ajax 代理、标签页或区域切换等执行机制。本地服务 / 登录桥接层负责承载统一平台登录、目标系统后台登录、本地导出和宿主 websocket 通信等依赖。 + +若三层继续混合建模,系统将反复出现三类错误:误把 `localhost` 当 bootstrap domain,误把宿主桥接当业务 workflow,误把缺少宿主能力的场景判成可独立运行的 skill。因此,未来 Scene IR 或其前置证据层必须先完成分层,再进入 archetype 判断和 compiler。 + +## Target Architecture + +```text +scene source + -> source scan + -> semantic evidence extraction + -> evidence stratification + -> evidence merge / conflict resolution + -> archetype contract gating + -> Scene IR + -> compiler + -> runtime compatibility check + -> readiness grading + -> runnable skill or fail-closed report +``` + +## Required Semantic Recovery Domains + +后续建设必须围绕以下 8 类语义恢复域展开: + +1. 业务入口语义 +2. 参数契约语义 +3. 模式切换语义 +4. 请求构造语义 +5. 响应提取语义 +6. 行归一化语义 +7. 工作流语义 +8. 本地 / 宿主依赖语义 + +## P0 / P1 Sample Strategy + +## P0 样板组合 + +P0 不追求覆盖面,而是分别证明: + +- 能力上限 +- 规模稳定性 +- 复杂 workflow 识别正确性 + +P0 样板固定为: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `用户日电量监测` +3. `95598工单明细表` + +### P0-1:`台区线损大数据-月_周累计线损率统计分析` + +定位: + +- `multi_mode_request.month_week_table` +- `tq` 主样板 + +#### Canonical Benchmark Mapping + +`台区线损大数据-月_周累计线损率统计分析` 是 `tq-lineloss-report` 的原始场景来源。`tq-lineloss-report` 在本路线中不是普通参考 skill,而是第一份用于衡量“tq 级业务语义恢复能力”的 canonical benchmark。 + +该场景的自动转化目标不应停留在“生成一个可运行 skill 包”,而应尽可能逼近 `tq-lineloss-report` 已经体现出的关键业务语义,包括: + +- `month / week` 模式矩阵 +- 每种模式的请求契约与响应契约 +- 列定义、归一化规则与导出语义 +- bootstrap 与目标系统上下文约束 + +后续对自动生成质量的评估,应优先比较其与 `tq-lineloss-report` 的关键语义契合度,而不是只看是否产出了 runnable artifact。 + +#### Benchmark Role Clarification + +`tq-lineloss-report` 是已完成“场景 -> skill -> 内网跑通”的高质量参考样板,但不是本方案中的唯一硬标准答案。后续自动生成结果不要求机械复制 `tq-lineloss-report` 的全部表现形式,而是要求在关键业务语义、内网可执行性与报表正确性上达到同等级别。 + +因此,`tq-lineloss-report` 在本方案中的作用是: + +1. 证明“场景 -> skill -> 内网跑通”路线可行 +2. 为多模式报表场景提供高质量语义恢复参考 +3. 
为 P0-1 提供业务级对照基线,而不是唯一输出模板 + +它的意义: + +- 证明系统能否恢复 `mode matrix` +- 直接对标现有 `tq-lineloss-report` + +### P0-2:`用户日电量监测` + +定位: + +- `single_request_table` / `wrapped_single_mode` +- 单请求量产样板 + +它的意义: + +- 证明系统可以让简单报表家族形成高通过率 + +### P0-3:`95598工单明细表` + +定位: + +- `paginated_enrichment.list_detail_filter_export` +- 分页补数预研主样板 + +它的意义: + +- 证明系统至少能正确识别复杂 workflow,并在证据不足时 fail-closed + +## P1 家族扩展 + +### 线损 / 电量多模式家族 + +- `白银线损周报` +- `线损同期差异报表` +- `线损大数据-窃电分析` +- `供电所线路电量统计` +- `台区零度户月度用电量与台区线损电量对比核查报表` + +### 单请求报表家族 + +- `售电收入日统计` +- `高低压新增报装容量月度统计表` +- `电能表现场检验完成率指标报表` +- `供电可靠率指标统计表` +- `光伏用户超容情况报表` + +### 分页补数家族 + +- `95598、12398、流程超期风险工单明细` +- `故障明细` +- `重复致电(敏感)客户信息明细表` +- `营销业务管控监测日报表` + +## Minimal Implementation Roadmap + +### Roadmap Prioritization Rationale + +由于 `G1 + G2 + G3` 已覆盖 `102` 个场景中的绝大多数,因此路线优先级不按业务部门划分,而按场景家族划分。 + +首轮优先顺序固定为: + +1. 先以 `G2` 多模式报表组验证语义恢复上限 +2. 再以 `G1` 通用单页报表组验证规模化迁移能力 +3. 再以 `G3` 分页明细补数组验证复杂 workflow 与 fail-closed + +`G4` 作为后续检测类扩展的前置组保留,`G5` 不进入首轮主线。 + +## 阶段 1:打通 `tq` 主样板 + +主场景: + +- `台区线损大数据-月_周累计线损率统计分析` + +最关键的三个里程碑: + +1. 稳定恢复完整 `mode matrix` +2. 建立参数契约闭环 +3. 让编译结果与手工 `tq` skill 结构同构 + +## 阶段 2:做单请求量产模板 + +主场景: + +- `用户日电量监测` + +最关键的三个里程碑: + +1. 稳定恢复 request / response / normalize 三件套 +2. 压缩伪通用兜底主路径 +3. 证明同家族单请求场景可复用 + +## 阶段 3:做分页补数的正确识别与阻断 + +主场景: + +- `95598工单明细表` + +最关键的三个里程碑: + +1. 正确拆开主链、补数链、导出链 +2. 建立 `paginated_enrichment` 最小可编译证据集 +3. 在证据不足时稳定 fail-closed + +## Three Global Preconditions + +在进入上述三阶段前,必须先满足三个总前置里程碑: + +1. 建立可裁决的语义证据层 +2. 建立最小可编译业务契约 +3. 冻结 P0 样板标准答案 + +## Precondition 1: Semantic Evidence Layer + +最小必须落地的对象: + +1. 统一证据对象 schema +2. 核心证据类型集合 +3. 证据归并规则 +4. 
证据到 `Scene IR` 的映射边界 + +最小证据类型集合建议包括: + +- `bootstrap_candidate` +- `endpoint_candidate` +- `mode_candidate` +- `request_template_candidate` +- `response_path_candidate` +- `column_defs_candidate` +- `normalize_rules_candidate` +- `workflow_candidate` +- `localhost_dependency_candidate` +- `browser_action_candidate` +- `export_candidate` + +## Precondition 2: Minimal Compilable Business Contract + +最小必须落地的对象: + +1. archetype 最小契约表 +2. 契约 gate 列表 +3. 阻断规则 +4. archetype 最小输出契约 + +统一 gate 名称至少包括: + +- `bootstrap_resolved` +- `request_contract_complete` +- `response_contract_complete` +- `workflow_contract_complete` +- `runtime_contract_compatible` + +## Precondition 3: P0 Canonical Answers + +最小必须落地的对象: + +1. 三个 P0 样板的标准 `Scene IR` +2. 三个样板的关键证据清单 +3. 三个样板的验收标准 +4. 三个样板的失败 taxonomy + +## Acceptance Criteria + +本方案完成的标志不是“所有场景都能生成 skill”,而是以下条件成立: + +1. `tq` 主样板能稳定恢复 mode matrix,并产出与手工版高度同构的结果 +2. 单请求主样板能稳定恢复 request / response / normalize 三件套,并可扩展到同家族多个场景 +3. 分页补数主样板能稳定识别其问题空间,并在证据不足时稳定 fail-closed +4. `Scene IR` 前存在可裁决的语义证据层,而不是直接靠全文总结进 compiler +5. archetype 有明确最小可编译业务契约,生成器不会继续伪造 runnable skill + +## Risks + +1. 若继续直接强化 prompt 而不补证据层,质量提升会很快撞到上限 +2. 若没有最小可编译契约,compiler 仍会继续吞入“看起来像 business IR、实际证据不闭合”的结果 +3. 若没有固定 P0 标准答案,后续回归和迁移验证会失去校准基线 +4. 分页补数家族若过早当作主战场,极易把 localhost、导出链、宿主链再次污染到 bootstrap 和 workflow + +## Open Questions + +1. 证据层是否作为独立 `Evidence IR` 引入,还是先以内嵌字段方式扩充当前 `Scene IR` +2. `localhost_dependency_candidate`、`browser_action_candidate`、`export_candidate` 是否第一版就纳入硬门禁 +3. P0 标准答案是否需要单独固化为 fixture + golden IR 双份基线 +4. 对复杂分页补数场景,第一阶段是否允许生成 `draft skill`,还是一律只出分析报告 + +## G1 Boundary Refinement + +本轮整改后,`G1` 不再表示“所有看起来像报表的场景”,而是收敛为“单系统、单主请求、可直接恢复请求契约与表格契约的通用单页报表家族”。 + +`G1` 仅允许承接以下结构: + +1. 单系统、单主页面承载,主流程不依赖复杂宿主桥接。 +2. 存在可识别的主查询入口,且请求模板、响应路径、字段列定义可以直接恢复。 +3. 结果主体为单表或单次汇总,不依赖本地落库后二次分析。 +4. 输出主形态为直接表格结果、Excel 结果或等价的单次数据汇总。 + +出现以下任一特征时,应直接排除出 `G1`: + +1. 业务主链路依赖 `BrowserAction`、`sgBrowserExcuteJsCode` 或同类宿主桥接接口推进。 +2. 页面依赖多轮 callback / 子请求串联才能补全最终结果。 +3. 
同一场景内存在多个业务 endpoint 的分类盘点、分桶汇总或接口扫数。 +4. 报表生成前需要本地落库、SQL 聚合、二次分析或文档拼装。 +5. 输出主结果不是直接表格,而是抓取后再生成 Word/专题文档等二段式产物。 + +在此基础上,`G1` 增加一个上边界子型:`G1-E 轻量补查汇总型`。该子型仍属于 `G1`,但允许“单主请求 + 少量补查请求”的轻度扩展,前提是主查询契约仍清晰、最终输出仍以单次汇总表为主,且补查链路不升级为宿主桥接多步 workflow。 + +## Family Reassignment + +基于第一轮真实样本迁移与结构复核,以下 4 个边界样本的归属已经冻结,不再继续作为普通 `G1` 样本混用: + +| 样本场景 | 原暂挂位置 | 正式归属 | 重排原因摘要 | +| --- | --- | --- | --- | +| 高低压新增报装容量月度统计表 | G1 候选 | G1-E 轻量补查汇总型 | 主查询仍存在,但页面同时包含少量补查接口;更适合作为 G1 上边界样本,而非纯 `single_request_table`。 | +| 电能表现场检验完成率指标报表 | G1 候选 | G6 宿主桥接多步查询型 | 主链路依赖 `BrowserAction` / `sgBrowserExcuteJsCode` 与 callback 串联,多步查询特征明确。 | +| 计量资产库存统计 | G1 候选 | G7 多接口盘点汇总型 | 页面通过多个资产统计 endpoint 分类型扫数,属于多接口盘点汇总,而非单请求表格恢复。 | +| 95598供电服务月报 | G1 候选 | G8 抓取落库分析出文档型 | 先抓取数据,再经 `localhost` 落库与 SQL 分析,最后产出文档,结构上已超出报表直生家族。 | + +为避免后续家族边界再次漂移,本设计同时冻结这 4 个新旧家族的最小定义: + +1. `G1-E 轻量补查汇总型` + 主查询清晰、补查轻量、最终仍可归并为单次汇总结果。 +2. `G6 宿主桥接多步查询型` + 业务查询必须经宿主桥接接口推进,且存在显式多步 workflow 或 callback 串联。 +3. `G7 多接口盘点汇总型` + 页面通过多个 endpoint 分项拉取后再统一汇总,核心难点是接口分组、口径对齐和聚合拼装。 +4. `G8 抓取落库分析出文档型` + 页面抓取只是前置阶段,后续还存在本地存储、SQL 分析或文档生成链路。 + +## Implementation Impact + +这次边界整改的直接影响不是“增加几个名字”,而是调整后续实现顺序、验证口径和 gate 策略。 + +首先,`single_request_table` 不再承担所有“看起来像报表”的兜底职责。对于证据落入以下结构的场景,编译器应优先 fail-closed,而不是继续伪装成可运行的 `G1` skill: + +1. 宿主桥接主导型。 +2. 多 endpoint 盘点汇总型。 +3. 本地落库再分析型。 + +其次,后续实现顺序固定为: + +1. 先做 `G1-E` +2. 再做 `G6` +3. 再做 `G7` +4. 最后做 `G8` + +该顺序的原因是: + +1. `G1-E` 与现有 `G1` 能力距离最近,最适合作为边界收紧后的第一步。 +2. `G6` 需要先解决宿主桥接与 workflow 证据建模问题。 +3. `G7` 需要在 `G6` 之后单独处理多接口分组与聚合逻辑。 +4. `G8` 依赖抓取、落库、分析、出文档的完整后链路,复杂度最高,放在最后更稳妥。 + +最后,这一整改会同步改变后续验收口径: + +1. `G1` 的验收重点从“是否生成了 skill”改为“是否恢复出完整且自洽的主请求契约”。 +2. 边界样本不得再以 `single_request_table` 生成成功作为通过标准。 +3. 
新家族未落地前,相关样本应输出明确的家族归属和阻断原因,而不是继续产出低质量伪 runnable skill。 diff --git a/docs/superpowers/specs/2026-04-17-scene-skill-compiler-design.md b/docs/superpowers/specs/2026-04-17-scene-skill-compiler-design.md new file mode 100644 index 0000000..9412025 --- /dev/null +++ b/docs/superpowers/specs/2026-04-17-scene-skill-compiler-design.md @@ -0,0 +1,375 @@ +# Scene Skill Compiler Design + +> **Status:** Draft +> **Date:** 2026-04-17 +> **Author:** Codex + +## Problem Statement + +当前 `sg_scene_generate` 已经具备基础的场景识别、LLM 抽取和模板渲染能力,但整体上仍然更接近“场景元数据提取器 + 模板填充器”,还不是一个真正可复用的通用 skill 转化器。 + +这在两个对照样本上表现得很明显: + +| 样本 | 转化方式 | 结果 | +|------|----------|------| +| `tq-lineloss-report` | 基于 Claude 的语义重建 | 生成结果接近可运行 skill,显式表达了月/周模式、请求体、列定义、响应路径 | +| `marketing-zero-consumer-report` | 基于当前项目自动转化 | 生成结果偏“骨架 skill”,无法正确表达分页、逐户补数、过滤、导出等复合工作流 | + +### 根因 + +1. 当前生成流程主要抽取“字段”,没有稳定抽取“工作流”。 +2. 生成器默认假设报表场景接近“单次请求 -> 表格归一化 -> 输出 artifact”。 +3. `scene.toml`、bootstrap、参数合同和 browser script 之间仍存在较强硬编码。 +4. LLM 输入存在截断,且缺少对关键逻辑片段的优先抽取。 +5. 运行时 resolver 能力较弱,无法稳定承接更复杂的自动生成结果。 + +### 典型失败模式 + +以 `marketing-zero-consumer-report` 为例,原始场景实际是: + +1. 获取组织或用户列表。 +2. 按页拉取用户数据。 +3. 对每个用户发起二次请求补充电费信息。 +4. 根据 `charge !== 0` 做业务过滤。 +5. 组装导出数据并通过本地服务导出。 + +但当前自动生成结果把它错误归类成了“单请求报表”,导致: + +- bootstrap 域名和目标页面来源不稳定。 +- browser script 只使用第一个 API 端点。 +- 数据归一化直接对原始列表生效,没有分页循环和二次请求。 +- 生成出的参数合同与真实业务流程不匹配。 + +## Goal + +将当前场景生成器升级为一个面向常见内网场景的“迷你版 skill-creator / scene skill compiler”,使其具备以下能力: + +1. 先理解场景工作流,再选择模板并生成 skill。 +2. 覆盖常见报表类内网场景,而不是只覆盖单请求表格场景。 +3. 在生成前给出可运行性评估,减少“生成成功但内网跑不通”的情况。 +4. 让同类场景可以复用同一套转化机制,而不是逐个手工重写。 + +## Non-Goals + +1. 不追求一次性支持所有历史场景和所有前端技术栈。 +2. 不在第一阶段解决登录、鉴权、跨域和浏览器宿主差异的全部问题。 +3. 不要求 LLM 单独完成完整语义恢复,必须允许规则提取参与。 +4. 不要求生成结果 100% 无需人工审阅。 + +## Design Principles + +### 1. 先建模,再生成 + +必须先把原始场景建模为统一的 `Scene IR`,再由编译器按 archetype 渲染 skill。 + +### 2. 抽取“工作流证据”优先于抽取“字段清单” + +对通用 skill 转化器而言,分页、模式切换、二次请求、导出动作、过滤条件,比单纯的 URL 和列定义更重要。 + +### 3. 
确定性优先,LLM 补全 + +URL、请求方法、分页变量、入口函数、列头、导出调用等确定性信息优先由规则提取;LLM 负责做语义归并、命名和补全。 + +### 4. 模板按 archetype 拆分 + +不能继续用一个通用模板覆盖所有报表场景。不同工作流 archetype 必须有独立编译路径。 + +### 5. 运行时合同必须与生成能力对齐 + +生成器输出什么参数合同,运行时 resolver 就必须能承接;否则生成器必须降级或提示人工补齐。 + +## Architecture + +### Target Architecture + +```text +原始场景目录 + -> 场景扫描器 + -> 确定性规则提取 + -> LLM 语义补全 + -> Scene IR 合并 + -> archetype 分类 + -> archetype 编译器 + -> skill 包 + -> 静态验收 / 可运行性评级 +``` + +### Core Pipeline + +1. 扫描 `index.html`、`scripts/*.js`、目录结构和可见依赖关系。 +2. 规则提取器抓取确定性证据。 +3. LLM 基于分块后的关键上下文提取高层语义。 +4. 合并为统一 `Scene IR`。 +5. 根据 `workflowArchetype` 路由到对应编译器。 +6. 生成 `scene.toml`、`SKILL.toml`、browser script 和说明文档。 +7. 执行静态门禁和 readiness 评级。 + +## Scene IR + +### Top-Level Fields + +```json +{ + "sceneId": "marketing-zero-consumer-report", + "sceneName": "营销2.0零度户报表数据生成", + "sceneKind": "report_collection", + "workflowArchetype": "paginated_enrichment", + "bootstrap": { + "expectedDomain": "yx.gs.sgcc.com.cn", + "targetUrl": "http://yx.gs.sgcc.com.cn" + }, + "params": [ + { + "name": "org_code", + "resolver": "org_tree", + "required": true + } + ], + "modes": [], + "workflowSteps": [ + { "type": "paginate", "entry": "getUserList" }, + { "type": "foreach", "source": "userList" }, + { "type": "secondary_request", "entry": "getUserCharges" }, + { "type": "filter", "expr": "charge !== 0" }, + { "type": "export", "entry": "exportExcel" } + ], + "requestTemplate": {}, + "responsePath": "data.rows", + "normalizeRules": { + "type": "field_map" + }, + "artifactContract": { + "type": "report-artifact" + }, + "validationHints": { + "requiresTargetPage": true + }, + "evidence": [] +} +``` + +### Required IR Blocks + +| Block | Purpose | +|------|---------| +| `workflowArchetype` | 决定编译器路由 | +| `bootstrap` | 决定目标域名、目标页面和 helper page 行为 | +| `params` | 决定 `scene.toml` 参数合同 | +| `modes` | 表达月/周、日/月、报表类型切换等多模式逻辑 | +| `workflowSteps` | 表达分页、循环、二次请求、过滤、导出等复合流程 | +| `requestTemplate` | 表达固定请求体和参数映射 | +| `responsePath` | 指定响应数据抽取路径 | +| `normalizeRules` | 
指定字段映射、空行过滤和关键字段校验 | +| `artifactContract` | 指定输出 artifact 结构和状态语义 | +| `evidence` | 保留抽取证据,便于 UI 预览和人工复核 | + +## Workflow Archetypes + +第一阶段建议先稳定支持以下 archetype: + +| Archetype | 场景特征 | 典型样本 | +|-----------|----------|----------| +| `single_request_table` | 单次请求,直接返回表格或列表 | 简单报表场景 | +| `multi_mode_request` | 同一场景存在月/周等多模式,请求体和列定义随模式切换 | `tq-lineloss-report` | +| `paginated_enrichment` | 先分页拉主列表,再逐条或逐批补数,再过滤或导出 | `marketing-zero-consumer-report` | +| `page_state_eval` | 更偏状态检查、页面判定、轻量采集 | 监测类或状态判定类场景 | + +### Routing Rules + +1. 有显式模式切换条件,优先判定为 `multi_mode_request`。 +2. 有分页调用且伴随逐条二次请求,优先判定为 `paginated_enrichment`。 +3. 无明确请求链、以页面状态判定为主,归到 `page_state_eval`。 +4. 其余默认归到 `single_request_table`,但标记为低置信度。 + +## Extraction Architecture + +### Stage 1: Deterministic Extraction + +规则提取器负责抽取高确定性信息: + +1. URL、请求方法、`contentType`、请求体拼装方式。 +2. 分页参数,例如 `page`、`rows`、`pageSize`、`sidx`、`sord`。 +3. 入口函数、导出函数、列表函数、详情函数。 +4. `if/switch` 模式分支。 +5. 表头、列定义、字段映射。 +6. 明确的过滤条件,例如 `charge !== 0`。 + +### Stage 2: LLM Semantic Completion + +LLM 负责以下内容: + +1. 对规则提取结果做归并和命名。 +2. 补全 `workflowSteps` 的高层描述。 +3. 判断 archetype。 +4. 推断 `requestTemplate`、`responsePath`、`normalizeRules`。 +5. 输出不确定项和置信度。 + +### Input Strategy + +当前“截前 15000/3000 字符”的做法需要替换为: + +1. 目录结构摘要。 +2. `index.html` 分块。 +3. 命中关键模式的函数片段优先注入。 +4. URL 和请求构建语句优先注入。 +5. 同一场景允许分阶段提问,而不是一次性塞完整上下文。 + +## Compiler Architecture + +### Compiler Split + +建议把当前单一生成器改成 archetype 路由后的多编译器: + +| Compiler | Responsibility | +|----------|----------------| +| `single_request_table` compiler | 生成简单表格采集 skill | +| `multi_mode_request` compiler | 生成多模式切换 skill | +| `paginated_enrichment` compiler | 生成分页 + 补数 + 过滤类 skill | +| `page_state_eval` compiler | 生成状态判定或轻量监测类 skill | + +### Compiler Outputs + +每个编译器都负责: + +1. 生成 `scene.toml`。 +2. 生成 `SKILL.toml`。 +3. 生成 browser script。 +4. 生成引用说明文档,例如 `collection-flow.md`。 +5. 输出 readiness 评级和风险说明。 + +### Marketing Case + +`marketing-zero-consumer-report` 必须走 `paginated_enrichment` 编译器,至少生成这些逻辑骨架: + +1. 
主列表分页采集。 +2. 对每个用户执行二次请求。 +3. 聚合字段。 +4. 业务过滤。 +5. 导出或 report artifact 输出。 + +### TQ Case + +`tq-lineloss-report` 必须走 `multi_mode_request` 编译器,至少生成这些逻辑骨架: + +1. 模式识别。 +2. 模式专属请求体构建。 +3. 模式专属列定义。 +4. 固定响应路径抽取。 +5. 统一 artifact 输出。 + +## Runtime Contract Alignment + +当前运行时参数解析器主要集中在 [src/compat/scene_platform/resolvers.rs],能力仍偏基础。编译器设计必须显式处理这层约束。 + +### Short-Term Strategy + +短期有两种可选策略: + +1. 扩展 resolver 集合,支持更多参数合同。 +2. 在生成阶段限制输出,只允许生成当前运行时能消化的参数模型。 + +### Recommended Resolver Additions + +第一阶段建议补齐以下解析能力: + +- `mode_enum` +- `date_range` +- `org_tree` +- `page_size` +- `hidden_static` +- `derived_param` + +如果 resolver 暂时不扩,则生成器必须在 UI 和生成报告中明确标出“不兼容运行时合同”的风险。 + +## Verification And Readiness Gates + +### Static Gates + +生成完成后必须先过静态门禁: + +1. 是否识别到业务入口。 +2. 是否识别到核心请求链。 +3. 是否识别到正确 bootstrap。 +4. 参数合同是否与 archetype 匹配。 +5. 编译器是否覆盖全部关键步骤。 + +### Readiness Levels + +建议为每个生成结果打分级标签: + +| Level | Meaning | +|-------|---------| +| `A` | 可以直接进入内网试跑 | +| `B` | 结构正确,但建议人工校验后试跑 | +| `C` | 只适合作为草稿,需要人工补逻辑 | + +### Minimum Acceptance For Reference Scenes + +`marketing-zero-consumer-report`: + +1. 被识别为 `paginated_enrichment`。 +2. 识别出主列表请求和二次补数请求。 +3. 识别出 `charge !== 0` 过滤逻辑。 +4. 生成结果不再退化为单请求表格模板。 + +`tq-lineloss-report`: + +1. 被识别为 `multi_mode_request`。 +2. 识别出月/周双模式。 +3. 模式切换字段、请求体和列定义被区分。 +4. 
生成结果与手工 skill 在结构上同构。 + +## File Impact + +### New Or Modified Areas + +| File | Responsibility | +|------|----------------| +| `frontend/scene-generator/llm-client.js` | 深度抽取 schema、分块上下文、置信度输出 | +| `frontend/scene-generator/generator-runner.js` | 文件读取、关键片段抽取、目录摘要 | +| `frontend/scene-generator/server.js` | 分析接口、IR 透传、生成报告 | +| `frontend/scene-generator/sg_scene_generator.html` | 抽取预览、风险展示、readiness 展示 | +| `src/bin/sg_scene_generate.rs` | 接收 `Scene IR` 或 IR JSON 参数 | +| `src/generated_scene/analyzer.rs` | 确定性提取、archetype 辅助识别 | +| `src/generated_scene/generator.rs` | archetype 路由和多编译器编排 | +| `src/generated_scene/ir.rs` | 定义统一 `Scene IR` | +| `src/compat/scene_platform/resolvers.rs` | 参数合同对齐与扩展 | + +## Migration Strategy + +### Phase 1 + +先修当前明显错误: + +1. bootstrap 来源修正。 +2. 移除通用报表默认硬编码。 +3. 替换截断式 LLM 输入。 +4. 生成前展示抽取预览。 + +### Phase 2 + +引入 `Scene IR`,完成“先建模、再生成”的主干改造。 + +### Phase 3 + +接入 archetype 分类器和多编译器。 + +### Phase 4 + +补运行时 resolver,加入 readiness 门禁。 + +## Open Questions + +1. `Scene IR` 是否作为单独 JSON 文件落地到输出目录,便于后续复用和回放。 +2. `page_state_eval` 是否继续共用当前 `report-artifact`,还是定义独立 artifact 类型。 +3. 是否允许用户在 Web UI 中手工修正 archetype、bootstrap 和参数合同后再生成。 + +## Acceptance Criteria + +满足以下条件时,可以认为本设计达到预期: + +1. 同一套生成流程能够同时覆盖 `tq-lineloss-report` 和 `marketing-zero-consumer-report` 两类差异明显的场景。 +2. `marketing` 不再因错误 archetype 导致内网必然跑不通。 +3. `scene.toml` 不再默认带入错误的组织、周期和标题关键字。 +4. 生成结果具备明确的 readiness 分级,用户能在生成前识别风险。 +5. 
生成器在定位上从“模板填充器”升级为“通用场景 skill 编译器”。 diff --git a/docs/superpowers/specs/2026-04-18-g1-e-light-enrichment-report-design.md b/docs/superpowers/specs/2026-04-18-g1-e-light-enrichment-report-design.md new file mode 100644 index 0000000..c1351b4 --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-g1-e-light-enrichment-report-design.md @@ -0,0 +1,243 @@ +# G1-E Light Enrichment Report Design + +**Goal:** 定义 `G1-E 轻量补查汇总型` 的正式实现口径,使生成器能够在“单主请求 + 少量补查请求 + 单次汇总输出”的边界内,稳定恢复出可编译的业务语义,并与普通 `G1 single_request_table` 明确区分。 + +**Status:** Draft + +--- + +## Decision Summary + +1. `G1-E` 是 `G1` 的上边界子型,不是 `G6/G7/G8` 的过渡桶。 +2. `G1-E` 只承接“主查询清晰、补查轻量、最终仍归并为单次汇总结果”的报表场景。 +3. `G1-E` 的编译目标不是继续伪装成纯 `single_request_table`,而是显式生成“主请求 + 轻量补查 + 汇总整形”三段式契约。 +4. `G1-E` 必须在证据层恢复三类对象:主请求证据、补查请求证据、汇总映射证据。 +5. 当补查链路升级为宿主桥接 workflow、多 endpoint 盘点或本地落库分析时,必须 fail-closed 并重分类,不得继续生成 `G1-E` skill。 +6. `高低压新增报装容量月度统计表` 作为当前 `G1-E` 的 P0 样板,用于冻结最小可编译答案。 + +--- + +## Why This Family Exists + +当前 `G1` 的问题不只是“识别率不够”,而是把两类结构混在了一起: + +1. 真正的单主请求单表报表。 +2. 主请求之外还带少量补查、补齐、映射拼装的轻量汇总报表。 + +这两类场景都长得像“普通报表”,但第二类如果硬塞进 `single_request_table`,会导致以下问题: + +1. 只能抽到主页面状态,抽不出真实业务请求。 +2. 即使生成成功,也没有补查契约,运行结果不完整。 +3. 生成器会误把补查型样本当成通用模板,继续污染 `G1` 家族。 + +因此需要把这类场景单独收束为 `G1-E`,既保留它们仍属于通用报表上边界的事实,又防止继续伪装成纯单请求报表。 + +--- + +## Canonical P0 Sample + +当前 `G1-E` 的标准样板固定为: + +- `高低压新增报装容量月度统计表` + +该场景的结构特征是: + +1. 存在清晰主查询入口:`getWkorderAll` +2. 存在少量补查请求: + - `queryElectCustInfo` + - `queryBusAcpt` + - `getBatchPerCust97` +3. 最终输出仍是单次统计汇总,而不是宿主驱动的多步任务,也不是本地落库分析后再出文档 + +`G1-E` 的第一阶段实现和验收都以这个样板为准,不在本阶段横向扩更多家族。 + +--- + +## Non-Negotiable Boundaries + +### 1. `G1-E` 仍属于报表直生家族 + +`G1-E` 仍应保持“场景页面直接可恢复业务查询”的基本属性,不能引入以下结构: + +1. 宿主桥接主导执行 +2. callback 串联的显式多步 workflow +3. 多 endpoint 分类盘点后再统一聚合 +4. `localhost` 落库、SQL 分析、文档导出等二段式后链路 + +### 2. `G1-E` 不是兜底分类 + +只有在主请求明确、补查数量受控、补查职责单一的情况下,才能进入 `G1-E`。 +如果只是“看起来比 G1 复杂一些”,但证据无法收敛为轻量补查模型,就必须阻断并重新分流。 + +### 3. 编译输出必须显式表达补查链路 + +对于 `G1-E`,生成器不能再只输出一个模糊的“请求 + 表格”骨架。 +输出结构里必须能看见: + +1. 主请求是谁 +2. 
每个补查请求补的是什么 +3. 补查结果如何并回主结果 + +--- + +## Family Definition + +`G1-E 轻量补查汇总型` 的最小定义如下: + +1. 存在一个可识别的主查询请求,负责拉取主列表或主统计结果。 +2. 存在少量补查请求,数量通常为 `1-3` 个,且职责明确,不形成开放式 workflow。 +3. 补查请求的触发方式可通过主结果行字段、固定上下文参数或有限枚举维度推导。 +4. 最终输出仍为单次汇总表或单份统计结果,不依赖本地持久化再分析。 +5. 页面整体仍可被视为“同一报表任务”,而不是多个独立业务流程拼接。 + +--- + +## Evidence Requirements + +`G1-E` 至少需要恢复以下三层证据。 + +### 1. Main Request Evidence + +必须恢复: + +1. 主请求 endpoint +2. 主请求参数模板 +3. 主请求响应路径 +4. 主表字段或主结果字段映射 + +### 2. Enrichment Request Evidence + +对每个补查请求,必须恢复: + +1. 补查 endpoint +2. 触发条件 +3. 关键入参来源 +4. 返回字段中被消费的部分 + +### 3. Merge / Normalize Evidence + +必须恢复: + +1. 主结果与补查结果的关联键 +2. 汇总列、补充列或映射列的生成规则 +3. 最终输出字段与来源字段之间的映射关系 + +如果三层证据中任一层缺失到无法闭环,`G1-E` 应阻断,不得伪生成 runnable skill。 + +--- + +## Scene IR Contract + +`G1-E` 的最小 `Scene IR` 不应再复用纯 `single_request_table` 的扁平结构,而应扩展为三段式: + +1. `main_request` + - 主查询定义 +2. `enrichment_requests[]` + - 补查请求列表 +3. `merge_plan` + - 主结果与补查结果的并回、字段补齐与最终汇总规则 + +建议最小字段如下: + +- `main_request.endpoint` +- `main_request.params` +- `main_request.response_path` +- `main_request.columns` +- `enrichment_requests[].endpoint` +- `enrichment_requests[].param_bindings` +- `enrichment_requests[].response_path` +- `enrichment_requests[].consumed_fields` +- `merge_plan.join_keys` +- `merge_plan.field_mappings` +- `merge_plan.aggregate_rules` +- `merge_plan.output_columns` + +--- + +## Compiler Contract + +`G1-E` 编译阶段至少应新增以下 gate: + +1. `main_request_resolved` + - 主请求是否恢复完整 +2. `enrichment_requests_resolved` + - 是否识别出所有必要补查请求 +3. `merge_plan_resolved` + - 是否恢复出主补查并回规则 +4. `g1e_scope_compatible` + - 是否仍处于轻量补查边界内,而没有越界为 `G6/G7/G8` + +编译器行为要求如下: + +1. 若 `main_request_resolved = false`,直接阻断。 +2. 若补查请求疑似存在但 `enrichment_requests_resolved = false`,不得退化为 `G1` 成功。 +3. 若 `merge_plan_resolved = false`,不得输出缺少并回逻辑的伪 skill。 +4. 若检测到宿主桥接、多 endpoint 扫数、落库分析等越界特征,直接阻断并给出家族重排建议。 + +--- + +## Runtime Shape + +`G1-E` 的运行时目标形态应固定为: + +1. 先执行主请求 +2. 基于主结果触发有限补查 +3. 将补查结果并回主结果 +4. 输出单次汇总结果 + +这里的“有限补查”必须可控: + +1. 不能无限递归 +2. 
不能升级为宿主驱动式多步任务编排 +3. 不能变成接口盘点扫描 + +--- + +## Failure Taxonomy + +`G1-E` 第一版至少要显式区分以下失败类型: + +1. `missing_main_request` + - 主请求未恢复 +2. `missing_enrichment_request` + - 补查请求存在,但未恢复完整 +3. `missing_merge_plan` + - 能看见主链和补查链,但并回关系不完整 +4. `scope_upgraded_to_g6` + - 实际是宿主桥接多步查询 +5. `scope_upgraded_to_g7` + - 实际是多接口盘点汇总 +6. `scope_upgraded_to_g8` + - 实际是抓取落库分析出文档 + +--- + +## Acceptance Criteria + +`G1-E` 第一阶段完成的标志不是“能产出某个 skill 目录”,而是以下条件成立: + +1. `高低压新增报装容量月度统计表` 能稳定恢复主请求、补查请求和并回规则三段式语义。 +2. 生成结果不再退化为只有 `page_state_eval`、`params=[]`、`requestEntries=[]` 的空壳。 +3. 编译器不会再把缺失补查契约的结果误判为普通 `G1` 成功。 +4. 当样本越界时,系统能够明确阻断并说明应转入 `G6/G7/G8`,而不是继续产出低质量 skill。 + +--- + +## Out of Scope + +本 spec 当前不覆盖: + +1. `G6 宿主桥接多步查询型` 的 workflow 建模 +2. `G7 多接口盘点汇总型` 的多 endpoint 盘点框架 +3. `G8 抓取落库分析出文档型` 的本地存储与文档生成后链路 +4. `G1-E` 之外的大规模家族扩展 + +--- + +## Next Step + +这份 spec 冻结后,下一步应直接派生对应实施计划,内容只围绕以下三个实现对象展开: + +1. `G1-E` 证据层补齐 +2. `G1-E` 三段式 `Scene IR` / compiler gate 落地 +3. `高低压新增报装容量月度统计表` 的 P0 样板验证 diff --git a/docs/superpowers/specs/2026-04-18-g2-remediation-design.md b/docs/superpowers/specs/2026-04-18-g2-remediation-design.md new file mode 100644 index 0000000..eff1399 --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-g2-remediation-design.md @@ -0,0 +1,224 @@ +# G2 家族整改设计 + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex + +## Problem Statement + +第一轮真实样本迁移已经完成 `G2` 家族三份代表样本的真实生成与对标分析: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +分析结论已经在以下文档中固化: + +1. `docs/superpowers/reports/2026-04-18-r1-real-tq-lineloss-analysis.md` +2. `docs/superpowers/reports/2026-04-18-g2-first-round-blocker-summary.md` +3. `docs/superpowers/reports/2026-04-18-first-round-migration-and-candidate-validation-report.md` + +当前问题已经不再是“生成器能不能产出 skill 包”,而是: + +> 生成器能产出包,但对 `G2` 线损多模式报表家族仍无法恢复主业务语义,因此没有任何样本达到候选验证门槛。 + +`G2` 家族已经稳定复现以下 blocker: + +1. archetype 从 `multi_mode_request` 坍缩为 `paginated_enrichment` +2. bootstrap 稳定落错到 `20.77.115.36:31051` +3. 
`modes = []`,只残留默认字段,不存在真实 mode 结构 +4. `requestTemplate = null`,参数合同为空 +5. `columnDefs = []`,列语义与 required fields 缺失 +6. endpoint 候选被静态依赖、外链、其他业务系统严重污染 +7. readiness 过度乐观,与真实可运行性脱节 + +因此,整改阶段的目标不是继续扩样,而是先把 `G2` 家族的主矛盾打透。 + +## Goal + +在不脱离现有 `Scene IR + generator + readiness` 框架的前提下,把 `G2` 家族从“能抽取部分信号”提升到“能稳定恢复线损多模式报表主链”。 + +整改阶段的直接目标如下: + +1. 正确识别 `G2` 家族 archetype 为 `multi_mode_request` +2. 正确恢复主业务 bootstrap,而不是落到错误入口域 +3. 恢复 `month/week` 模式矩阵 +4. 恢复 mode-specific request contract +5. 恢复 mode-specific response path / column defs / normalize rules +6. 对 endpoint 污染建立有效隔离 +7. 让 readiness 与真实业务闭合程度一致,避免虚高 + +## Success Criteria + +整改阶段完成后,`G2` 样本至少要满足以下门槛: + +1. `workflowArchetype = multi_mode_request` +2. `bootstrap.expectedDomain` 与 `targetUrl` 锚定到线损主业务承载面 +3. `modes` 不为空,至少包含 `month` 与 `week` +4. 每个 mode 都有明确 request contract +5. 每个 mode 都有明确 response path 与 column defs +6. 生成脚本不再退化成通用 `paginate -> secondary_request -> filter` 骨架 +7. readiness 不能在核心合同缺失时继续给出 `A` + +整改阶段的通过标准仍然是“进入候选验证门槛”,不是直接宣称已经内网可运行。 + +## Non-Goals + +本整改阶段不处理以下事项: + +1. 不扩展到 `G1/G3` 家族整改 +2. 不解决统一平台登录、目标系统后台登录或宿主认证恢复 +3. 不重写整套 `Scene IR` 框架 +4. 不把所有 BrowserAction 链完整抽象成全新 runtime 模型 +5. 不在本阶段追求覆盖全部 102 个场景 + +## Scope + +本阶段整改范围严格限定为: + +1. `G2` 家族语义识别与编译链 +2. 与 `G2` 识别直接相关的 analyzer / evidence / generator / readiness 逻辑 +3. `G2` 对应的 fixture、测试与对标基线 + +不进入: + +1. `G1` 家族量产优化 +2. `G3` 复杂分页补数 workflow 整改 +3. 运行时 transport、浏览器桥接协议、登录链重构 + +## Root Cause Framing + +基于第一轮报告,当前 `G2` 失真不是单点 bug,而是四层问题叠加: + +### 1. Signal Weighting 错位 + +系统虽然抓到了: + +1. `month/week/tjzq/mode` +2. 线损核心 endpoint +3. `responsePath = content` + +但最终决策时,分页、补数、过滤等噪声信号权重更高,导致 archetype 选错。 + +### 2. Bootstrap Selection 错位 + +bootstrap 候选选择逻辑把“可见入口页”与“真实业务承载页”混淆,导致 `targetUrl` 稳定落错。 + +### 3. Mode Reconstruction 缺失 + +系统能看到模式词面信号,但没有把这些信号提升为: + +1. mode matrix +2. per-mode request builder +3. per-mode response parser +4. per-mode column / normalize contract + +### 4. 
Readiness Gate 过宽 + +当前 readiness 更像“结构生成完成度”,而不是“业务合同闭合度”,导致错误结果被高分放行。 + +## Design Principles + +整改阶段遵循以下原则: + +1. 先修判定,再修模板 +2. 先修主链,再修文案 +3. 先收窄 `G2` 边界,再扩到其他家族 +4. 所有修复都必须落到可回归的 fixture 与测试 +5. 任何无法闭合的 `G2` 样本必须 fail-closed,而不是继续伪装为候选 skill + +## Workstreams + +整改阶段拆为五条工作流: + +### WS1: G2 Archetype Rectification + +目标: + +让 `G2` 家族不再被分页/补数噪声夺权,优先命中 `multi_mode_request`。 + +包含内容: + +1. 收紧 `G2` archetype 识别条件 +2. 提升 `month/week/tjzq/mode` 信号权重 +3. 降低通用分页信号对 `G2` 的误导 + +### WS2: Bootstrap Rectification + +目标: + +让 bootstrap 选择聚焦真实业务承载面,而不是页面壳或错误入口。 + +包含内容: + +1. 区分入口页、壳页面、真实主业务页 +2. 对 `localhost:*`、静态资源、外链保持排除 +3. 为 `G2` 增加主业务 bootstrap 选择约束 + +### WS3: Mode Contract Reconstruction + +目标: + +从证据层恢复 `month/week` 模式矩阵,并输出 mode-specific 合同。 + +包含内容: + +1. 识别 mode switch field +2. 恢复 `modes[]` +3. 为每个 mode 恢复 request template +4. 为每个 mode 恢复 response path / column defs / normalize rules + +### WS4: Endpoint Purification + +目标: + +把真正业务 endpoint 从依赖库、外链、其他系统噪声中剥离出来。 + +包含内容: + +1. 过滤第三方库和文档 URL +2. 过滤静态资源与依赖包字符串 +3. 提高线损业务 endpoint 候选排序权重 + +### WS5: Readiness Tightening + +目标: + +让 readiness 真正代表“合同闭合度”,而不是“生成是否完成”。 + +包含内容: + +1. 新增 `G2` 必过 gate +2. 当 `modes / request / columnDefs` 缺失时降级 +3. 阻断虚高 `A` + +## Required Deliverables + +整改阶段至少应产出: + +1. `G2` 整改 plan +2. `G2` 对应 fixture / canonical 对比资产更新 +3. `G2` 回归测试 +4. `G2` 整改后第二轮真实迁移报告 + +## Acceptance + +整改阶段验收以 `G2` 家族三份样本为准: + +1. `台区线损大数据-月_周累计线损率统计分析` +2. `白银线损周报` +3. `线损同期差异报表` + +至少满足以下要求: + +1. 三者不再统一坍缩到 `paginated_enrichment` +2. 至少第一份样本达到候选验证门槛 +3. 
第二、第三份样本至少能输出更接近真实结构的 `G2` 合同,或者在证据不足时明确 fail-closed
+
+## Next Step
+
+基于本设计,下一步应直接落地:
+
+- `docs/superpowers/plans/2026-04-18-g2-remediation-plan.md`
+
+该 plan 只围绕 `G2` 整改,不扩展到 `G1/G3` 或大规模场景迁移。
diff --git a/docs/superpowers/specs/2026-04-18-g3-paginated-enrichment-design.md b/docs/superpowers/specs/2026-04-18-g3-paginated-enrichment-design.md
new file mode 100644
index 0000000..82e4af2
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-18-g3-paginated-enrichment-design.md
@@ -0,0 +1,313 @@
+# G3 Paginated Enrichment Design
+
+> **Status:** Draft
+> **Date:** 2026-04-18
+> **Author:** Codex
+> **Upstream Inputs:**
+> [2026-04-17-scene-skill-60-to-90-roadmap-design.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/specs/2026-04-17-scene-skill-60-to-90-roadmap-design.md)
+> [2026-04-17-scene-skill-60-to-90-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md)
+> [2026-04-18-first-real-scene-migration-execution-sheet.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-18-first-real-scene-migration-execution-sheet.md)
+
+## Problem Statement
+
+`60 -> 90` 主路线在 `G2` 多模式报表家族与 `G1-E` 轻量补查子型上已经取得阶段性收敛:
+
+1. `G2` 已经从“主样本不可成型”推进到“主样本与多个变体进入候选验证阶段”
+2. `G1` 已经完成边界收紧,`G1-E` 的首个 `P0` 样板已通过验证
+
+但主路线中的 `P0-3` 仍然缺位:
+
+1. `paginated_enrichment` 还没有形成正式的 `spec / plan / canonical` 三件套
+2. `95598工单明细表` 虽已被明确指定为 `P0-3` 主样板,但尚未建立统一证据层、最小合同和失败 taxonomy
+3. 当前系统仍缺少一条“面对复杂分页、补数、导出和宿主桥接混合 workflow 时,能先正确拆解,再决定生成或阻断”的正式路径
+
+因此,当前问题已不再是“是否继续补更多 `G2` 变体”,而是:
+
+> 必须把 `G3` 分页补数家族从“概念上知道它复杂”推进到“结构上可拆解、合同上可裁决、证据不足时稳定 fail-closed”。
+
+## Goal
+
+在不推翻现有 `Scene IR + compiler + readiness` 框架的前提下,把 `G3` 从“分页补数场景的宽泛标签”升级为“可建模、可编译、可阻断的复杂 workflow archetype”。
+
+本设计的直接目标如下:
+
+1. 正确识别 `G3` 的主请求链、分页链、补数链、导出链
+2. 区分业务 workflow 与宿主桥接行为,不再让 `localhost:*`、BrowserAction 或宿主注入抢占业务主链
+3. 建立 `paginated_enrichment` 的最小可编译证据集
+4. 建立 `G3` 最小业务合同与 gate
+5. 
让 `95598工单明细表` 成为 `P0-3` 的第一版 canonical answer 与失败基线 +6. 在证据不足时稳定 `fail-closed`,不再产出伪可运行 skill + +## Success Criteria + +`G3` 首轮设计完成后,最低成功口径固定为: + +1. `95598工单明细表` 不再被当成“普通分页表”或“模糊 workflow” +2. 系统能够显式拆出: + - `main request chain` + - `pagination chain` + - `enrichment chain` + - `export chain` +3. 系统能够显式区分: + - `business workflow evidence` + - `host bridge / localhost dependency evidence` +4. `paginated_enrichment` 具备最小合同,不再只是一个兜底 archetype 名称 +5. 证据不足时,结果能够按固定 taxonomy `fail-closed` +6. `G3` 首轮回归结果要么达到候选验证门槛,要么给出准确阻断理由 + +## Non-Goals + +本轮 `G3` 设计不处理以下事项: + +1. 不扩展到全部 `102` 个场景同步整改 +2. 不并行展开 `G6/G7/G8` +3. 不在本轮解决统一平台登录、隐藏域登录恢复或宿主 transport 重构 +4. 不要求第一轮就还原所有工单类复杂业务语义细节 +5. 不把本轮工作扩散为“全部 95598 家族一次性打通” +6. 不为了先生成 skill 而放松 gate + +## Scope + +本轮 `G3` 设计范围严格限定为: + +1. `G3` 家族边界定义 +2. `G3` 证据层建模 +3. `G3` 最小合同与 gate 设计 +4. `G3` 的 `P0-3` canonical baseline 设计 +5. `95598工单明细表` 与一个 `G3` 扩展样板的首轮回归口径 + +本轮不进入: + +1. 大规模 `95598` 场景扩展 +2. 工单类全部子家族重排 +3. 运行时协议改造 +4. 导出后处理、落库分析、文档生产等更高层产物链路重构 + +## Fixed Samples + +本设计冻结如下样板: + +### P0 Main Sample + +1. `95598工单明细表` + +定位: + +1. `paginated_enrichment.list_detail_filter_export` +2. `P0-3` 主样板 +3. 第一版 `G3 canonical` 唯一校准源 + +### P1 Expansion Sample + +1. `95598、12398、流程超期风险工单明细` + +定位: + +1. `G3` 第一扩展样板 +2. 用于验证 `P0-3` 的合同与证据层是否具备复用性 + +在本设计完成前,不新增第三个 `G3` 首轮样板。 + +## Family Definition + +`G3` 的正式定义固定为: + +> 以分页明细拉全为主链,并伴随详情补查、关联补数、过滤去重、导出动作或阶段性聚合的复杂 workflow 报表场景。 + +该定义下,`G3` 至少具备以下一个或多个显著特征: + +1. 存在明确主查询接口,但最终结果不是单页即得 +2. 需要显式分页拉全或滚动时间窗口 +3. 需要对列表行做二次补查或关联详情查询 +4. 存在主链、补链、导出链并存的情况 +5. 最终产物依赖分页明细完整性,而不是单请求返回结果 + +## Inclusion Rules + +`G3` 进入条件固定如下: + +1. 存在主查询链候选 +2. 存在分页控制证据 +3. 存在补查、明细详情或二次链路证据 +4. 最终目标是明细拉全、补齐、筛选、导出或汇总 +5. 业务链可以与宿主桥接链做分层 + +## Exclusion Rules + +出现以下特征之一时,不再归入当前 `G3`: + +1. 只存在单次请求表格返回,无分页与补数闭环 +2. 只有页面点击链,没有可恢复的业务主链 +3. 主体价值在本地落库、SQL 分析或 Word 产物流水线,且业务主链无法恢复 +4. `localhost:*` 或宿主桥接动作压倒业务请求证据 +5. 主要问题不是分页补数,而是宿主多步桥接或文档生产 + +## Root Cause Framing + +当前 `G3` 迟迟未进入正式落地,不是因为它“太复杂无法做”,而是因为存在三个基础缺口: + +### 1. 
Workflow Signals Are Still Flattened + +当前生成链更擅长提取: + +1. endpoint 名称 +2. 参数片段 +3. 导出调用痕迹 +4. BrowserAction 或页面控制痕迹 + +但缺少把这些信号重建成分层 workflow 的机制,因此: + +1. 主链与补链混杂 +2. 导出链容易被误当成主业务链 +3. 宿主桥接与业务链混杂 + +### 2. Paginated Contract Is Missing + +当前系统还没有 `G3` 专属的最小合同,因此无法明确回答: + +1. 什么算“分页链已恢复” +2. 什么算“补数链已恢复” +3. 什么算“join key 已成立” +4. 什么算“导出链只是附属动作而不是主链” + +### 3. Fail-Closed Taxonomy Is Missing + +即使系统意识到结果不能放行,也缺少固定的失败类型表,因此容易出现: + +1. 阻断理由模糊 +2. readiness 不可解释 +3. 结果无法用于后续回归 + +## Design Principles + +`G3` 设计阶段遵循以下原则: + +1. 先拆 workflow,再讨论生成 +2. 先建证据层,再建合同 +3. 先把宿主链隔离,再恢复业务主链 +4. 优先保证 `fail-closed` 的准确性,而不是优先追求高通过率 +5. 所有规则必须可落到 `fixture / test / report` + +## Required Evidence Types + +在通用证据层之上,`G3` 首轮最小证据类型集合固定为: + +1. `main_request_candidate` +2. `pagination_candidate` +3. `enrichment_request_candidate` +4. `join_key_candidate` +5. `export_candidate` +6. `workflow_step_candidate` +7. `dedupe_or_merge_rule_candidate` +8. `host_bridge_candidate` +9. `localhost_dependency_candidate` +10. `browser_action_candidate` + +## Evidence Layer Requirements + +`G3` 证据层最少必须回答以下问题: + +1. 主查询链是什么 +2. 分页控制来自哪里 +3. 补数链有哪些候选 +4. 主链和补链靠什么字段关联 +5. 导出动作属于业务链还是结果产物链 +6. 哪些行为属于宿主桥接或本地依赖 + +## Minimal Business Contract + +`G3` 的最小可编译合同至少包括: + +1. `main_request` +2. `pagination_plan` +3. `enrichment_requests[]` +4. `join_keys[]` +5. `export_plan` +6. `merge_or_dedupe_rules` + +只有这些对象闭合时,`G3` 才允许进入可编译状态。 + +## Required Gates + +`G3` 统一 gate 名称最少包括: + +1. `g3_main_request_resolved` +2. `g3_pagination_contract_complete` +3. `g3_enrichment_contract_complete` +4. `g3_join_key_resolved` +5. `g3_export_path_identified` +6. `g3_runtime_scope_compatible` + +## Fail-Closed Policy + +以下情况必须明确 `fail-closed`: + +1. 主请求链缺失 +2. 分页链存在但终止条件不明 +3. 补数链存在但 join key 不明 +4. 只有导出动作,没有业务主链 +5. 宿主桥接证据明显多于业务证据 +6. 运行时依赖明显超出当前 `G3` 合同边界 + +## P0 Canonical Target + +`95598工单明细表` 的 canonical baseline 完成后,至少应冻结以下资产: + +1. canonical `Scene IR` +2. 关键证据清单 +3. 最小合同表 +4. 验收检查表 +5. 
失败 taxonomy + +## Failure Taxonomy + +`G3` 第一版失败 taxonomy 最少包括: + +1. `main_chain_missing` +2. `pagination_incomplete` +3. `enrichment_incomplete` +4. `join_key_missing` +5. `export_only_without_business_chain` +6. `host_bridge_pollution` +7. `runtime_dependency_unresolved` + +## Validation Baseline + +`G3` 回归时,必须按统一口径检查: + +1. archetype 是否正确 +2. bootstrap 是否合理 +3. 主请求链是否恢复 +4. 分页链是否恢复 +5. 补数链是否恢复 +6. join key 是否恢复 +7. 导出链是否恢复 +8. 宿主链是否被隔离 +9. readiness / blocker 是否可解释 + +## Required Deliverables + +本设计落地时至少产出: + +1. `G3` 设计稿 +2. `G3` 实施计划 +3. `G3` 首轮 `fixture / test` 扩展目标 +4. `95598工单明细表` 的 canonical 设计目标 +5. `G3` 首轮验证报告模板 + +## Acceptance + +本设计完成的标志是: + +1. `G3` 已从宽泛标签进入正式 archetype 设计 +2. `95598工单明细表` 被固定为 `P0-3` 主样板 +3. `G3` 证据层、最小合同、gate 和 fail-closed 口径被明确定义 +4. 后续实现不再把 `G3` 当成“遇到复杂就兜底”的模糊类型 + +## Next Step + +基于本设计,下一步应直接落地: + +- `docs/superpowers/plans/2026-04-18-g3-paginated-enrichment-plan.md` + +该 `plan` 只围绕 `G3 / P0-3` 实施,不扩展到 `G6/G7/G8` 或全量场景铺开。 diff --git a/docs/superpowers/specs/2026-04-18-g6-host-bridge-workflow-design.md b/docs/superpowers/specs/2026-04-18-g6-host-bridge-workflow-design.md new file mode 100644 index 0000000..b730241 --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-g6-host-bridge-workflow-design.md @@ -0,0 +1,67 @@ +# G6 Host Bridge Workflow Design + +> Date: 2026-04-18 +> Status: Initial implementation slice + +## Goal + +Define `G6 宿主桥接多步查询型` as a separate scene family so boundary samples no longer fall back into `G1` or `G1-E`. + +The initial implementation goal is classification and fail-closed safety, not runnable generation. + +## Family Definition + +`G6` covers scenes where the business workflow is primarily advanced by host-browser bridge actions instead of direct request contracts. + +Minimum signals: + +1. explicit host bridge action such as `BrowserAction(...)` +2. explicit browser script bridge such as `sgBrowserExcuteJsCode(...)` +3. callback-driven request progression +4. 
business endpoints nested behind the host callback chain +5. optional `localhost:*` dependency as host runtime evidence + +## P0 Boundary Sample + +`电能表现场检验完成率指标报表` + +Repo-local representative: + +`tests/fixtures/generated_scene/g6_host_bridge_workflow` + +## Contract Policy + +The first slice intentionally does not generate runnable skills for `G6`. + +Instead, it must: + +1. classify the scene as `host_bridge_workflow` +2. preserve host bridge actions as evidence +3. preserve `localhost:*` dependencies as host-runtime evidence +4. prevent fallback to `single_request_table` +5. prevent fallback to `single_request_enrichment` +6. fail closed with a stable blocker + +## Non-Goals + +1. no host transport redesign +2. no callback runtime implementation +3. no full browser bridge orchestration +4. no broad `G7/G8` expansion +5. no weakening of `G1-E` or `G3` gates + +## Readiness Gates + +The first slice adds these G6-specific gates: + +1. `g6_host_bridge_detected` +2. `g6_fail_closed` + +`g6_fail_closed` is expected to fail until a real G6 runtime contract exists. + +## Acceptance Criteria + +1. `G6` fixture is classified as `host_bridge_workflow` +2. generation fails closed instead of writing a pseudo-runnable skill +3. ordinary localhost export noise does not get promoted to `G6` +4. existing `G1-E`, `G3`, `G2`, and canonical tests remain green diff --git a/docs/superpowers/specs/2026-04-18-g7-multi-endpoint-inventory-design.md b/docs/superpowers/specs/2026-04-18-g7-multi-endpoint-inventory-design.md new file mode 100644 index 0000000..d69067b --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-g7-multi-endpoint-inventory-design.md @@ -0,0 +1,48 @@ +# G7 Multi Endpoint Inventory Design + +> Date: 2026-04-18 +> Status: Initial implementation slice + +## Goal + +Define `G7 多接口盘点汇总型` as a separate family so multi-endpoint inventory scenes no longer fall back into `G1` or `G1-E`. + +The first implementation slice is classification and fail-closed safety only. 
+ +## Family Definition + +`G7` covers scenes that query multiple inventory/statistics endpoints by asset category and aggregate the results into one report. + +Minimum signals: + +1. three or more inventory/statistics endpoints +2. endpoint names or URLs carrying `assetStats`, `inventory`, `stock`, `AcqTrml`, `MeterCommonModule`, or `JlGnModule` +3. no explicit host bridge action requirement +4. no local SQL/document-generation pipeline requirement + +## P0 Boundary Sample + +`计量资产库存统计` + +Repo-local representative: + +`tests/fixtures/generated_scene/g7_multi_endpoint_inventory` + +## Contract Policy + +The first slice intentionally blocks runnable generation until a real G7 inventory contract exists. + +The initial system must: + +1. classify as `multi_endpoint_inventory` +2. preserve inventory endpoint evidence +3. avoid fallback to `single_request_table` +4. avoid fallback to `single_request_enrichment` +5. fail closed with a stable blocker + +## Acceptance Criteria + +1. the representative fixture classifies as `multi_endpoint_inventory` +2. at least five inventory endpoints are detected in the fixture +3. generation fails closed +4. existing `G1-E`, `G3`, `G6`, and `G2` regressions remain green diff --git a/docs/superpowers/specs/2026-04-18-g8-local-doc-pipeline-design.md b/docs/superpowers/specs/2026-04-18-g8-local-doc-pipeline-design.md new file mode 100644 index 0000000..487da91 --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-g8-local-doc-pipeline-design.md @@ -0,0 +1,56 @@ +# G8 Local Document Pipeline Design + +> Date: 2026-04-18 +> Status: Initial implementation slice + +## Goal + +Define `G8 抓取落库分析出文档型` as a separate family so local storage, SQL analysis, and document-generation scenes no longer fall back into `G1`, `G1-E`, `G6`, or `G3`. + +The first implementation slice is classification and fail-closed safety only. + +## Family Definition + +`G8` covers scenes where page/browser data capture is only the front half of the workflow. 
The business result depends on a downstream local pipeline: + +1. local service persistence or `selectData` +2. SQL analysis such as `definedSqlQuery` +3. document generation such as `docExport` +4. optional host bridge actions +5. optional `localhost:*` dependencies + +## P0 Boundary Sample + +`95598供电服务月报` + +Repo-local representative: + +`tests/fixtures/generated_scene/g8_local_doc_pipeline` + +## Contract Policy + +The first slice intentionally blocks runnable generation until a real G8 local document pipeline contract exists. + +The initial system must: + +1. classify as `local_doc_pipeline` +2. preserve local pipeline evidence +3. avoid fallback to `page_state_eval` +4. avoid fallback to `host_bridge_workflow` +5. avoid fallback to `single_request_table` +6. fail closed with a stable blocker + +## Priority Rule + +When both host bridge and local document pipeline signals exist, `G8` wins over `G6`. + +Reason: `G6` is about host-bridge-driven query progression; `G8` is about the downstream local storage, SQL, and document production chain. + +## Acceptance Criteria + +1. representative fixture classifies as `local_doc_pipeline` +2. local pipeline actions include `definedSqlQuery` +3. local pipeline actions include `docExport` +4. local pipeline actions include `selectData` +5. generation fails closed +6. existing `G1-E`, `G3`, `G6`, `G7`, and `G2` regressions remain green diff --git a/docs/superpowers/specs/2026-04-18-scene-generator-ops-console-design.md b/docs/superpowers/specs/2026-04-18-scene-generator-ops-console-design.md new file mode 100644 index 0000000..fe9101c --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-scene-generator-ops-console-design.md @@ -0,0 +1,380 @@ +# Scene Generator Ops Console Design + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex + +## Problem Statement + +当前 `http://127.0.0.1:3210/` 页面虽然已经具备 scene 选择、深度分析、Skill 生成和日志展示能力,但页面默认形态仍然更接近“开发调试控制台”,而不是“运维执行工作台”。 + +当前主要问题包括: + +1. 
首屏信息过多,配置项、分析结果、技术细节和日志同时展开,认知负担过高 +2. 大量英文标题、字段名和技术术语直接暴露给运维人员,理解成本高 +3. `Scene IR`、`workflowArchetype`、`requestTemplate`、`evidence` 等调试信息默认可见,不符合运维默认使用场景 +4. 页面目前优先服务开发者调试,而不是运维执行、结果确认和问题定位 + +因此,该页面需要从“调试面板”收敛为“面向运维的场景 Skill 生成工作台”,并通过信息分层、中文化和双模式设计降低使用门槛。 + +## Goal + +在不削弱原有分析和生成能力的前提下,将 scene generator 页面重构为: + +1. 默认服务运维执行 +2. 默认中文化 +3. 默认只展示结论、操作和结果 +4. 将技术细节折叠为调试层 + +页面重构后的阶段性目标是让运维人员可以不理解底层 `Scene IR` 和 archetype 术语,也能完成以下任务: + +1. 选择场景目录 +2. 启动分析 +3. 判断是否可生成 +4. 启动生成 +5. 查看结果目录或失败原因 + +## Non-Goals + +1. 不在本轮界面优化中修改 scene generator 后端接口协议 +2. 不在本轮优化中重构分析算法或生成逻辑 +3. 不要求删除现有调试信息,只要求调整默认显隐与信息分层 +4. 不要求一次性完成全部视觉风格重设计 + +## User Roles + +页面需要明确区分两类使用者: + +### 1. 运维执行者 + +主要关注: + +1. 处理哪个场景 +2. 当前是否可生成 +3. 为什么不能生成 +4. 生成结果在哪里 +5. 是否需要人工确认 + +### 2. 开发 / 调试者 + +主要关注: + +1. `workflowArchetype` +2. `Scene IR` +3. `requestTemplate` +4. `evidence` +5. `bootstrap` +6. 原始日志流 + +默认界面必须优先服务运维执行者,开发 / 调试者通过“技术详情”进入二级信息层。 + +## Design Principles + +### 1. 默认运维模式 + +首页默认展示“运维执行工作台”,而不是“技术调试面板”。 + +### 2. 先结论后证据 + +首屏先展示: + +1. 当前状态 +2. 场景识别结果 +3. 可执行性评估 +4. 风险摘要 +5. 生成结果 + +技术证据、原始结构和底层日志应延后展示。 + +### 3. 默认中文化 + +面向运维的标题、按钮、状态、风险说明和结果文案应全部中文化。 + +### 4. 技术细节折叠 + +`Scene IR`、`evidence`、`requestTemplate`、`workflow steps` 等信息应进入“技术详情(调试用)”,默认折叠。 + +### 5. 状态表达业务化 + +不直接向运维展示 `Readiness A/B/C` 或 `workflowArchetype` 等底层字段,而应映射为可读的业务状态。 + +## Information Architecture + +页面建议收敛为以下五个区域: + +## 1. 顶部总览区 + +用于一眼说明页面用途和当前总体状态。 + +建议包含: + +1. 页面标题 +2. 页面副标题 +3. 服务状态 +4. 当前状态 +5. 最近操作时间 + +## 2. 左侧主操作区 + +用于承载运维日常需要使用的输入与动作。 + +建议包含: + +1. 场景目录 +2. 场景名称 +3. 输出目录 +4. 开始分析 +5. 生成 Skill +6. 重新开始 +7. 高级设置(折叠) + +## 3. 右侧结果摘要区 + +这是首屏核心区域,负责承载: + +1. 场景识别结果 +2. 可执行性评估 +3. 风险提示 +4. 生成结果 + +## 4. 底部执行过程区 + +用于展示中文化后的关键执行过程日志,而不是开发流原始 SSE 输出。 + +## 5. 技术详情区 + +默认折叠,仅在开发和排障时查看。 + +建议包含: + +1. 场景识别详情 +2. 接口与请求信息 +3. 执行步骤 +4. 模式信息 +5. 识别依据 +6. 风险与缺失项 +7. 原始 JSON / Scene IR +8. 
原始技术日志 + +## Default Page Layout + +建议页面结构如下: + +```text +[标题区] +场景 Skill 生成工作台 +当前状态 | 服务状态 | 最近操作时间 + +[左侧:操作区] +场景目录 +场景名称 +输出目录 +开始分析 +生成 Skill +高级设置(折叠) + +[右侧:结果摘要区] +卡片1:场景识别结果 +卡片2:可执行性评估 +卡片3:风险提示 +卡片4:生成结果 + +[底部:执行过程] +中文摘要日志 + +[折叠区:技术详情(调试用)] +场景识别详情 +工作流步骤 +模式信息 +请求模板 +证据与风险 +原始 JSON +原始技术日志 +``` + +## Default Field Visibility + +### 一级信息:运维必须看 + +默认始终可见: + +1. 场景目录 +2. 场景名称 +3. 输出目录 +4. 当前状态 +5. 场景类型 +6. 可执行性评估 +7. 风险摘要 +8. 生成结果 +9. 输出目录 / 结果文件 + +### 二级信息:运维偶尔看 + +默认展示简版: + +1. 目标系统 +2. 输出类型 +3. 最近一次执行结果 +4. 阻断原因 + +### 三级信息:开发 / 调试看 + +默认折叠: + +1. `scene-id` +2. `scene-kind` +3. `targetUrl override` +4. `workflow archetype override` +5. `requestTemplate` +6. `staticParams` +7. `evidence` +8. `confidence` +9. `bootstrap domain` +10. `workflow steps` +11. `endpoints` +12. 原始 SSE 日志 + +## Chinese Copy Strategy + +## Page Title + +建议使用: + +- `场景 Skill 生成工作台` + +副标题建议: + +- `用于分析场景、生成 Skill,并查看内网执行准备情况` + +## Main Action Labels + +建议按钮文案: + +1. `选择目录` +2. `开始分析` +3. `生成 Skill` +4. `重新开始` +5. `恢复默认` +6. `打开输出目录` +7. `查看结果文件` + +## Section Titles + +建议区块标题: + +1. `场景操作` +2. `分析结果` +3. `场景识别结果` +4. `可执行性评估` +5. `风险提示` +6. `生成结果` +7. `执行过程` +8. `技术详情(调试用)` + +## Status Copy + +建议页面主状态: + +1. `待选择场景` +2. `已选择场景,待分析` +3. `分析中` +4. `分析完成` +5. `可直接生成` +6. `可生成但需确认` +7. `暂不建议生成` +8. `生成中` +9. `生成完成` +10. `生成失败` + +## Readiness Mapping + +不建议直接向运维展示 `Readiness A/B/C`,建议映射为: + +1. `A -> 可直接生成` +2. `B -> 可生成但需确认` +3. `C -> 暂不建议生成` + +## Archetype Mapping + +不建议直接向运维展示英文 archetype,建议映射为: + +1. `single_request_table -> 单页报表` +2. `wrapped_single_mode -> 单页报表` +3. `multi_mode_request -> 多模式报表` +4. `paginated_enrichment -> 分页明细` +5. `page_state_eval -> 页面检测` +6. `embedded_page_tool -> 工具场景` +7. `page_exec_check -> 检测场景` + +## Result Copy Examples + +可执行性评估区建议使用中文业务态说明,例如: + +1. `已识别完整查询链与报表输出链,可直接生成` +2. `主要流程已识别,但存在部分风险,建议确认后生成` +3. `当前缺少关键执行信息,暂不建议直接生成` + +风险提示区建议使用简短中文风险,例如: + +1. `未识别完整分页链` +2. `导出规则识别不完整` +3. `目标系统地址存在冲突` +4. `场景类型识别置信度偏低` +5. 
`存在宿主桥接依赖,需内网环境验证` + +执行过程区建议使用中文摘要日志,例如: + +1. `已开始分析场景` +2. `已完成基础信息识别` +3. `已完成深度分析` +4. `已识别场景类型:多模式报表` +5. `已开始生成 Skill` +6. `Skill 已生成完成` +7. `输出目录:xxx` +8. `生成失败:未识别完整分页补数链` + +## Interaction Model + +### Default Flow + +运维默认流程应收敛为: + +1. 选择场景目录 +2. 点击开始分析 +3. 查看分析摘要 +4. 点击生成 Skill +5. 查看结果目录或失败原因 + +### Advanced Flow + +只有在分析失败、生成失败或需要排障时,才进入以下流程: + +1. 展开风险详情 +2. 展开技术详情 +3. 查看原始日志和识别依据 + +## Implementation Priorities + +本界面优化建议按以下顺序推进: + +1. 默认中文化 +2. 默认隐藏技术细节 +3. 默认只展示“状态摘要 + 操作 + 结果” +4. 日志区中文化 +5. 高级设置折叠 +6. 技术详情折叠 + +## Acceptance Criteria + +本界面优化完成的标志是: + +1. 运维人员不理解 `Scene IR`、`workflowArchetype` 等术语,也能完成场景分析和 Skill 生成 +2. 首屏不再出现大面积未经翻译的英文标题和底层技术字段 +3. 首屏主要承载“状态摘要 + 操作 + 结果”,技术细节默认折叠 +4. 页面默认服务运维执行,技术调试仍可通过二级区域完成 + +## Open Questions + +1. 是否需要显式提供“运维模式 / 调试模式”切换,而不是仅通过折叠区分层 +2. 结果文件是否需要在页面内提供直接打开入口 +3. 风险提示区是否需要区分“阻断项”和“提醒项” diff --git a/docs/superpowers/specs/2026-04-18-scene-skill-post-roadmap-execution-design.md b/docs/superpowers/specs/2026-04-18-scene-skill-post-roadmap-execution-design.md new file mode 100644 index 0000000..088e3aa --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-scene-skill-post-roadmap-execution-design.md @@ -0,0 +1,182 @@ +# sgClaw Scene Skill Post-Roadmap Execution Design + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Plan:** [2026-04-17-scene-skill-60-to-90-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md) + +## Problem Statement + +The current `60-to-90 roadmap` has already completed the planned mainline scope: + +1. `G2` is now a code-backed promoted family baseline with no remaining queue item. +2. `G1-E` is now a code-backed promoted family baseline with no remaining queue item. +3. `G3` is now a code-backed promoted family baseline with no remaining queue item. +4. `G6/G7/G8` remain established boundary-runtime families. +5. 
`Track E` already has frozen snapshot, current overlay, family assets, and roadmap status assets. + +This means the next problem is no longer: + +`How do we finish the current roadmap?` + +It is now: + +`How do we convert completed repo-local roadmap assets into a stable execution board, a real-sample validation program, and a bounded next-stage roadmap without reopening old implementation work?` + +## Goal + +Define the next-stage execution design after the current roadmap closure, with three explicit goals: + +1. unify the current `102-scene` execution state into one authoritative board +2. introduce real-sample validation as the next quality gate above repo-local fixture success +3. prepare the next bounded roadmap for boundary families and runtime gaps without silently extending the old roadmap + +## Success Definition + +The next stage is considered successful when: + +1. every currently known scene has a stable current-state label in one execution board +2. `repo-local baseline success` and `real-sample success` are explicitly separated +3. the next roadmap boundary is written down before new implementation work begins +4. deferred families and runtime gaps have explicit entry criteria instead of ad hoc expansion + +## Scope + +This next-stage design includes: + +1. current execution-board unification +2. real-sample validation planning and first-round recording +3. boundary-family and runtime-gap prioritization +4. next-stage roadmap design and plan assets + +This design does not include: + +1. reopening `G1/G2/G3` P0/P1 compiler work already completed +2. unlimited fixture expansion +3. full `102-scene` end-to-end runtime rollout +4. direct implementation of unified login recovery +5. direct implementation of all host-runtime and transport gaps + +## Current Baseline + +The current repo already has the following stable assets: + +1. `roadmap_execution_status_2026-04-18.json` +2. `scene_ledger_snapshot_2026-04-18.json` +3. 
`scene_ledger_status_2026-04-18.json` +4. `p1_family_manifest.json` +5. `p1_family_results.json` + +Together they show that the roadmap mainline is complete at the repo-local level, but they do not yet provide: + +1. one unified `102-scene current execution board` +2. one authoritative real-sample validation layer +3. one explicit next-stage roadmap boundary + +## Design Principles + +1. Do not extend the old roadmap silently. +2. Keep `repo-local promotion` and `real-world validation` as separate stages. +3. Treat family assets as stable inputs, not as temporary scratch data. +4. Keep `G4/G5` deferred until a new entry decision is documented. +5. Keep runtime-gap planning separate from archetype-family planning. +6. Keep execution-board work minimal and subordinate to real-sample validation. +7. Move into real-sample validation as soon as `G2`, `G1-E`, and `G3` each have one mappable real sample. +8. Defer any new asset that does not directly support current validation execution. + +## Workstream Model + +The next stage is divided into four workstreams: + +1. `WS1` Current Execution Board Unification +2. `WS2` Real Sample Validation +3. `WS3` Boundary and Runtime Gap Planning +4. `WS4` Next Roadmap Definition + +## WS1: Current Execution Board Unification + +### Intent + +Unify the frozen snapshot, current overlay, family assets, and roadmap status into one authoritative scene-execution board. +This board is a support layer for validation, not a new standalone asset program. + +### Required Outputs + +1. current execution board +2. snapshot-vs-current diff table +3. family-to-scene mapping table + +### Acceptance + +1. every scene has one current-state label +2. promoted baseline and promoted expansion states are visible at scene level +3. no manual cross-reading across multiple assets is required to know current status +4. 
the board stays limited to the minimum structure required by real-sample validation + +## WS2: Real Sample Validation + +### Intent + +Introduce the next quality layer above fixture success by validating representative real samples for current mainline families. +Once one mappable real sample exists for each of `G2`, `G1-E`, and `G3`, this workstream takes priority over further board refinement. + +### Required Outputs + +1. real-sample validation plan +2. first-round validation records +3. mismatch taxonomy +4. execution-board status updates + +### Acceptance + +1. each mainline family has at least one real-sample validation record +2. real-world mismatch reasons are explicit +3. fixture success is no longer treated as the final success state +4. validation execution is not blocked by nonessential board or reporting assets + +## WS3: Boundary and Runtime Gap Planning + +### Intent + +Prepare the next bounded scope by deciding what should happen with `G4/G5` and with runtime gaps that the current roadmap intentionally excluded. + +### Required Outputs + +1. boundary family readiness notes +2. deferred family entry criteria +3. runtime gap matrix +4. prioritization note for next implementation round + +### Acceptance + +1. `G4/G5` do not enter by drift +2. runtime gaps have explicit classifications +3. next implementation round has a documented reason for scope choice + +## WS4: Next Roadmap Definition + +### Intent + +Write the next bounded roadmap instead of continuing indefinitely under the old one. + +### Required Outputs + +1. post-roadmap design +2. post-roadmap plan +3. milestone table +4. new completion criteria + +### Acceptance + +1. the next stage has its own scope guardrails +2. the next stage has its own completion criteria +3. new work no longer depends on stretching the old roadmap beyond closure + +## Completion Criteria + +This design is considered fully executed when: + +1. the current roadmap is explicitly marked completed in execution assets +2. 
the execution board is unified +3. real-sample validation has begun with formal records +4. a new bounded roadmap exists for post-roadmap work diff --git a/docs/superpowers/specs/2026-04-18-scene-skill-real-sample-validation-roadmap-design.md b/docs/superpowers/specs/2026-04-18-scene-skill-real-sample-validation-roadmap-design.md new file mode 100644 index 0000000..71a7acb --- /dev/null +++ b/docs/superpowers/specs/2026-04-18-scene-skill-real-sample-validation-roadmap-design.md @@ -0,0 +1,64 @@ +# sgClaw Scene Skill Real Sample Validation Roadmap Design + +> **Status:** Draft +> **Date:** 2026-04-18 +> **Author:** Codex +> **Upstream Plan:** [2026-04-18-scene-skill-post-roadmap-execution-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-18-scene-skill-post-roadmap-execution-plan.md) + +## Problem Statement + +The completed `60-to-90 roadmap` established repo-local promoted baselines for `G2`, `G1-E`, and `G3`, but the next quality barrier is no longer family promotion. + +It is now real-sample validation: + +1. `G2` already has a real mismatch anchor. +2. `G1-E` already has a real pass anchor. +3. `G3` is now selected into the real-sample queue but still lacks an executed real-run record. +4. `G6/G7/G8` remain boundary families until runtime gaps are explicitly closed. + +The next roadmap must therefore be validation-first instead of asset-first. + +## Goal + +Define the next bounded roadmap around three immediate goals: + +1. convert current selected real samples into formal pass/mismatch/fail-closed records +2. use validation pressure to decide whether boundary families or deferred families should enter implementation +3. keep execution-board work subordinate to validation rather than growing into a new asset program + +## Scope + +This roadmap includes: + +1. real-sample execution for currently selected `G2/G1-E/G3` anchors +2. validation-result-driven scope decisions for `G6/G7/G8` +3. 
entry decisions for `G4/G5` only after explicit criteria are met + +This roadmap does not include: + +1. reopening completed repo-local compiler work for `G1/G2/G3` +2. unlimited fixture expansion +3. full 102-scene runtime rollout +4. direct implementation of all runtime gaps in one round + +## Design Principles + +1. Real-sample validation is the primary execution axis. +2. Execution-board changes must only exist to support validation records. +3. Boundary-family expansion must be justified by validation pressure, not drift. +4. Deferred-family entry must be decided explicitly before implementation begins. + +## Workstream Model + +1. `WS1` Mainline Real Sample Execution +2. `WS2` Validation Result Triage And Scope Decisions +3. `WS3` Boundary Runtime Enablement Decision +4. `WS4` Deferred Family Entry Decision + +## Completion Criteria + +This roadmap is complete when: + +1. `G2`, `G1-E`, and `G3` each have executed real-sample records +2. the next implementation scope is selected from validation evidence +3. boundary-family and deferred-family entry decisions are documented before new implementation begins diff --git a/docs/superpowers/specs/2026-04-19-102-final-coverage-status-rollup-design.md b/docs/superpowers/specs/2026-04-19-102-final-coverage-status-rollup-design.md new file mode 100644 index 0000000..7e2bd61 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-final-coverage-status-rollup-design.md @@ -0,0 +1,71 @@ +# 102 Final Coverage Status Rollup Design + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-design.md` +> Parent Layer: `Layer E` +> Status: Active + +## Intent + +Publish a final, policy-governed coverage rollup after the residual 13 closure sequence. + +This design consolidates the latest full-coverage reconciliation candidate view with the residual 13 follow-up reconciliation result. It does not update the official execution board. + +## Inputs + +1. 
`tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` +2. `tests/fixtures/generated_scene/residual_13_reconciliation_candidates_2026-04-19.json` +3. `tests/fixtures/generated_scene/boundary_residual_hold_decision_2026-04-19.json` +4. `tests/fixtures/generated_scene/bootstrap_target_residual_isolation_2026-04-19.json` +5. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` + +## Output + +1. `tests/fixtures/generated_scene/final_coverage_status_rollup_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-102-final-coverage-status-rollup-report.md` + +## Rollup Rule + +Start from the 102-scene full coverage reconciliation candidates. + +For every scene present in the residual 13 reconciliation result, replace its previous candidate status with the residual follow-up candidate status. + +## Status Model + +1. `framework-auto-pass-candidate` +2. `framework-structured-fail-closed` +3. `framework-valid-host-bridge` +4. `source-unreadable` +5. `unsupported-family` +6. `misclassified-unresolved` +7. `missing-source` + +## Expected Final Shape + +After residual closure: + +1. `95` framework auto-pass candidates +2. `7` structured fail-closed / hold / isolation candidates +3. `0` unresolved states +4. `0` source-unreadable states +5. `0` unsupported-family states +6. `0` misclassified-unresolved states + +## Boundary + +This design must not: + +1. update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json`; +2. modify `src/generated_scene/analyzer.rs`; +3. modify `src/generated_scene/generator.rs`; +4. promote scenes to official board state; +5. rerun the 102 sweep; +6. add a family. + +## Acceptance Criteria + +1. The rollup contains exactly `102` scenes. +2. Residual 13 updates are applied. +3. The rollup summary matches the final expected shape. +4. The official execution board remains untouched. +5. The report states whether official board reconciliation should be the next step. 
diff --git a/docs/superpowers/specs/2026-04-19-102-framework-closure-rollup-design.md b/docs/superpowers/specs/2026-04-19-102-framework-closure-rollup-design.md new file mode 100644 index 0000000..982c04f --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-framework-closure-rollup-design.md @@ -0,0 +1,20 @@ +# 102 Framework Closure Rollup Design + +> Date: 2026-04-19 +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` + +## Intent + +Publish the final framework-level status for all 102 scenes after the final-2 residual sequence. + +This is a reporting layer, not an implementation layer. + +## Closure States + +The rollup must distinguish: + +1. full framework auto-pass +2. named structured hold +3. unresolved status + +The target is `unresolved = 0`. diff --git a/docs/superpowers/specs/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-design.md b/docs/superpowers/specs/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-design.md new file mode 100644 index 0000000..c5dac94 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-full-coverage-followup-sweep-and-reconciliation-design.md @@ -0,0 +1,66 @@ +# 102 Full Coverage Follow-Up Sweep And Reconciliation Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Child Sequence: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-plan.md` +> Parent Layer: `Layer E` + +## Intent + +Run one full 102-scene follow-up sweep after Routes 2 through 6 are complete, then publish a reconciliation candidate view governed by the Route 6 promotion policy. + +This design measures cumulative coverage delta. It does not directly update the official execution board. + +## Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 2 follow-up assets +3. Route 3 follow-up assets +4. 
Route 4 follow-up assets +5. Route 5 boundary decisions +6. Route 6 promotion policy + +## Output Assets + +1. `tests/fixtures/generated_scene/full_coverage_followup_sweep_2026-04-19.json` +2. `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` +3. `docs/superpowers/reports/2026-04-19-102-full-coverage-followup-sweep-report.md` +4. `docs/superpowers/reports/2026-04-19-102-full-coverage-reconciliation-candidates-report.md` + +## Status Model + +The sweep must report: + +1. `auto-pass` +2. `fail-closed-known` +3. `adjudicated-valid-host-bridge` +4. `source-unreadable` +5. `missing-source` +6. `unsupported-family` +7. `misclassified-unresolved` + +The reconciliation candidate view must also report: + +1. `framework-auto-pass-candidate` +2. `framework-structured-fail-closed` +3. `framework-valid-host-bridge` +4. `hygiene-pass-candidate` +5. `hygiene-fail-closed-candidate` + +## Guardrails + +1. Do not modify `src/generated_scene/analyzer.rs`. +2. Do not modify `src/generated_scene/generator.rs`. +3. Do not update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json`. +4. Do not promote scenes directly. +5. Do not open a new family. +6. Do not start implementation work from the sweep result. + +## Completion Criteria + +1. A fixed 102-scene sweep result exists. +2. A reconciliation candidate asset exists. +3. Coverage delta is reported against the previous structured follow-up baseline. +4. The report states the remaining gap to the 102-scene target. 
+ diff --git a/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-design.md b/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-design.md new file mode 100644 index 0000000..6983e5f --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-design.md @@ -0,0 +1,116 @@ +# 102 Full Sweep Dry-Run Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Context: completed `scene-skill 60-to-90` roadmap and post-roadmap real-sample closures + +## 1. Intent + +This design defines a bounded, read-only dry-run over the full `102` scene ledger. + +The target is: + +`measure current generic scene-to-skill coverage without changing generator behavior or promoting scene status` + +## 2. Problem Statement + +The current project has three different coverage numbers: + +1. real-sample executed pass: `5 / 102` +2. code-backed ledger coverage: `23 / 102` +3. repo-local family regression pass count: `24 / 24` + +These numbers are all valid, but none answers the direct question: + +`how many of the 102 scenes can the current generic analyzer/generator handle if we run them all now?` + +This dry-run answers that question. + +## 3. Scope Boundary + +This design is limited to measurement. + +It may include: + +1. reading the current `102` execution board +2. resolving local source directories under the fixed real-scene root +3. running analyzer/generator dry-runs against available sources +4. collecting success, fail-closed, missing-source, and unsupported results +5. publishing a standalone dry-run JSON and report + +It must not include: + +1. changing analyzer logic +2. changing generator logic +3. changing existing family baselines +4. changing `scene_execution_board_2026-04-18.json` +5. promoting scenes from dry-run results +6. creating new family plans +7. running more than the fixed `102` ledger set + +## 4. 
Fixed Inputs + +### Execution Board + +`tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +### Scene Root + +`D:/desk/智能体资料/全量业务场景/一平台场景` + +### Generator + +`cargo run --bin sg_scene_generate` + +## 5. Fixed Outputs + +### Dry-Run Result JSON + +`tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` + +### Dry-Run Output Root + +`examples/full_sweep_dry_run_2026-04-19` + +### Report + +`docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md` + +## 6. Classification Model + +Each scene must receive exactly one final dry-run status: + +1. `auto-pass` +2. `fail-closed-known` +3. `misclassified` +4. `unsupported-family` +5. `missing-source` +6. `source-unreadable` + +## 7. Coverage Metrics + +The dry-run must report at least these numbers: + +1. `realSampleExecutedPass` +2. `codeBackedLedgerCoverage` +3. `dryRunAutoPass` +4. `dryRunActionableCoverage` +5. `missingSource` +6. `sourceUnreadable` +7. `unsupportedFamily` + +## 8. Non-Negotiable Stop Rules + +1. If a scene fails, record the failure and continue. +2. If many scenes fail with the same blocker, record the blocker and do not fix it in this dry-run. +3. If dry-run discovers a likely bug, write it as a follow-up recommendation only. +4. Do not update the execution board from dry-run output. + +## 9. Exit Condition + +This design is complete when the project has a single bounded plan that: + +1. defines the dry-run tool/task +2. defines the dry-run output schema +3. preserves read-only behavior against generator logic and board status +4. 
produces a report that answers actual generic coverage over `102` scenes diff --git a/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-triage-design.md b/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-triage-design.md new file mode 100644 index 0000000..d144517 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-full-sweep-dry-run-triage-design.md @@ -0,0 +1,208 @@ +# 102 Full Sweep Dry-Run Triage Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Result: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +> Upstream Report: `docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md` + +## Design Intent + +Split the non-pass buckets from the `102` scene full sweep into concrete, actionable triage categories without changing generator behavior or promoting scene status. + +The design answers: + +`why did 62 scenes not become dry-run auto-pass, and which blocker should be handled first?` + +## Starting Point + +The upstream dry-run produced: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 40 | +| `fail-closed-known` | 26 | +| `misclassified` | 5 | +| `source-unreadable` | 31 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| Total | 102 | + +The triage scope is only the `62` non-pass records. + +## Scope Guardrails + +1. do not edit `src/generated_scene/analyzer.rs` +2. do not edit `src/generated_scene/generator.rs` +3. do not change scene generation logic +4. do not update `scene_execution_board_2026-04-18.json` +5. do not promote scenes from this triage +6. do not add family baselines +7. do not create implementation plans from a single failure +8. do not rerun outside the fixed `102` scene set + +## Fixed Inputs + +1. dry-run result: `tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json` +2. dry-run output root: `examples/full_sweep_dry_run_2026-04-19` +3. execution board: `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +4. 
scene root: `D:/desk/智能体资料/全量业务场景/一平台场景` + +## Fixed Outputs + +1. triage result: `tests/fixtures/generated_scene/full_sweep_dry_run_triage_2026-04-19.json` +2. triage report: `docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-triage-report.md` + +## Triage Order + +The order is fixed: + +1. timeout triage +2. misclassification triage +3. no-report failure triage + +This order is deliberate: + +1. timeouts are the largest bucket and include already-mapped `G2` scenes +2. misclassification has the cleanest routing-quality signal +3. no-report failures are too broad until the higher-signal buckets are separated + +## Timeout Triage Model + +Input bucket: + +`dryRunStatus = source-unreadable` + +Current count: + +`31` + +Current reason: + +`generator timeout after 30s` + +Target second-level labels: + +1. `timeout-known-family-sample` +2. `timeout-unvalidated-source` +3. `timeout-large-source` +4. `timeout-command-hang` +5. `timeout-generator-slow-but-progressing` +6. `timeout-undetermined` + +Minimum evidence per timeout record: + +1. source directory exists +2. file count +3. total source bytes +4. current group +5. current board status +6. real sample record id if present +7. whether a partial skill directory exists +8. whether a partial generation report exists + +Diagnostic reruns are allowed only for classification. A longer rerun success does not promote the scene. + +## Misclassification Triage Model + +Input bucket: + +`dryRunStatus = misclassified` + +Current count: + +`5` + +Current shape: + +1. `G3 -> host_bridge_workflow`: `3` +2. `G1-E -> host_bridge_workflow`: `2` + +Target second-level labels: + +1. `route-overprefer-host-bridge` +2. `board-expectation-stale` +3. `mixed-workflow-host-bridge-valid` +4. `scene-family-split-needed` +5. `misclassification-undetermined` + +Minimum evidence per misclassification record: + +1. board expected group +2. expected archetype +3. dry-run inferred archetype +4. current source asset +5. 
real sample layer status +6. generated report path +7. failed or conflicting signal summary + +This phase does not correct routing logic. + +## No-Report Failure Triage Model + +Input bucket: + +`dryRunStatus = fail-closed-known` and reason is `generator failed without generation report` + +Current count: + +`25` + +Target failure stages: + +1. `source-scan` +2. `analyzer` +3. `ir-assembly` +4. `readiness-before-report` +5. `compiler-package-write` +6. `panic-or-process-error` +7. `unknown-no-report` + +The one `bootstrap_target` failure remains separately tracked and is not merged into no-report failures. + +Minimum evidence per no-report record: + +1. exit code if available +2. stdout tail +3. stderr tail +4. partial skill directory exists +5. partial references directory exists +6. generated report exists +7. inferred failure stage + +## Result Schema + +Top-level fields: + +```json +{ + "triageDate": "2026-04-19", + "scope": "102-full-sweep-dry-run-triage", + "sourceDryRun": "tests/fixtures/generated_scene/full_sweep_dry_run_2026-04-19.json", + "summary": {}, + "timeoutTriage": [], + "misclassificationTriage": [], + "noReportFailureTriage": [], + "bootstrapTargetFailures": [], + "recommendations": [] +} +``` + +Each triage record keeps the original dry-run scene id and scene name. + +## Completion Criteria + +This triage is complete when: + +1. all `31` timeout records have a second-level timeout label +2. all `5` misclassified records have a routing triage label +3. all `25` no-report failures have an inferred failure stage +4. the `bootstrap_target` case remains separately visible +5. no scene status is promoted +6. no generator or analyzer logic is changed + +## Stop Rule + +Stop after publishing the triage JSON and report. + +Do not start implementation correction from this triage unless a new bounded implementation plan is explicitly created later. 
+ diff --git a/docs/superpowers/specs/2026-04-19-102-full-sweep-improvement-roadmap-design.md b/docs/superpowers/specs/2026-04-19-102-full-sweep-improvement-roadmap-design.md new file mode 100644 index 0000000..af8f4fa --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-full-sweep-improvement-roadmap-design.md @@ -0,0 +1,239 @@ +# 102 Full Sweep Improvement Roadmap Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Dry-Run: `docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-report.md` +> Upstream Triage: `docs/superpowers/reports/2026-04-19-102-full-sweep-dry-run-triage-report.md` + +## Design Intent + +Use the full `102` scene dry-run and triage results to define a single improvement roadmap for generic `scene -> skill` coverage. + +This roadmap is the post-triage equivalent of the earlier `60-to-90` roadmap. It is not a single bugfix plan. It is the governing design for turning measured dry-run blockers into bounded implementation tracks. + +The design answers: + +`how do we move from 40/102 dry-run auto-pass and 66/102 actionable coverage toward a higher verified generic conversion rate without drifting into unbounded fixes?` + +## Current Baseline + +The current measured state is: + +| Metric | Count | +| --- | ---: | +| Real-sample executed pass | 5 / 102 | +| Code-backed ledger coverage | 23 / 102 | +| Dry-run auto-pass | 40 / 102 | +| Dry-run actionable coverage | 66 / 102 | + +The non-pass triage state is: + +| Bucket | Count | Triage conclusion | +| --- | ---: | --- | +| Timeout | 31 | `19 timeout-unvalidated-source`, `8 timeout-large-source`, `4 timeout-known-family-sample` | +| Misclassified | 5 | all `route-overprefer-host-bridge` | +| No-report failure | 25 | all `readiness-before-report` | +| Bootstrap target | 1 | separate `bootstrap_target` | + +## Problem Statement + +The generic generator already auto-passes more scenes than the formal ledger coverage shows, but the result is not trustworthy enough to promote automatically 
because: + +1. known-family scenes still appear in the timeout bucket +2. `host_bridge_workflow` can over-absorb scenes expected to remain `G3` or `G1-E` +3. many fail-closed cases terminate before a structured generation report exists +4. timeout and no-report failures hide actionable blocker details + +## Roadmap Goal + +Improve the measurable generic conversion pipeline, not by adding new families first, but by reducing ambiguity in the current failure surface. + +The roadmap has four goals: + +1. make known-family timeout results explainable and repeatable +2. correct or formally adjudicate host-bridge routing over-preference +3. convert pre-report failures into structured fail-closed results +4. rerun a bounded `102` sweep to measure coverage delta + +## Scope Guardrails + +1. do not add new scene families in this roadmap +2. do not promote scenes directly from diagnostic runs +3. do not update `scene_execution_board_2026-04-18.json` until a later explicit status-sync plan +4. do not use one failure as justification for an unbounded rewrite +5. do not reopen completed `G1-E / G2 / G3 / G6 / G7` real-sample pass records unless they are part of a fixed regression check +6. do not start `G4 / G5` +7. do not implement login recovery, full host runtime, or attachment pipeline work in this roadmap + +## Workstreams + +1. `WS1` Timeout and Source-Scale Diagnostics +2. `WS2` Host-Bridge Routing Boundary Correction +3. `WS3` Structured Fail-Closed Reporting +4. `WS4` Coverage Delta Sweep and Decision Board + +## Track A: Known-Family Timeout Diagnostics + +### Intent + +Separate known-family timeout behavior from generic unvalidated-source timeout behavior. + +### Input + +The `4` records labeled: + +`timeout-known-family-sample` + +### Expected Output + +Each known-family timeout gets one of: + +1. `known-family-rerun-pass` +2. `known-family-source-scale-timeout` +3. `known-family-generator-hotspot` +4. `known-family-contract-blocked-after-long-run` +5. 
`known-family-timeout-unresolved` + +### Design Constraint + +A longer rerun success does not promote a scene. It only changes diagnostic classification. + +## Track B: Timeout Source-Scale Policy + +### Intent + +Create a bounded input filtering and scan-budget policy for large source directories without changing family semantics. + +### Input + +The timeout labels: + +1. `timeout-large-source` +2. `timeout-unvalidated-source` + +### Expected Output + +1. source file selection policy +2. large vendor/library ignore list policy +3. scan-budget decision table +4. timeout reporting shape + +### Design Constraint + +This track is allowed to improve scan boundaries, but not allowed to change archetype semantics. + +## Track C: Host-Bridge Route Over-Preference Correction + +### Intent + +Prevent `host_bridge_workflow` from absorbing scenes that should remain `G3` or `G1-E` when business-chain evidence is stronger. + +### Input + +The `5` records labeled: + +`route-overprefer-host-bridge` + +### Expected Output + +Each misclassification gets one of: + +1. `route-corrected-to-g3` +2. `route-corrected-to-g1e` +3. `board-expectation-reclassified` +4. `valid-host-bridge-workflow` +5. `route-conflict-unresolved` + +### Design Constraint + +This track must preserve the already-passed `G6` real sample and must not degrade `G3` or `G1-E` canonical tests. + +## Track D: Readiness-Before-Report Structured Fail-Closed + +### Intent + +Convert `generator failed without generation report` into structured, machine-readable fail-closed results. + +### Input + +The `25` records labeled: + +`readiness-before-report` + +### Expected Output + +Each case produces a generation report or equivalent dry-run failure record with: + +1. inferred archetype +2. blocker stage +3. missing contract pieces +4. failed gate name +5. actionable reason + +### Design Constraint + +This track should not make failing scenes pass. It should make failures explainable. 
+ +## Track E: Bootstrap Target Isolation + +### Intent + +Keep the single `bootstrap_target` failure separate so it does not pollute the no-report or route-correction work. + +### Input + +The `1` bootstrap target failure: + +`用户停电频次分析监测` + +### Expected Output + +1. isolated bootstrap failure note +2. decision whether it belongs to later bootstrap normalization work + +### Design Constraint + +No bootstrap auto-recovery or login work is included in this roadmap. + +## Track F: Coverage Delta Sweep + +### Intent + +After bounded improvements, rerun a comparable `102` sweep and compare against the baseline. + +### Input + +1. baseline dry-run result +2. updated generator after approved tracks +3. same `102` scene board + +### Expected Output + +1. new dry-run result +2. coverage delta report +3. category movement table +4. decision board for remaining blockers + +### Design Constraint + +The rerun must be comparable to the baseline. It cannot silently change the scene set. + +## Success Criteria + +This roadmap succeeds when: + +1. all known-family timeouts are separated from unvalidated timeout noise +2. all five host-bridge over-preference cases are adjudicated +3. no-report failures become structured fail-closed outputs +4. a follow-up full sweep shows measurable improvement or a clearly explained plateau +5. no new family is introduced to mask existing failure categories + +## Out of Scope + +1. new `G4/G5` implementation +2. full login recovery +3. browser host runtime transport implementation +4. local document attachment pipeline +5. automatic scene promotion into the execution board +6. 
full manual validation of all `102` generated skills + diff --git a/docs/superpowers/specs/2026-04-19-102-sweep-status-reconciliation-design.md b/docs/superpowers/specs/2026-04-19-102-sweep-status-reconciliation-design.md new file mode 100644 index 0000000..88ad53f --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-102-sweep-status-reconciliation-design.md @@ -0,0 +1,119 @@ +# 102 Sweep Status Reconciliation Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Follow-Up Sweep: `tests/fixtures/generated_scene/full_sweep_improvement_followup_2026-04-19.json` +> Upstream Route Decisions: `tests/fixtures/generated_scene/remaining_route_conflict_decisions_2026-04-19.json` + +## Intent + +Create a single reconciled status view after the `102` full-sweep improvement roadmap and the remaining route-conflict adjudication. + +This design does not change generation behavior. It reconciles measurement assets so the next roadmap starts from a trustworthy status baseline instead of reading stale `misclassified` counts from the follow-up sweep. + +## Problem + +The current assets intentionally remain separated: + +1. the original execution board records current scene status +2. the follow-up sweep records measured analyzer/generator results +3. the route-conflict decision asset adjudicates the remaining `4` sweep misclassifications + +Because the follow-up sweep still contains `4` `misclassified` records, a reader can incorrectly treat them as unresolved route bugs. The later route-conflict plan decided all `4` are `valid-host-bridge-workflow`. + +The next step needs a reconciled view that preserves the raw sweep result while adding the final adjudicated state. + +## Scope + +In scope: + +1. merge follow-up sweep records with route-conflict decisions +2. produce reconciled status counts +3. mark the `4` previous misclassifications as `adjudicated-valid-host-bridge` +4. preserve the `2` remaining timeouts as unresolved timeout inputs +5. 
summarize the `48` structured fail-closed records by archetype and blocker +6. produce a reconciliation report for the next roadmap + +Out of scope: + +1. modifying `analyzer.rs` +2. modifying `generator.rs` +3. modifying `scene_execution_board_2026-04-18.json` +4. promoting any scene +5. creating or changing family baselines +6. rerunning the `102` sweep +7. implementing fixes for fail-closed records or timeouts + +## Inputs + +Required inputs: + +1. `tests/fixtures/generated_scene/full_sweep_improvement_followup_2026-04-19.json` +2. `tests/fixtures/generated_scene/remaining_route_conflict_decisions_2026-04-19.json` + +Optional read-only inputs: + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-coverage-delta-report.md` +3. `docs/superpowers/reports/2026-04-19-remaining-route-conflict-correction-report.md` + +## Reconciled Status Model + +Every scene keeps its raw follow-up `dryRunStatus`. + +The reconciliation adds `reconciledStatus`: + +1. `auto-pass` +2. `fail-closed-known` +3. `adjudicated-valid-host-bridge` +4. `source-unreadable` +5. `missing-source` +6. `unsupported-family` +7. `misclassified-unresolved` + +The only status transformation in this plan is: + +`misclassified` + route decision `valid-host-bridge-workflow` -> `adjudicated-valid-host-bridge` + +If a `misclassified` record has no matching final decision, it must remain `misclassified-unresolved`. + +## Expected Reconciled Counts + +Based on the current follow-up sweep and route decisions, the expected reconciliation is: + +| Reconciled status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 2 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | +| `misclassified-unresolved` | 0 | +| Total | 102 | + +## Follow-Up Inputs for Future Roadmaps + +The reconciliation should make the next candidates explicit: + +1. 
`48` structured fail-closed records for workflow evidence / contract completion analysis +2. `2` remaining timeout records for source-scale or command hang diagnostics +3. `4` valid-host-bridge adjudications for optional execution-board expectation cleanup, not analyzer correction + +## Deliverables + +1. `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-102-sweep-status-reconciliation-report.md` + +## Acceptance Criteria + +1. total reconciled scene count is exactly `102` +2. all `4` route conflicts are reconciled from `misclassified` to `adjudicated-valid-host-bridge` +3. no `misclassified` status remains unless it lacks a route decision +4. the `2` timeout cases remain separate and unresolved +5. no execution board status is changed +6. no analyzer or generator logic is changed + +## Stop Rule + +Stop after publishing the reconciliation JSON and report. + +Do not start fail-closed implementation, timeout diagnostics, or execution-board sync inside this plan. diff --git a/docs/superpowers/specs/2026-04-19-bootstrap-target-normalization-roadmap-design.md b/docs/superpowers/specs/2026-04-19-bootstrap-target-normalization-roadmap-design.md new file mode 100644 index 0000000..03af482 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-bootstrap-target-normalization-roadmap-design.md @@ -0,0 +1,27 @@ +# Bootstrap Target Normalization Roadmap Design + +> Date: 2026-04-19 +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Fixed Scene: `sweep-091-scene` + +## Intent + +Normalize the remaining `page_state_eval` bootstrap target residual without opening general login recovery or browser navigation runtime. + +## Fixed Scope + +Only `sweep-091-scene` is in scope. + +## Minimal Success Definition + +The scene must either: + +1. become `framework-auto-pass-candidate`; or +2. remain `framework-structured-fail-closed` with a narrower named bootstrap target reason. 
+ +## Forbidden Scope + +1. no general login recovery +2. no full browser navigation runtime +3. no host-bridge runtime work +4. no new family diff --git a/docs/superpowers/specs/2026-04-19-boundary-fail-closed-decision-design.md b/docs/superpowers/specs/2026-04-19-boundary-fail-closed-decision-design.md new file mode 100644 index 0000000..36daf5e --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-boundary-fail-closed-decision-design.md @@ -0,0 +1,50 @@ +# Boundary Fail-Closed Decision Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 5: boundary-family fail-closed` +> Parent Layer: `Layer C + Layer D` + +## Intent + +Inspect the remaining boundary-family fail-closed records and decide whether they should be: + +1. deferred +2. kept as boundary fail-closed +3. opened into one bounded correction slice + +This is a decision-first route. + +## Fixed Input Bucket + +1. `local_doc_pipeline = 5` +2. `host_bridge_workflow = 1` +3. `page_state_eval/bootstrap_target = 1` + +## Allowed Files + +1. boundary decision JSON assets +2. boundary decision report assets +3. optional next-plan design/plan files only if a bounded boundary slice is justified + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Expected Delta + +1. no code-level coverage delta is required +2. the expected result is a decision-quality delta: + - defer + - hold + - open one bounded slice + +## Stop Rule + +Stop after the boundary decision is published. + +Do not start boundary implementation under this plan. 
+ diff --git a/docs/superpowers/specs/2026-04-19-boundary-family-real-sample-entry-roadmap-design.md b/docs/superpowers/specs/2026-04-19-boundary-family-real-sample-entry-roadmap-design.md new file mode 100644 index 0000000..777589a --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-boundary-family-real-sample-entry-roadmap-design.md @@ -0,0 +1,121 @@ +# Boundary Family Real-Sample Entry Roadmap Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Validation Layer: [real_sample_validation_records_2026-04-18.json](D:/data/ideaSpace/rust/sgClaw/claw-new/tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json) +> Upstream Entry Rules: [boundary_runtime_entry_rules_2026-04-18.json](D:/data/ideaSpace/rust/sgClaw/claw-new/tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json) + +## 1. Intent + +This design defines the next bounded roadmap after the mainline real-sample anchors are closed. + +The current mainline state is: + +1. `G1-E = executed-pass` +2. `G2 = executed-pass` +3. `G3 = executed-pass` + +So the next roadmap should not reopen mainline contract correction. + +The next bounded question is narrower: + +`Which boundary family, if any, is allowed to enter real-sample execution scope next?` + +## 2. Problem Statement + +The repo already has boundary families established at the fixture and family-asset layer: + +1. `G6 = host_bridge_workflow` +2. `G7 = multi_endpoint_inventory` +3. `G8 = local_doc_pipeline` + +But none of them has been promoted into real-sample execution scope. + +At this point the strongest risk is not lack of family assets. + +It is lack of a bounded admission rule for moving a boundary family from: + +1. `hold-as-boundary` + +to: + +2. `real-sample-entry-candidate` + +Without a dedicated roadmap, any next step is likely to drift into: + +1. accidental boundary implementation +2. premature runtime-platform work +3. reopening deferred families + +## 3. 
Scope Boundary + +This roadmap is limited to boundary-family entry decision work. + +It may include: + +1. comparing `G6 / G7 / G8` against explicit real-sample entry criteria +2. selecting at most one boundary family as the next execution candidate +3. producing a bounded recommendation and follow-up plan + +It must not include: + +1. implementing new runtime-platform capabilities +2. executing a real sample for more than one boundary family +3. opening `G4 / G5` +4. reopening `G1-E / G2 / G3` +5. broadening into a new all-family migration program + +## 4. Current Decision Inputs + +The current repo state already gives the key decision inputs: + +1. `G6` requires host-bridge execution semantics beyond repo-local coverage +2. `G7` requires real multi-endpoint aggregation verification +3. `G8` requires local document pipeline runtime and attachment handling + +These are not implementation tasks yet. + +They are admission constraints. + +## 5. Roadmap Goal + +The goal of this roadmap is not to make a boundary family pass immediately. + +The goal is to produce one bounded and defensible next execution target: + +1. select exactly one next boundary family +2. explain why it is first +3. explain why the other two remain held +4. define the minimum real-sample entry slice for the selected family + +## 6. Preferred Outcome + +The preferred outcome is: + +1. one selected boundary family +2. one bounded real-sample execution plan for that family +3. the other boundary families explicitly remain `hold-as-boundary` + +An acceptable fallback outcome is: + +1. no boundary family is admitted yet +2. a new bounded roadmap is required for runtime-platform prerequisites first + +## 7. Acceptance Logic + +This roadmap is successful when: + +1. the next post-mainline step is no longer ambiguous +2. only one next-family direction is opened +3. boundary-family expansion pressure is kept bounded +4. deferred families remain untouched + +## 8. 
Out of Scope + +The following are explicitly out of scope: + +1. new scene-generator family work +2. new canonical answers +3. new mainline contract correction +4. login recovery implementation +5. host runtime or transport implementation beyond decision-level scoping diff --git a/docs/superpowers/specs/2026-04-19-boundary-runtime-prerequisites-roadmap-design.md b/docs/superpowers/specs/2026-04-19-boundary-runtime-prerequisites-roadmap-design.md new file mode 100644 index 0000000..e6994af --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-boundary-runtime-prerequisites-roadmap-design.md @@ -0,0 +1,57 @@ +# Boundary Runtime Prerequisites Roadmap Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Decision: [2026-04-19-post-g7-boundary-decision-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-post-g7-boundary-decision-report.md) + +## 1. Intent + +This design defines the next bounded slice after the post-`G7` boundary decision selects `prerequisites-only hold`. + +The target is: + +`boundary runtime prerequisites roadmap` + +## 2. Why This Direction + +This direction is selected because: + +1. `G7` is already closed and should not be reopened +2. `G6` still depends on stronger host-bridge real execution semantics +3. `G8` still depends on local document runtime and attachment handling +4. forcing either family into execution now would exceed the bounded next-step budget + +## 3. Scope Boundary + +This design is limited to prerequisite scoping only. + +It may include: + +1. separating `G6` prerequisite pressure from `G8` prerequisite pressure +2. defining the minimum prerequisite slice needed before either family can enter real-sample scope +3. selecting one bounded prerequisite direction + +It must not include: + +1. executing `G6` or `G8` +2. implementing host-runtime or local-doc runtime directly +3. reopening `G7` +4. reopening `G1-E / G2 / G3` +5. opening `G4 / G5` + +## 4. 
Target Outcome + +The bounded target outcome is one of two states: + +1. a selected prerequisite direction for `G6` +2. or a selected prerequisite direction for `G8` + +The design rejects direct family execution under this slice. + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one roadmap that: + +1. compares `G6` and `G8` prerequisite burden directly +2. selects exactly one prerequisite direction +3. publishes one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-final-2-official-board-reconciliation-refresh-design.md b/docs/superpowers/specs/2026-04-19-final-2-official-board-reconciliation-refresh-design.md new file mode 100644 index 0000000..a281210 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-final-2-official-board-reconciliation-refresh-design.md @@ -0,0 +1,21 @@ +# Final 2 Official Board Reconciliation Refresh Design + +> Date: 2026-04-19 +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` + +## Intent + +Apply candidate results from either bootstrap normalization or host-bridge runtime roadmap to the official board. + +This design is only a refresh layer. It does not decide, rerun, or implement runtime behavior. + +## Inputs + +One or both of: + +1. `tests/fixtures/generated_scene/bootstrap_target_normalization_reconciliation_candidates_2026-04-19.json` +2. `tests/fixtures/generated_scene/host_bridge_runtime_reconciliation_candidates_2026-04-19.json` + +## Output + +The official board framework summary should reflect the selected residual roadmap result while preserving workbook and business-status fields. 
diff --git a/docs/superpowers/specs/2026-04-19-final-2-residual-child-plan-sequence-design.md b/docs/superpowers/specs/2026-04-19-final-2-residual-child-plan-sequence-design.md new file mode 100644 index 0000000..33614ab --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-final-2-residual-child-plan-sequence-design.md @@ -0,0 +1,51 @@ +# Final 2 Residual Child Plan Sequence Design + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer E / Route 5 + Route 6` +> Upstream Board State: `framework-auto-pass = 100`, `framework-structured-fail-closed = 2` + +## Intent + +Define the remaining bounded plan sequence after local-doc runtime closure. Only two framework structured fail-closed residuals remain: + +1. `sweep-085-scene`: `host_bridge_workflow`, next action `future-host-bridge-runtime-roadmap-input` +2. `sweep-091-scene`: `page_state_eval`, next action `future-bootstrap-target-normalization-roadmap-input` + +This sequence prevents drift back into prior G6 micro-plans. Every next step must be anchored to this final-2 residual sequence and the 102 full coverage parent framework. + +## Design Rules + +1. Do not reuse the old G6 semantics micro-plan chain as an execution path. +2. Do not start host-bridge and bootstrap work in the same implementation plan. +3. Do not update the official board inside diagnostic or implementation plans. +4. Do not add a new family. +5. Do not modify unrelated mainline contracts for G1-E, G2, or G3. +6. Any implementation must target exactly one residual scene unless a later parent-framework revision expands the fixed input bucket. + +## Sequence + +1. `Final 2 Residual Roadmap Prioritization` + Decide which residual enters implementation first. + +2. `Bootstrap Target Normalization Roadmap` + Bounded roadmap for `sweep-091-scene`, if selected by prioritization. + +3. 
`Host-Bridge Runtime Roadmap` + Bounded roadmap for `sweep-085-scene`, if selected by prioritization. + +4. `Final 2 Official Board Reconciliation Refresh` + Consume the selected roadmap output and update only framework-layer board fields. + +5. `102 Framework Closure Rollup` + Publish the final 102-status view after both residuals are closed or explicitly held. + +## Expected End State + +The target end state is one of: + +1. `102 framework-auto-pass`, `0 structured fail-closed` +2. `101 framework-auto-pass`, `1 structured fail-closed with named runtime hold` +3. `100 framework-auto-pass`, `2 structured fail-closed with named runtime holds` + +The third state is allowed only if both residuals are explicitly held by bounded decision plans. diff --git a/docs/superpowers/specs/2026-04-19-final-2-residual-roadmap-prioritization-design.md b/docs/superpowers/specs/2026-04-19-final-2-residual-roadmap-prioritization-design.md new file mode 100644 index 0000000..3aaadca --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-final-2-residual-roadmap-prioritization-design.md @@ -0,0 +1,25 @@ +# Final 2 Residual Roadmap Prioritization Design + +> Date: 2026-04-19 +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` + +## Intent + +Choose the next executable residual roadmap between: + +1. `bootstrap target normalization` +2. `host-bridge runtime` + +The decision must use the current official board state and must not start implementation. + +## Decision Criteria + +1. fixed residual count +2. scope clarity +3. implementation risk +4. probability of improving framework auto-pass count +5. risk of regression to already-passing paths + +## Expected Output + +The output is a decision asset and report naming exactly one selected first roadmap. The non-selected roadmap remains queued. 
diff --git a/docs/superpowers/specs/2026-04-19-g1e-remaining-fail-closed-closure-design.md b/docs/superpowers/specs/2026-04-19-g1e-remaining-fail-closed-closure-design.md new file mode 100644 index 0000000..6f3176c --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g1e-remaining-fail-closed-closure-design.md @@ -0,0 +1,39 @@ +# G1-E Remaining Fail-Closed Closure Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 4: G1-E / single_request_enrichment` +> Parent Layer: `Layer C + Layer D` + +## Intent + +Reduce the remaining `G1-E / single_request_enrichment` structured fail-closed bucket after Routes 2 and 3 are complete or deferred. + +## Fixed Input Bucket + +`single_request_enrichment = 2` + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. Route 4 local inventory and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 2 and Route 3 assets +3. Route 5+ assets + +## Expected Delta + +1. reduce the remaining `G1-E` fail-closed bucket +2. preserve current real-sample `G1-E` pass + +## Stop Rule + +Stop after the Route 4 bucket is rerun and either reduced or explicitly deferred. 
+ diff --git a/docs/superpowers/specs/2026-04-19-g2-real-sample-contract-correction-design.md b/docs/superpowers/specs/2026-04-19-g2-real-sample-contract-correction-design.md new file mode 100644 index 0000000..74437ab --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g2-real-sample-contract-correction-design.md @@ -0,0 +1,132 @@ +# G2 Real Sample Contract Correction Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Roadmap: [2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md) +> Trigger Record: `rsv-g2-001` + +## 1. Intent + +This bounded design defines the next mainline correction slice after `G3` is closed as an executed pass. + +The only target is: + +`G2 real-sample contract correction` + +The purpose is to reduce the current `G2` real-sample mismatch from a broad first-round failure bundle into either: + +1. a corrected executable pass +2. or a smaller named contract mismatch + +## 2. Current Trigger + +The current real-sample validation record for `G2` is: + +1. `recordId = rsv-g2-001` +2. `validationState = executed-mismatch` +3. `mismatchCodes = [archetype_mismatch, bootstrap_mismatch, request_contract_missing, column_defs_missing]` + +From the current mainline status, `G2` is now the strongest unresolved real-sample pressure. + +## 3. Scope Boundary + +This design is strictly bounded to the real-sample contract gap for the fixed `G2` anchor: + +1. `台区线损大数据-月_周累计线损率统计分析` + +The correction scope is limited to: + +1. bootstrap target correctness +2. request contract correctness +3. column-definition correctness +4. output-contract correctness + +This design does not reopen: + +1. completed `G2` family expansion work +2. new `G2` candidate promotion +3. `G1-E` +4. `G3` +5. `G6 / G7 / G8` +6. `G4 / G5` +7. login recovery or broader runtime-platform work + +## 4. 
Problem Statement + +The current mismatch is no longer about whether `G2` exists as a family. + +That work is already closed at the repo-local family layer. + +The current problem is narrower: + +1. the fixed real sample still does not close against the intended `tq-lineloss-report`-level business contract +2. the validation layer still records a compound mismatch instead of a narrowed real-sample outcome + +Based on the real-sample analysis and existing `G2` remediation reports, the remaining pressure should be treated as a contract-alignment issue around: + +1. target bootstrap surface +2. mode-specific request template completeness +3. output column semantics +4. output correctness against the intended lineloss artifact + +## 5. Correction Principles + +The correction must obey these principles: + +1. prefer narrowing the current real-sample mismatch over broad family refactoring +2. preserve `fail-closed` behavior for unresolved `G2` variants +3. do not broaden `G2` routing into unrelated line-loss-like scenes +4. keep the correction anchored on the fixed real sample rather than batch fixtures +5. only update validation assets after the real-sample outcome becomes narrower than the current broad mismatch bundle + +## 6. Target Outcome + +The target outcome is one of two bounded states: + +### A. Preferred outcome + +`rsv-g2-001` becomes: + +1. `executed-pass` + +### B. Acceptable narrower outcome + +`rsv-g2-001` remains `executed-mismatch`, but with a smaller named mismatch such as: + +1. bootstrap-only mismatch +2. request-contract-only mismatch +3. column-contract-only mismatch +4. output-contract-only mismatch + +The design explicitly rejects leaving `G2` unchanged at the same coarse four-code mismatch bundle. + +## 7. Required Verification Surfaces + +The correction must be verified against these surfaces: + +1. real generated `generation-report.json` +2. intended `tq-lineloss-report` semantic baseline +3. 
automated regression that names the corrected real-sample pattern +4. validation-layer assets: + - `real_sample_validation_records_2026-04-18.json` + - `scene_execution_board_2026-04-18.json` + - `boundary_runtime_entry_rules_2026-04-18.json` if prioritization changes + +## 8. Out of Scope + +The following are explicitly out of scope for this design: + +1. promoting more `G2` fixtures +2. redesigning all `G2` subtype handling +3. rewriting the general `multi_mode_request` compiler +4. opening a new `G2` family roadmap +5. changing unrelated validation records + +## 9. Exit Condition + +This design is complete when implementation can be bounded to a single plan that: + +1. freezes the fixed `G2` real sample +2. isolates the remaining bootstrap/request/column/output gap +3. narrows the real-sample outcome +4. updates validation assets without reopening family-expansion work diff --git a/docs/superpowers/specs/2026-04-19-g2-remaining-fail-closed-closure-design.md b/docs/superpowers/specs/2026-04-19-g2-remaining-fail-closed-closure-design.md new file mode 100644 index 0000000..4ab0143 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g2-remaining-fail-closed-closure-design.md @@ -0,0 +1,44 @@ +# G2 Remaining Fail-Closed Closure Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 3: G2 / multi_mode_request` +> Parent Layer: `Layer C + Layer D` + +## Intent + +Reduce the remaining `G2 / multi_mode_request` structured fail-closed bucket after Route 2 is complete or deferred. + +## Fixed Input Bucket + +`multi_mode_request = 4` + +The child plan owns only the currently remaining `G2` structured fail-closed scenes. + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. 
route-local Route 3 inventory and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 2 assets +3. Route 4+ assets + +## Expected Delta + +1. reduce the Route 3 bucket count +2. preserve current real-sample `G2` executed-pass + +## Stop Rule + +Stop after the Route 3 bucket is rerun and either: + +1. reduced, or +2. explicitly deferred with named blocker + diff --git a/docs/superpowers/specs/2026-04-19-g3-enrichment-request-closure-design.md b/docs/superpowers/specs/2026-04-19-g3-enrichment-request-closure-design.md new file mode 100644 index 0000000..85cf1ff --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g3-enrichment-request-closure-design.md @@ -0,0 +1,68 @@ +# G3 Enrichment Request Closure Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` +> Parent Layer: `Layer C + Layer D` + +## Intent + +Reduce the largest repeated `G3 / paginated_enrichment` fail-closed subgroup by recovering missing enrichment-request contract evidence without relaxing gates. + +## Fixed Input Bucket + +Primary bucket: + +`paginated_enrichment + g3_enrichment_contract + secondary_request` + +This child plan targets the repeated scenes whose structured fail-closed state shows: + +1. `g3_enrichment_contract_complete` failed +2. request/response contract failed because `secondary_request` is missing + +## Current Pattern + +The current follow-up assets show a repeated subgroup where: + +1. a main paginated scene is recognized +2. primary request shape is sufficiently visible to classify as `paginated_enrichment` +3. enrichment request extraction is not closed +4. secondary response extraction is not closed + +This is the first recovery slice because it appears more frequently than the export-plan-specific slice. + +## Allowed Files + +1. 
`src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. route-local sweep follow-up assets created by this plan +6. route-local reports created by this plan + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. unrelated family manifests and promotion assets +3. Route 3, Route 4, Route 5, and Route 6 plan files + +## Expected Delta + +Expected delta is measured only against the Route 2 bucket: + +1. some `paginated_enrichment` fail-closed records should move from `g3_enrichment_contract` to either: + - `auto-pass`, or + - a narrower remaining contract blocker +2. no current `G3` canonical or real-sample pass may regress + +## Stop Rule + +Stop after: + +1. the targeted subgroup is rerun in a bounded way +2. coverage delta is measured +3. remaining unresolved `G3` fail-closed scenes are left for the next Route 2 child plan + +Do not absorb export-plan-specific work into this plan unless it is strictly required to preserve contract coherence for the targeted subgroup. + diff --git a/docs/superpowers/specs/2026-04-19-g3-export-plan-closure-design.md b/docs/superpowers/specs/2026-04-19-g3-export-plan-closure-design.md new file mode 100644 index 0000000..c269eaf --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g3-export-plan-closure-design.md @@ -0,0 +1,49 @@ +# G3 Export Plan Closure Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` +> Parent Layer: `Layer C + Layer D` + +## Intent + +Reduce the second repeated `G3 / paginated_enrichment` fail-closed subgroup by recovering missing export-plan evidence without loosening workflow completeness gates. 
+ +## Fixed Input Bucket + +Primary bucket: + +`paginated_enrichment + g3_export_plan + export_plan` + +This child plan targets the repeated scenes whose structured fail-closed state shows: + +1. `workflow_contract_complete` and/or `workflow_complete_for_archetype` failed because `export_plan` is missing +2. `g3_export_path_identified` failed because `g3_export_plan` is incomplete + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. route-local follow-up assets +6. route-local reports + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 3 and later implementation assets +3. promotion/board policy assets + +## Expected Delta + +1. reduce the count of `paginated_enrichment` fail-closed records driven primarily by export-plan absence +2. if scenes still fail, narrow them to a smaller residual blocker such as runtime scope or enrichment contract + +## Stop Rule + +Stop after the export-plan subgroup is rerun and the resulting residual bucket is explicitly measured. + +Do not continue into Route 2 residual closure under this plan. 
+ diff --git a/docs/superpowers/specs/2026-04-19-g3-real-sample-archetype-correction-design.md b/docs/superpowers/specs/2026-04-19-g3-real-sample-archetype-correction-design.md new file mode 100644 index 0000000..04756d6 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g3-real-sample-archetype-correction-design.md @@ -0,0 +1,142 @@ +# G3 Real Sample Archetype Correction Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Roadmap: [2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/plans/2026-04-18-scene-skill-real-sample-validation-roadmap-plan.md) +> Trigger Report: [2026-04-19-g3-real-sample-execution-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g3-real-sample-execution-report.md) + +## Intent + +This design starts after the real-sample validation roadmap confirmed that the `G3` anchor real sample executes, but collapses into `local_doc_pipeline` and then fails closed. + +The purpose of this design is not to broaden the scene generator again. It is to correct the real-sample archetype routing boundary so that: + +1. the real sample `95598工单明细表` stays inside `G3 / paginated_enrichment` +2. `G8 / local_doc_pipeline` keeps its current boundary-family fail-closed role +3. `G3` and `G8` no longer compete on the same evidence tier for this real sample + +## Problem Statement + +The repo-local `G3` baseline is stable, but the real sample currently produces: + +1. executable input discovery +2. archetype collapse into `local_doc_pipeline` +3. fail-closed result due to incomplete `local_doc_pipeline` workflow evidence + +This means the strongest gap is no longer generic workflow incompleteness. The strongest gap is a real-sample archetype routing mismatch between: + +1. `G3` business request-chain evidence +2. 
`G8` local storage / document pipeline evidence + +## Scope + +This design covers only the bounded correction needed to resolve the above mismatch. + +Included: + +1. compare repo-local `G3` canonical evidence with real-sample evidence +2. split business request-chain evidence from local document pipeline evidence +3. re-order or tighten archetype routing between `G3` and `G8` +4. add regression coverage for the real-sample mismatch pattern +5. re-run the real sample and record the corrected outcome + +Excluded: + +1. opening `G4 / G5` +2. expanding `G6 / G7 / G8` runtime implementation +3. broad runtime integration work such as login recovery or transport redesign +4. generalized scene-generator redesign outside the `G3 vs G8` routing boundary +5. continuing batch expansion or new fixture-family growth unrelated to this mismatch + +## Design Principles + +1. Mainline first + The correction serves the mainline `G3` real-sample path first. Boundary-family preservation is a constraint, not the main objective. + +2. Fail-closed must remain intact + The fix must not weaken fail-closed behavior for truly incomplete `G3` or `G8` inputs. + +3. Evidence tier separation + Local SQL, doc export, local config, or helper persistence evidence must not outrank business request-chain evidence when the sample still has a recoverable `G3` chain. + +4. Real-sample anchored + Acceptance is defined by the real sample outcome, not by repo-local fixture success alone. + +## Evidence Model Adjustment + +The current mismatch implies that the analyzer and generator need a sharper evidence split between two layers: + +1. `business_workflow_evidence` + - main request + - pagination fields + - enrichment requests + - join keys + - export path connected to the business chain + +2. 
`local_pipeline_evidence` + - local persistence + - `definedSqlQuery` + - `docExport` + - local helper service + - host or localhost pipeline artifacts + +For this correction, the routing rule must treat `local_pipeline_evidence` as secondary when: + +1. the `G3` business chain is materially present +2. the local pipeline is downstream support or artifact generation +3. the sample still matches the `G3` minimal contract more strongly than the `G8` minimal contract + +## Routing Boundary Decision + +The required routing decision is: + +1. prefer `paginated_enrichment` when the sample contains: + - a main request + - pagination control + - at least one enrichment or detail chain + - join-key recoverability +2. route to `local_doc_pipeline` only when local pipeline evidence is the dominant workflow backbone and the business request chain cannot form a `G3` contract + +This means `G8` remains valid, but its trigger threshold must be higher when a recoverable `G3` mainline exists. + +## Expected Code Touch Points + +This design is expected to touch only the current generated-scene core: + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/ir.rs` +3. `src/generated_scene/generator.rs` +4. `tests/scene_generator_test.rs` +5. real-sample validation assets under `tests/fixtures/generated_scene/` + +## Validation Strategy + +The correction must be verified in three layers: + +1. deterministic routing regression + prove that the `G3 vs G8` evidence split behaves as intended + +2. generator regression + prove that the corrected path still compiles or fail-closes for the right reason + +3. real-sample rerun + prove that `95598工单明细表` no longer collapses into `local_doc_pipeline` + +## Success Criteria + +This design is considered satisfied when: + +1. the `G3` real sample no longer routes into `local_doc_pipeline` +2. the real sample resolves as `paginated_enrichment`, or fail-closes inside `G3` for a `G3`-specific reason +3. 
`G8` representative behavior remains intact +4. the real-sample validation layer records the corrected family outcome + +## Non-Goals + +This design does not try to guarantee that the `G3` real sample becomes fully runnable in one step. + +If the corrected run still fails, that is acceptable only when: + +1. the failure remains inside `G3` +2. the blocker is a real `G3` contract gap +3. the result no longer depends on accidental collapse into `G8` diff --git a/docs/superpowers/specs/2026-04-19-g3-real-sample-output-contract-verification-design.md b/docs/superpowers/specs/2026-04-19-g3-real-sample-output-contract-verification-design.md new file mode 100644 index 0000000..bebbf45 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g3-real-sample-output-contract-verification-design.md @@ -0,0 +1,59 @@ +# G3 Real Sample Output Contract Verification Design + +> Date: 2026-04-19 +> Upstream Closure: [2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g3-real-sample-runtime-contract-correction-closure-report.md) + +## 1. Intent + +The previous bounded plan corrected the remaining runtime-scope failure for the real sample `95598工单明细表`. + +The sample now: + +1. routes inside `G3 / paginated_enrichment` +2. passes `g3_runtime_scope_compatible` +3. reaches `readiness.level = A` + +The remaining mainline gap is narrower: + +`output_contract_not_verified` + +This design defines the next bounded scope: + +`G3 real-sample output / contract verification` + +## 2. Observed Remaining Gap + +The generated real-sample package now satisfies structural routing and runtime-scope gates, but the validation layer still does not verify: + +1. whether the main request / enrichment split matches the intended business output +2. whether recovered join keys and dedupe rules are semantically correct rather than merely syntactically complete +3. 
whether the current generated artifact shape matches the expected real-sample output contract + +This means the remaining risk is no longer routing or runtime admission. It is output-level contract fidelity. + +## 3. Scope Guardrails + +1. do not reopen the completed `G3` archetype-correction scope +2. do not reopen the completed `G3` runtime-scope correction scope +3. do not broaden this work into `G8` runtime implementation +4. do not reopen `G3` family expansion or new fixture growth +5. do not open `G4 / G5` +6. do not weaken fail-closed behavior to force a `passed` record + +## 4. Correction Target + +The bounded target is: + +1. verify the real-sample `G3` output contract against the intended business contract +2. narrow the remaining mismatch from generic `output_contract_not_verified` to: + - verified pass + - or a smaller named contract/output mismatch +3. keep the result anchored in the real sample rather than repo-local proxies + +## 5. Expected Outcome + +After this scope: + +1. the validation record for `rsv-g3-001` should either become `executed-pass` +2. or it should retain `executed-mismatch` with a more specific output/contract code than the current generic label +3. the next scope recommendation should move away from `G3` unless a genuinely narrower output issue remains diff --git a/docs/superpowers/specs/2026-04-19-g3-real-sample-runtime-contract-correction-design.md b/docs/superpowers/specs/2026-04-19-g3-real-sample-runtime-contract-correction-design.md new file mode 100644 index 0000000..6de7d10 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g3-real-sample-runtime-contract-correction-design.md @@ -0,0 +1,53 @@ +# G3 Real Sample Runtime Contract Correction Design + +> Date: 2026-04-19 +> Upstream Closure: [2026-04-19-g3-real-sample-archetype-correction-closure-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g3-real-sample-archetype-correction-closure-report.md) + +## 1. 
Intent + +The previous bounded plan corrected the real-sample routing boundary for `95598工单明细表`. + +The sample now stays in `G3 / paginated_enrichment`, but it still leaves a mainline gap: + +`g3_runtime_scope_compatible = false` + +This design defines the next bounded scope: + +`G3 real-sample runtime / contract correction` + +## 2. Observed Remaining Gap + +The corrected real-sample rerun shows: + +1. archetype is now `paginated_enrichment` +2. main request, pagination, enrichment, join keys, and export path are all present +3. the remaining blocker is the current runtime-scope rule, which treats the volume of localhost evidence as incompatible + +The current gate is too coarse for this real sample because the localhost evidence is subordinate to the restored business chain, not the controlling workflow backbone. + +## 3. Scope Guardrails + +1. do not reopen `G3` family expansion +2. do not broaden this work into `G8` runtime implementation +3. do not change `G6 / G7 / G8` behavior except where a shared generic gate must remain consistent +4. do not weaken fail-closed behavior for scenes that still do not satisfy the `G3` minimum contract +5. do not treat asset-only updates as progress unless they follow a real rerun result + +## 4. Correction Target + +The bounded target is: + +1. keep the real sample in `paginated_enrichment` +2. narrow `g3_runtime_scope_compatible` so it distinguishes: + - subordinate host-runtime dependencies inside a valid `G3` business chain + - dominant host-runtime dependencies that still justify fail-closed +3. preserve explicit visibility of remaining output or data-verification gaps + +## 5. Expected Outcome + +After correction: + +1. the real sample should still resolve as `paginated_enrichment` +2. `g3_runtime_scope_compatible` should pass when localhost evidence is present but subordinate +3. any remaining mismatch should move from `runtime_scope_gap` to a narrower contract or output-verification gap +4. 
`G8` representative behavior must not regress diff --git a/docs/superpowers/specs/2026-04-19-g3-residual-contract-closure-design.md b/docs/superpowers/specs/2026-04-19-g3-residual-contract-closure-design.md new file mode 100644 index 0000000..eba31db --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g3-residual-contract-closure-design.md @@ -0,0 +1,52 @@ +# G3 Residual Contract Closure Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 2: G3 / paginated_enrichment` +> Parent Layer: `Layer C + Layer D` + +## Intent + +Handle the remaining `G3` fail-closed records that are still unresolved after the enrichment-request and export-plan child plans have finished. + +## Fixed Input Bucket + +Residual `G3 / paginated_enrichment` records after: + +1. `G3 enrichment-request closure` +2. `G3 export-plan closure` + +Expected residual themes: + +1. `g3_runtime_scope` +2. `join_key` +3. mixed residual contract blockers + +## Allowed Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. `src/generated_scene/ir.rs` +4. `tests/scene_generator_test.rs` +5. route-local residual inventory and report assets + +## Forbidden Files + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. Route 3+ plan files + +## Expected Delta + +1. reduce the residual Route 2 bucket after the first two child plans +2. or explicitly defer a smaller residual set with named blockers + +## Stop Rule + +Stop when: + +1. the residual Route 2 bucket is either materially reduced, or +2. the remaining residual Route 2 scenes are explicitly named and deferred + +After this point, Route 2 is considered complete or deferred. 
+ diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-semantics-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-semantics-design.md new file mode 100644 index 0000000..a8ec78f --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-semantics-design.md @@ -0,0 +1,51 @@ +# G6 Host-Bridge Callback Semantics Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: [2026-04-19-g6-host-bridge-execution-semantics-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g6-host-bridge-execution-semantics-report.md) + +## 1. Intent + +This design defines the next bounded slice after `G6 host-bridge execution semantics`. + +The target is: + +`G6 host-bridge callback semantics` + +## 2. Why This Slice + +The previous semantic slice isolated two seams, but the tighter next pressure is callback-side semantics: + +1. invocation semantics are already identified +2. callback completion semantics still determine whether later real execution can be bounded safely + +## 3. Scope Boundary + +This design is limited to callback semantics only. + +It may include: + +1. defining completion states for callback requests +2. defining blocked/error/partial/ok transitions +3. defining how callback semantics constrain later real-sample entry + +It must not include: + +1. implementing host-runtime directly +2. executing a `G6` real sample +3. reopening `G7` +4. opening `G8` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6 host-bridge callback semantics` plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes callback semantics only +2. separates completion state logic from transport/runtime implementation +3. 
emits one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-state-verification-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-state-verification-design.md new file mode 100644 index 0000000..8a1f93f --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-callback-state-verification-design.md @@ -0,0 +1,53 @@ +# G6 Host-Bridge Callback State Verification Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: [2026-04-19-g6-host-bridge-callback-semantics-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-semantics-report.md) + +## 1. Intent + +This design defines the next bounded slice after `G6 host-bridge callback semantics`. + +The target is: + +`G6 host-bridge callback state verification` + +## 2. Why This Slice + +The callback states are now explicit, but they have not yet been bounded into a verification-oriented slice. + +The next pressure is narrower: + +1. verify state transitions as a bounded model +2. keep that verification separate from real execution and host-runtime implementation + +## 3. Scope Boundary + +This design is limited to callback state verification only. + +It may include: + +1. defining bounded verification targets for `ok/partial/blocked/error` +2. defining what evidence is sufficient for each transition +3. defining how verification narrows a later `G6` real-sample entry + +It must not include: + +1. implementing host-runtime directly +2. executing a `G6` real sample +3. opening `G8` +4. reopening `G7` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6 host-bridge callback state verification` plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes callback-state verification scope +2. defines bounded verification targets +3. 
emits one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-design.md new file mode 100644 index 0000000..bfdc664 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-design.md @@ -0,0 +1,52 @@ +# G6 Host-Bridge Entry Gate Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: [2026-04-19-g6-host-bridge-entry-readiness-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-readiness-report.md) + +## 1. Intent + +This design defines the next bounded slice after `G6 host-bridge entry readiness`. + +The target is: + +`G6 host-bridge entry gate` + +## 2. Why This Slice + +The semantic readiness criteria are now explicit. + +The next bounded pressure is: + +1. turn those criteria into a bounded future entry gate without opening real execution + +## 3. Scope Boundary + +This design is limited to gate modeling only. + +It may include: + +1. defining pass/fail gate conditions for a future `G6` entry slice +2. defining which readiness criteria are hard blockers +3. defining how the gate narrows later real-sample entry + +It must not include: + +1. executing a `G6` real sample +2. implementing host-runtime directly +3. opening `G8` +4. reopening `G7` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6 host-bridge entry gate` plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes gate-model scope +2. defines bounded gate conditions +3. 
emits one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-verification-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-verification-design.md new file mode 100644 index 0000000..f948ad6 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-gate-verification-design.md @@ -0,0 +1,52 @@ +# G6 Host-Bridge Entry Gate Verification Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: [2026-04-19-g6-host-bridge-entry-gate-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g6-host-bridge-entry-gate-report.md) + +## 1. Intent + +This design defines the next bounded slice after `G6 host-bridge entry gate`. + +The target is: + +`G6 host-bridge entry gate verification` + +## 2. Why This Slice + +The future entry gate is now explicit. + +The next bounded pressure is: + +1. verify the gate model itself before any later real-sample entry slice is considered + +## 3. Scope Boundary + +This design is limited to gate verification only. + +It may include: + +1. defining bounded verification targets for the hard gate +2. defining how fail-close reasons are checked +3. defining how gate verification narrows later `G6` entry work + +It must not include: + +1. executing a `G6` real sample +2. implementing host-runtime directly +3. opening `G8` +4. reopening `G7` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6 host-bridge entry gate verification` plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes gate-verification scope +2. defines bounded verification targets +3. 
emits one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-readiness-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-readiness-design.md new file mode 100644 index 0000000..b6f3aaf --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-entry-readiness-design.md @@ -0,0 +1,51 @@ +# G6 Host-Bridge Entry Readiness Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: [2026-04-19-g6-host-bridge-callback-state-verification-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g6-host-bridge-callback-state-verification-report.md) + +## 1. Intent + +This design defines the next bounded slice after `G6 host-bridge callback state verification`. + +The target is: + +`G6 host-bridge entry readiness` + +## 2. Why This Slice + +The callback states are now explicit and their verification priority is explicit. + +The next bounded pressure is: + +1. determine whether those bounded semantics are sufficient to define a future `G6` real-sample entry gate + +## 3. Scope Boundary + +This design is limited to entry-readiness modeling only. + +It may include: + +1. defining bounded readiness criteria for future `G6` entry +2. defining which semantic pieces must be present before `G6` real-sample execution may be opened + +It must not include: + +1. executing a `G6` real sample +2. implementing host-runtime directly +3. opening `G8` +4. reopening `G7` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6 host-bridge entry readiness` plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes entry-readiness scope +2. defines bounded readiness criteria +3. 
emits one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-execution-semantics-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-execution-semantics-design.md new file mode 100644 index 0000000..6f6a67c --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-execution-semantics-design.md @@ -0,0 +1,51 @@ +# G6 Host-Bridge Execution Semantics Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: [2026-04-19-g6-host-bridge-prerequisites-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g6-host-bridge-prerequisites-report.md) + +## 1. Intent + +This design defines the next bounded slice after `G6 host-bridge prerequisites` isolates the minimum blocked capability. + +The target is: + +`G6 host-bridge execution semantics` + +## 2. Why This Slice + +This slice is selected because the remaining `G6` gap is narrower than broad host-runtime implementation: + +1. bridge action invocation semantics +2. callback completion semantics + +## 3. Scope Boundary + +This design is limited to semantic scoping only. + +It may include: + +1. defining the minimum bridge action semantic +2. defining the minimum callback completion semantic +3. defining how those semantics bound later `G6` real-sample entry + +It must not include: + +1. implementing host-runtime directly +2. executing a `G6` real sample +3. opening `G8` +4. reopening `G7` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6 host-bridge execution semantics` plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes the semantic boundary +2. separates bridge invocation from callback completion +3. 
emits one bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-host-bridge-prerequisites-design.md b/docs/superpowers/specs/2026-04-19-g6-host-bridge-prerequisites-design.md new file mode 100644 index 0000000..a8f93ff --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-host-bridge-prerequisites-design.md @@ -0,0 +1,53 @@ +# G6 Host-Bridge Prerequisites Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Decision: [2026-04-19-boundary-runtime-prerequisites-decision-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-boundary-runtime-prerequisites-decision-report.md) + +## 1. Intent + +This design defines the next bounded slice after the boundary-runtime prerequisites roadmap selects `G6 host-bridge prerequisites`. + +The target is: + +`G6 host-bridge prerequisites` + +## 2. Why G6 First + +`G6` is selected because: + +1. it is blocked by one clearer prerequisite line +2. that prerequisite line is narrower than the combined local-doc and attachment burden on `G8` +3. it is the smaller bounded next step after `G7` + +## 3. Scope Boundary + +This design is limited to prerequisite scoping only. + +It may include: + +1. isolating the minimum host-bridge execution semantics needed before `G6` real-sample entry +2. defining a bounded prerequisite slice +3. publishing one follow-up bounded plan + +It must not include: + +1. executing a `G6` real sample +2. implementing host-runtime directly +3. reopening `G7` +4. opening `G8` +5. opening `G4 / G5` + +## 4. Target Outcome + +The bounded target outcome is one state: + +1. one bounded `G6` prerequisites plan + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes the `G6` prerequisite boundary +2. isolates the minimum blocked host-bridge capability +3. 
publishes a bounded follow-up plan diff --git a/docs/superpowers/specs/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-design.md b/docs/superpowers/specs/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-design.md new file mode 100644 index 0000000..4509b04 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g6-real-sample-entry-preparation-and-bounded-execution-design.md @@ -0,0 +1,111 @@ +# G6 Real-Sample Entry Preparation And Bounded Execution Design + +> Date: 2026-04-19 +> Status: Draft +> Replaces Further G6 Micro-Planning: use this design as the single surviving `G6` design reference + +## 1. Intent + +This design replaces the previous over-split `G6` micro-planning chain with one implementation-oriented bounded design. + +The target is: + +`G6 real-sample entry preparation and bounded execution` + +## 2. Why This Redesign Exists + +The prior `G6` work was split too finely into: + +1. prerequisites +2. execution semantics +3. callback semantics +4. callback-state verification +5. entry readiness +6. entry gate + +That chain produced useful conclusions, but it also created planning recursion. + +This redesign stops that recursion. + +The older `G6` planning documents are now treated only as input material, not as separate execution tracks. + +## 3. Preserved Inputs + +The only conclusions preserved from the earlier `G6` planning chain are: + +1. `G6` already has classification, family preservation, and a minimum runtime-contract shape +2. the remaining pressure is `host bridge real execution semantics` +3. callback completion states are already explicit: + - `blocked` + - `error` + - `partial` + - `ok` +4. the future `G6` fail-close reasons are already explicit: + - `g6_bridge_invocation_semantics_missing` + - `g6_callback_completion_semantics_missing` + - `g6_callback_state_targets_missing` + +No further `G6` semantic sub-plans should be opened for the same topic. + +## 4. 
Scope Boundary + +This design is limited to one bounded `G6` mainline preparation-and-execution slice. + +It may include: + +1. freezing one final `G6` entry gate +2. implementing one minimum host-bridge execution seam +3. running one fixed `G6` real sample +4. writing back one bounded validation result + +It must not include: + +1. opening more `G6` semantic sub-plans +2. reopening `G7` +3. opening `G8` +4. opening `G4 / G5` +5. broad host-runtime platform redesign +6. multi-sample `G6` family expansion + +## 5. Fixed Target + +This design allows only one `G6` fixed real-sample anchor. + +The exact sample must remain the existing `G6` representative real sample already referenced by current boundary-family materials. + +No second `G6` real sample may be introduced under this design. + +## 6. Target Outcome + +The bounded target outcome is only one of two states: + +1. `executed-pass` +2. `named mismatch` + +The design explicitly rejects a third outcome of “write another semantic clarification plan”. + +## 7. Stop Conditions + +This redesign introduces hard stop conditions: + +1. once the fixed `G6` real sample is executed, no new `G6` semantic sub-plan may be created +2. if the result is `mismatch`, only an implementation correction plan may follow +3. if the result is `executed-pass`, the `G6` line closes immediately + +## 8. Execution Shape + +The single surviving `G6` execution shape is: + +1. freeze the final entry gate +2. implement the minimum host-bridge execution seam +3. run the fixed real sample once +4. update validation assets and close + +## 9. Exit Condition + +This design is complete when one bounded plan exists that: + +1. freezes the final `G6` gate +2. moves directly into implementation +3. runs one fixed real sample +4. 
closes with `executed-pass` or `named mismatch` diff --git a/docs/superpowers/specs/2026-04-19-g7-real-sample-entry-design.md b/docs/superpowers/specs/2026-04-19-g7-real-sample-entry-design.md new file mode 100644 index 0000000..0740def --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-g7-real-sample-entry-design.md @@ -0,0 +1,57 @@ +# G7 Real-Sample Entry Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Decision: [2026-04-19-boundary-family-entry-decision-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-boundary-family-entry-decision-report.md) + +## 1. Intent + +This design defines the next bounded execution slice after the boundary-family entry roadmap selected `G7` as the only next candidate. + +The target is: + +`G7 real-sample entry` + +## 2. Why G7 + +`G7` is selected because: + +1. it already has a minimal runnable runtime contract +2. it does not require host bridge execution semantics as strongly as `G6` +3. it does not require local document pipeline and attachment handling as strongly as `G8` + +## 3. Scope Boundary + +This design is limited to one representative `G7` real sample: + +1. `计量资产库存统计` + +It may include: + +1. real-sample contract differential +2. bounded real-sample rerun +3. validation-layer update if the result narrows + +It must not include: + +1. new `G7` family expansion +2. new runtime-platform work +3. `G6` or `G8` execution +4. `G4 / G5` + +## 4. Target Outcome + +The bounded target outcome is one of two states: + +1. `executed-pass` +2. or a smaller named `G7` real-sample mismatch + +The design rejects opening generalized boundary-family work beyond this one representative sample. + +## 5. Exit Condition + +This design is complete when implementation can be bounded to one plan that: + +1. freezes one `G7` real sample +2. reruns it against the existing minimal `G7` runtime contract +3. 
updates the validation layer with a narrower outcome diff --git a/docs/superpowers/specs/2026-04-19-host-bridge-runtime-roadmap-design.md b/docs/superpowers/specs/2026-04-19-host-bridge-runtime-roadmap-design.md new file mode 100644 index 0000000..e0bd140 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-host-bridge-runtime-roadmap-design.md @@ -0,0 +1,27 @@ +# Host-Bridge Runtime Roadmap Design + +> Date: 2026-04-19 +> Parent Sequence: `2026-04-19-final-2-residual-child-plan-sequence-plan.md` +> Fixed Scene: `sweep-085-scene` + +## Intent + +Close or narrow the remaining host-bridge runtime residual without resurrecting the old G6 semantics micro-plan chain. + +## Fixed Scope + +Only `sweep-085-scene` is in scope. + +## Minimal Success Definition + +The scene must either: + +1. become `framework-auto-pass-candidate`; or +2. remain `framework-structured-fail-closed` with a narrower named host-bridge runtime hold. + +## Forbidden Scope + +1. no general host-runtime transport implementation +2. no new G6 semantics micro-plan +3. no changes to G1-E/G2/G3 routes +4. no new family diff --git a/docs/superpowers/specs/2026-04-19-local-doc-official-board-reconciliation-refresh-design.md b/docs/superpowers/specs/2026-04-19-local-doc-official-board-reconciliation-refresh-design.md new file mode 100644 index 0000000..c22f208 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-local-doc-official-board-reconciliation-refresh-design.md @@ -0,0 +1,70 @@ +# Local-Doc Official Board Reconciliation Refresh Design + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Roadmap: `2026-04-19-local-doc-runtime-roadmap-plan.md` +> Layer: `Layer E / official board reconciliation` + +## Intent + +Consume the five local-doc reconciliation candidates produced by the local-doc runtime roadmap and refresh only their framework status in the official execution board. 
+ +This design exists because the local-doc roadmap intentionally stopped before official board update. The board update needs a bounded reconciliation refresh that applies the promotion policy without modifying generation logic. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/local_doc_runtime_reconciliation_candidates_2026-04-19.json` +2. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Scope + +Only these five scene ids are in scope: + +1. `sweep-033-scene` +2. `sweep-034-scene` +3. `sweep-042-scene` +4. `sweep-051-scene` +5. `sweep-074-scene` + +## Refresh Rule + +If a fixed-scope scene is present in the local-doc candidate asset as `framework-auto-pass-candidate`, update only framework-layer board fields: + +- `currentFrameworkStatus` +- `currentFrameworkCandidateStatus` +- `currentFrameworkArchetype` +- `currentFrameworkReadiness` +- `currentFrameworkSource` +- `currentFrameworkDecisionOverlay` +- `currentFrameworkNextAction` +- `currentFrameworkCanAutoUpdateBoard` + +Workbook snapshot fields, `currentGroup`, `currentStatus`, real-sample fields, and scene names are preserved. + +## Forbidden Scope + +1. Do not modify `src/generated_scene/analyzer.rs`. +2. Do not modify `src/generated_scene/generator.rs`. +3. Do not rerun the 102 sweep. +4. Do not update host-bridge residuals. +5. Do not update bootstrap residuals. +6. Do not rename official-board scenes. +7. Do not promote non-framework business status. + +## Expected Result + +The official board framework summary moves from: + +- `framework-auto-pass = 95` +- `framework-structured-fail-closed = 7` + +to: + +- `framework-auto-pass = 100` +- `framework-structured-fail-closed = 2` + +The remaining two structured fail-closed records should be: + +- one host-bridge runtime residual; +- one bootstrap target normalization residual. 
diff --git a/docs/superpowers/specs/2026-04-19-local-doc-runtime-roadmap-design.md b/docs/superpowers/specs/2026-04-19-local-doc-runtime-roadmap-design.md new file mode 100644 index 0000000..e5cffac --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-local-doc-runtime-roadmap-design.md @@ -0,0 +1,61 @@ +# Local-Doc Runtime Roadmap Design + +> Date: 2026-04-19 +> Parent Decision: `2026-04-19-residual-runtime-roadmap-prioritization-design.md` +> Parent Residual Bucket: `local_doc_pipeline` +> Status: Draft + +## Intent + +Define the next bounded roadmap for the five remaining `local_doc_pipeline` residuals. + +This roadmap exists because official board reconciliation identified five explained structured fail-closed scenes that require local document runtime and attachment/document handling semantics. + +## Fixed Input Bucket + +The roadmap is limited to the five official board residuals with: + +1. `currentFrameworkStatus = framework-structured-fail-closed` +2. `currentFrameworkArchetype = local_doc_pipeline` +3. `currentFrameworkNextAction = future-local-doc-runtime-roadmap-input` + +## Target Scenes + +1. `sweep-033-scene` / `供电可靠率指标统计表` +2. `sweep-034-scene` / `供电可靠性数据质量自查报告月报` +3. `sweep-042-scene` / `国网金昌供电公司营商环境周例会报告` +4. `sweep-051-scene` / `嘉峪关可靠性分析报告` +5. `sweep-074-scene` / `同兴智能安全督查日报` + +## Roadmap Goal + +Move the five local-doc residuals from generic structured fail-closed to one of: + +1. runnable local-doc contract; +2. named local-doc runtime missing capability; +3. explicit non-goal that remains policy-held. + +## Boundary + +This design must not: + +1. modify host-bridge runtime; +2. open bootstrap target normalization; +3. add a new family; +4. update the official board without a dedicated reconciliation step; +5. treat local document runtime as generic paginated enrichment. + +## Required Work Areas + +1. local document source evidence extraction; +2. document artifact contract; +3. attachment/input dependency modeling; +4. 
local pipeline execution seam; +5. fail-closed reasons specific to local-doc runtime. + +## Acceptance Criteria + +1. The five target scenes remain the only input bucket. +2. Each scene has a local-doc runtime decision. +3. Any implementation step remains bounded to `local_doc_pipeline`. +4. Follow-up reconciliation is explicit and separate. diff --git a/docs/superpowers/specs/2026-04-19-official-board-reconciliation-design.md b/docs/superpowers/specs/2026-04-19-official-board-reconciliation-design.md new file mode 100644 index 0000000..bfdbcc5 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-official-board-reconciliation-design.md @@ -0,0 +1,63 @@ +# Official Board Reconciliation Design + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-design.md` +> Parent Layer: `Layer E` +> Status: Active + +## Intent + +Apply the final 102-scene coverage rollup to the official execution board in a controlled, auditable way. + +This design is the only point where `scene_execution_board_2026-04-18.json` may be updated from the final coverage rollup. + +## Inputs + +1. `tests/fixtures/generated_scene/final_coverage_status_rollup_2026-04-19.json` +2. `tests/fixtures/generated_scene/promotion_board_reconciliation_policy_2026-04-19.json` +3. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +## Outputs + +1. updated `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/official_board_reconciliation_2026-04-19.json` +3. `docs/superpowers/reports/2026-04-19-official-board-reconciliation-report.md` + +## Status Mapping + +1. `framework-auto-pass-candidate` maps to `framework-auto-pass`. +2. `framework-structured-fail-closed` maps to `framework-structured-fail-closed`. +3. `framework-valid-host-bridge` maps to `framework-valid-host-bridge`. +4. unresolved raw statuses remain explicit and must not be collapsed. 
+ +## Board Fields + +The reconciliation may add or update only current framework fields: + +1. `currentFrameworkStatus` +2. `currentFrameworkArchetype` +3. `currentFrameworkReadiness` +4. `currentFrameworkSource` +5. `currentFrameworkDecisionOverlay` +6. `currentFrameworkNextAction` +7. `currentFrameworkCanAutoUpdateBoard` + +Existing frozen workbook snapshot fields must be preserved. + +## Boundary + +This design must not: + +1. modify `src/generated_scene/analyzer.rs`; +2. modify `src/generated_scene/generator.rs`; +3. rerun the 102 sweep; +4. promote real-sample validation status; +5. remove existing snapshot fields from the board. + +## Acceptance Criteria + +1. Official board still contains exactly `102` scenes. +2. Final framework status counts are `95` framework auto-pass and `7` structured fail-closed. +3. No unresolved framework status remains. +4. Reconciliation JSON records all updated scenes. +5. Report explains the remaining `7` residuals and the next roadmap inputs. diff --git a/docs/superpowers/specs/2026-04-19-post-g7-boundary-decision-roadmap-design.md b/docs/superpowers/specs/2026-04-19-post-g7-boundary-decision-roadmap-design.md new file mode 100644 index 0000000..3841bc6 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-post-g7-boundary-decision-roadmap-design.md @@ -0,0 +1,111 @@ +# Post-G7 Boundary Decision Roadmap Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Validation Layer: [real_sample_validation_records_2026-04-18.json](D:/data/ideaSpace/rust/sgClaw/claw-new/tests/fixtures/generated_scene/real_sample_validation_records_2026-04-18.json) +> Upstream Entry Rules: [boundary_runtime_entry_rules_2026-04-18.json](D:/data/ideaSpace/rust/sgClaw/claw-new/tests/fixtures/generated_scene/boundary_runtime_entry_rules_2026-04-18.json) +> Upstream Closure: [2026-04-19-g7-real-sample-entry-closure-report.md](D:/data/ideaSpace/rust/sgClaw/claw-new/docs/superpowers/reports/2026-04-19-g7-real-sample-entry-closure-report.md) + +## 1. 
Intent + +This design defines the next bounded roadmap after `G7` has closed as the first executed boundary-family real sample. + +The current validated state is now: + +1. `G1-E = executed-pass` +2. `G2 = executed-pass` +3. `G3 = executed-pass` +4. `G7 = executed-pass` + +So the next roadmap must not reopen any closed mainline slice and must not continue extending the finished `G7` plan. + +The only question under this roadmap is: + +`After G7, should another boundary family enter real-sample scope next, or should boundary work stop and defer to prerequisites?` + +## 2. Problem Statement + +The prior boundary-entry roadmap solved the first ambiguity by selecting `G7`. + +That ambiguity is now closed. + +The remaining ambiguity is narrower: + +1. whether `G6` is now the next justified boundary-family entry candidate +2. whether `G8` is now the next justified boundary-family entry candidate +3. or whether both should remain held and a bounded prerequisites roadmap should be opened first + +Without a new roadmap, the next step would drift into one of three bad outcomes: + +1. reopening `G7` after closure +2. opening both `G6` and `G8` at once +3. starting runtime-platform implementation without a bounded decision slice + +## 3. Scope Boundary + +This roadmap is limited to a post-`G7` boundary-family decision. + +It may include: + +1. restating the now-closed `G7` result +2. comparing only `G6` and `G8` as remaining boundary candidates +3. determining whether one of them is admitted next +4. or determining that both remain held and a prerequisites slice is needed +5. publishing one bounded follow-up `design + plan` + +It must not include: + +1. reopening `G7` implementation or expansion +2. reopening `G1-E / G2 / G3` +3. opening `G4 / G5` +4. implementing host-runtime, transport, or local-doc prerequisites +5. executing real samples for more than one boundary family + +## 4. 
Current Decision Inputs + +The current repo state already provides the relevant admission constraints: + +1. `G6` still needs stronger host-bridge real execution semantics than current repo-local coverage +2. `G8` still needs stronger local document pipeline and attachment/runtime handling than current repo-local coverage +3. `G7` is no longer a candidate because it has already closed as an executed pass + +These are decision inputs only. + +They are not yet implementation tasks. + +## 5. Roadmap Goal + +The goal of this roadmap is to reduce the post-`G7` boundary question to one bounded next step: + +1. select exactly one next bounded direction +2. either `G6` +3. or `G8` +4. or a prerequisites-only slice with both held + +## 6. Preferred Outcome + +The preferred outcome is: + +1. either one selected next boundary family +2. or one bounded prerequisites roadmap +3. with the non-selected direction explicitly held + +## 7. Acceptance Logic + +This roadmap is successful when: + +1. `G6` and `G8` no longer compete ambiguously +2. `G7` is not reopened +3. only one bounded next direction is emitted +4. no runtime-platform implementation is started under roadmap scope + +## 8. Out of Scope + +The following are explicitly out of scope: + +1. new scene-generator family work +2. new canonical answers +3. new mainline contract correction +4. direct host-runtime implementation +5. direct local-doc runtime implementation +6. 
`G4 / G5` diff --git a/docs/superpowers/specs/2026-04-19-promotion-and-board-reconciliation-policy-design.md b/docs/superpowers/specs/2026-04-19-promotion-and-board-reconciliation-policy-design.md new file mode 100644 index 0000000..c79fb18 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-promotion-and-board-reconciliation-policy-design.md @@ -0,0 +1,48 @@ +# Promotion And Board Reconciliation Policy Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Route: `Route 6: promotion and board reconciliation` +> Parent Layer: `Layer E` + +## Intent + +Define how stronger framework-resolved statuses may update the official scene state representation without over-promoting diagnostics. + +## Fixed Input Bucket + +This route owns policy, not an archetype bucket. + +Fixed policy inputs: + +1. `auto-pass` +2. `fail-closed-known` +3. `adjudicated-valid-host-bridge` +4. hygiene-aware timeout interpretation + +## Allowed Files + +1. policy design and policy plan docs +2. reconciliation-policy JSON assets +3. policy reports + +## Forbidden Files + +1. `src/generated_scene/analyzer.rs` +2. `src/generated_scene/generator.rs` +3. direct scene promotion inside `scene_execution_board_2026-04-18.json` + +## Expected Delta + +Policy-only delta: + +1. future board updates become rule-driven +2. diagnostics and promotion are no longer conflated + +## Stop Rule + +Stop after the policy rules are published. + +Do not apply policy updates to the execution board under this plan. 
+ diff --git a/docs/superpowers/specs/2026-04-19-remaining-route-conflict-correction-design.md b/docs/superpowers/specs/2026-04-19-remaining-route-conflict-correction-design.md new file mode 100644 index 0000000..1bb4bcf --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-remaining-route-conflict-correction-design.md @@ -0,0 +1,93 @@ +# Remaining Route Conflict Correction Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Report: `docs/superpowers/reports/2026-04-19-102-full-sweep-improvement-coverage-delta-report.md` + +## Design Intent + +Resolve the remaining `4` route conflicts from the follow-up `102` sweep without reopening the broader full-sweep improvement roadmap. + +The design answers: + +`which of the remaining G3/G2 vs host_bridge_workflow conflicts should be corrected, and which should be formally adjudicated as valid host-bridge workflows?` + +## Fixed Input + +The fixed input set is exactly the `4` misclassified records from `tests/fixtures/generated_scene/full_sweep_improvement_followup_2026-04-19.json`: + +| Scene | Expected group | Expected archetype | Current inferred archetype | +| --- | --- | --- | --- | +| `95598报修工单日管控` | `G3` | `paginated_enrichment` | `host_bridge_workflow` | +| `95598重要服务事项报备统计表` | `G3` | `paginated_enrichment` | `host_bridge_workflow` | +| `台区线损台区月度高负损预测` | `G2` | `multi_mode_request` | `host_bridge_workflow` | +| `配网支撑月报(95598抢修统计报表)` | `G3` | `paginated_enrichment` | `host_bridge_workflow` | + +## Scope Guardrails + +1. do not add new scene families +2. do not reopen timeout work +3. do not reopen readiness-before-report work +4. do not update `scene_execution_board_2026-04-18.json` +5. do not promote scenes automatically +6. do not weaken `G6` host-bridge real-sample pass +7. do not weaken `G2` or `G3` canonical / real-sample pass +8. do not make `host_bridge_workflow` lose when it is the only complete contract + +## Route Decision Model + +Each conflict must be assigned exactly one final route decision: + +1. 
`route-corrected-to-g3` +2. `route-corrected-to-g2` +3. `valid-host-bridge-workflow` +4. `board-expectation-stale` +5. `route-conflict-unresolved` + +## Evidence Rules + +### G3 Wins Over G6 Only When + +1. business endpoint evidence is present +2. pagination evidence is present +3. response path evidence is present +4. at least one of enrichment, join-key, or export workflow evidence is present +5. host bridge evidence is subordinate rather than the only execution path + +### G2 Wins Over G6 Only When + +1. line-loss / electricity business signal is present +2. mode or prediction signal is present +3. request contract can be inferred +4. host bridge evidence is subordinate rather than the only execution path + +### G6 Remains Valid When + +1. host bridge action is the only complete execution path +2. callback / localhost dependency dominates the workflow +3. business-chain evidence does not close the expected G2/G3 contract + +## Expected Deliverables + +1. route conflict decision JSON +2. route conflict correction report +3. bounded routing regression tests if implementation correction is needed +4. follow-up probe result for the same `4` records + +## Completion Criteria + +This design is complete when: + +1. all `4` conflicts have explicit final decisions +2. corrected routes are verified by targeted generation probes +3. valid host-bridge cases remain documented rather than forced into G2/G3 +4. existing `G2/G3/G6` regressions still pass + +## Out of Scope + +1. full `102` sweep rerun unless explicitly required after route correction +2. timeout optimization +3. new family creation +4. login / host runtime implementation +5. 
execution board status sync + diff --git a/docs/superpowers/specs/2026-04-19-residual-runtime-roadmap-prioritization-design.md b/docs/superpowers/specs/2026-04-19-residual-runtime-roadmap-prioritization-design.md new file mode 100644 index 0000000..4b9020d --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-residual-runtime-roadmap-prioritization-design.md @@ -0,0 +1,47 @@ +# Residual Runtime Roadmap Prioritization Design + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-design.md` +> Parent Layer: `Layer E` +> Status: Active + +## Intent + +Choose the next bounded roadmap after official board reconciliation. + +The official board now has `95` framework auto-pass scenes and `7` explained structured fail-closed residuals. This design compares the three residual roadmap inputs and selects exactly one next roadmap. + +## Inputs + +1. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. `tests/fixtures/generated_scene/official_board_reconciliation_2026-04-19.json` + +## Candidate Roadmaps + +1. `local-doc runtime roadmap` +2. `host-bridge runtime roadmap` +3. `bootstrap target normalization roadmap` + +## Decision Criteria + +1. `impact`: number of residual scenes addressed. +2. `scope clarity`: whether the required implementation boundary is clear. +3. `prerequisite weight`: whether the roadmap requires large external runtime work. +4. `risk`: likelihood of disturbing the already reconciled `95` framework auto-pass scenes. + +## Boundary + +This design is decision-only. It must not: + +1. modify `src/generated_scene/analyzer.rs`; +2. modify `src/generated_scene/generator.rs`; +3. modify `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json`; +4. start runtime implementation; +5. add a new family. + +## Acceptance Criteria + +1. All `7` residual records are accounted for. +2. Exactly one next roadmap is selected. +3. Deferred roadmaps have explicit reasons. +4. 
A next bounded design/plan is created for the selected roadmap only. diff --git a/docs/superpowers/specs/2026-04-19-scene-skill-102-final-materialization-design.md b/docs/superpowers/specs/2026-04-19-scene-skill-102-final-materialization-design.md new file mode 100644 index 0000000..4f16b92 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-scene-skill-102-final-materialization-design.md @@ -0,0 +1,98 @@ +# Scene Skill 102 Final Materialization Design + +> Date: 2026-04-19 +> Parent Framework: `2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Upstream Status: `framework-auto-pass = 102 / 102` + +## Intent + +Freeze a single final skill asset set for the current 102 scenes. + +The previous framework work proved that all 102 scenes can be adapted at the framework layer, but the generated skill packages are spread across multiple follow-up directories. Before static, mock, or production-like validation, the project needs one canonical materialized skill set. + +## Key Decision + +Do not clean or overwrite existing `examples/*` follow-up directories. + +Instead, create a new isolated materialization root: + +`examples/scene_skill_102_final_materialization_2026-04-19` + +Rationale: + +1. previous `examples/*` directories are audit artifacts for earlier plans; +2. deleting them would destroy provenance; +3. a new root gives validation a stable input set; +4. final materialization can be repeated without mutating history. + +## Inputs + +1. official board: `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +2. final framework rollup: `tests/fixtures/generated_scene/scene_skill_102_framework_closure_rollup_2026-04-19.json` +3. scene source root: `D:/desk/智能体资料/全量业务场景/一平台场景` +4. generator binary: `cargo run --bin sg_scene_generate` + +## Required Hygiene + +Before generation, build a clean materialization input manifest. + +The official board is the status authority, but it may contain historical encoding or control-character artifacts. 
The materialization manifest must therefore validate: + +1. exactly 102 rows; +2. unique scene ids; +3. source directory exists for each row; +4. scene name used for generation is stable; +5. unsafe control characters are not propagated into final manifest fields. + +## Output Layout + +```text +examples/scene_skill_102_final_materialization_2026-04-19/ + skills/ + sweep-001-scene/ + SKILL.toml + SKILL.md + scene.toml + scripts/ + references/ + generation-report.json + generation-report.md + manifest/ + scene_skill_102_final_materialization_manifest_2026-04-19.json +``` + +Repository-level fixture outputs: + +1. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +2. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` + +Report: + +1. `docs/superpowers/reports/2026-04-19-scene-skill-102-final-materialization-report.md` + +## Success Criteria + +The materialization is successful when: + +1. all 102 scene ids are attempted; +2. all 102 have a generated skill directory; +3. each generated skill directory has required files; +4. each generated report has `readiness.level = A` or otherwise has a named failure in the failures asset; +5. the manifest is the only input to later static/mock validation plans. + +## Non-Goals + +1. no production execution; +2. no mock validation; +3. no static validation beyond presence/manifest checks; +4. no deletion of old `examples/*`; +5. no official board mutation; +6. no new family or runtime implementation. + +## Follow-Up + +After materialization succeeds, the next roadmap should be: + +`102 static and mock validation roadmap` + +That roadmap must consume the final materialization manifest, not scattered follow-up directories. 
diff --git a/docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-design.md b/docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-design.md new file mode 100644 index 0000000..f3e968f --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-child-plan-sequence-design.md @@ -0,0 +1,221 @@ +# Scene Skill 102 Full Coverage Child Plan Sequence Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework Design: `docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-framework-design.md` +> Parent Framework Plan: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` + +## Intent + +Turn the parent `102` full-coverage framework into a fixed downstream child-plan sequence. + +This design does not implement any bucket directly. It defines: + +1. the ordered bounded plans that must be created and executed next +2. which bucket each bounded plan owns +3. which layer and route each bounded plan belongs to +4. which plans are implementation plans and which plans are policy/reconciliation plans +5. what later plans are not allowed to skip + +The main purpose is to stop later work from drifting into ad hoc micro-plans. + +## Current Parent Baseline + +The parent framework freezes the current integrated state as: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 47 | +| `adjudicated-valid-host-bridge` | 4 | +| raw `source-unreadable` | 3 | +| Total | 102 | + +The timeout hygiene layer additionally shows: + +| Hygiene interpretation | Count | +| --- | ---: | +| `timeout-as-pass-candidate` | 2 | +| `timeout-as-fail-closed-candidate` | 1 | +| `timeout-still-unreadable` | 0 | + +## Child Sequence Principles + +Every child plan in this sequence must follow the parent framework requirements: + +1. one child plan belongs to exactly one route +2. one child plan belongs to exactly one layer +3. 
one child plan owns one fixed input bucket +4. one child implementation slice should target one repeated recoverable pattern +5. child plans must not silently absorb neighboring buckets +6. a child plan must stop after its declared delta is measured + +## Ordered Child Routes + +The child plan sequence begins at `Route 2`, because `Route 1` has already been completed at the parent-framework level. + +### Route 2 + +`Layer C + Layer D` + +Target: + +`G3 / paginated_enrichment structured fail-closed bucket` + +Current bucket size: + +`34` + +Fixed bounded child-plan order inside Route 2: + +1. `G3 enrichment-request closure` +2. `G3 export-plan closure` +3. `G3 residual contract closure` + +The residual plan only begins after the first two plans have either: + +1. produced measurable delta, or +2. been explicitly closed as deferred + +### Route 3 + +`Layer C + Layer D` + +Target: + +`G2 / multi_mode_request structured fail-closed bucket` + +Current bucket size: + +`4` + +Fixed bounded child-plan order inside Route 3: + +1. `G2 remaining fail-closed closure` + +No Route 3 follow-up plan may begin until Route 2 has been completed or explicitly deferred. + +### Route 4 + +`Layer C + Layer D` + +Target: + +`G1-E / single_request_enrichment structured fail-closed bucket` + +Current bucket size: + +`2` + +Fixed bounded child-plan order inside Route 4: + +1. `G1-E remaining fail-closed closure` + +No Route 4 follow-up plan may begin until Route 3 has been completed or explicitly deferred. + +### Route 5 + +`Layer C + Layer D` + +Target: + +Boundary-family fail-closed buckets: + +1. `local_doc_pipeline = 5` +2. `host_bridge_workflow = 1` +3. `page_state_eval/bootstrap_target = 1` + +Fixed bounded child-plan order inside Route 5: + +1. `boundary fail-closed decision` + +This route is decision-first by design. It must not start implementation correction before mainline routes have been reduced or deferred. 
+ +### Route 6 + +`Layer E` + +Target: + +Promotion thresholds and board reconciliation policy. + +Fixed bounded child-plan order inside Route 6: + +1. `promotion and board reconciliation policy` + +This route must start only after Routes 2 through 5 have stable post-delta reporting. + +## Required Child Plan Fields + +Every bounded child plan in this sequence must declare: + +1. parent framework reference +2. parent route name +3. parent layer name +4. fixed input bucket +5. allowed file set +6. forbidden file set +7. expected coverage delta +8. stop statement + +If one of these is missing, the plan is not valid under this sequence. + +## Implementation vs Policy Split + +The child sequence intentionally separates implementation plans from policy plans. + +Implementation-oriented plans: + +1. `G3 enrichment-request closure` +2. `G3 export-plan closure` +3. `G3 residual contract closure` +4. `G2 remaining fail-closed closure` +5. `G1-E remaining fail-closed closure` + +Decision or policy-oriented plans: + +1. `boundary fail-closed decision` +2. `promotion and board reconciliation policy` + +## Expected Coverage Movement + +This sequence does not promise `auto-pass` growth on every child plan. + +Expected valid deltas include: + +1. `fail-closed-known` reduction +2. stronger structured fail-closed naming +3. bucket shrinkage within one archetype +4. policy-recognized status strengthening + +Invalid deltas include: + +1. scene-name hardcoding +2. silent gate relaxation +3. route changes that are not measured against current canonical and real-sample anchors + +## Stop Rules + +This child-plan sequence forbids: + +1. opening a child implementation plan outside Routes 2 through 6 +2. creating route-local semantics micro-plans that do not reduce a measured bucket +3. mixing timeout hygiene with contract recovery in the same bounded implementation plan +4. updating `scene_execution_board_2026-04-18.json` inside any Route 2 through Route 5 implementation plan +5. 
starting Route 6 before post-Route-5 status is stable enough for policy design + +## Completion Condition + +This child-plan sequence remains active until all of these are true: + +1. the Route 2 child plans are completed or deferred +2. the Route 3 child plan is completed or deferred +3. the Route 4 child plan is completed or deferred +4. the Route 5 decision plan is completed +5. the Route 6 policy plan is completed + +At that point, the parent framework may either: + +1. remain active with no open child routes, or +2. be revised into a new parent framework revision + diff --git a/docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-framework-design.md b/docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-framework-design.md new file mode 100644 index 0000000..ae90709 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-scene-skill-102-full-coverage-framework-design.md @@ -0,0 +1,391 @@ +# Scene Skill 102 Full Coverage Framework Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Roadmap: `docs/superpowers/plans/2026-04-17-scene-skill-60-to-90-roadmap-plan.md` +> Upstream Reconciliation: `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` +> Upstream Follow-up: `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` +> Upstream Timeout Hygiene: `tests/fixtures/generated_scene/timeout_rerun_hygiene_integration_2026-04-19.json` + +## Intent + +Provide the single post-roadmap framework design for driving the current sgClaw scene-to-skill pipeline from partial `102` scene coverage to full bounded `102` scene coverage. + +This design is intentionally broader than the bounded micro-plans used so far. It defines: + +1. the current actual state of the `102` scene set +2. what is still missing before `100%` coverage can be claimed +3. the layered framework that all future changes must fit into +4. the fixed route order for future implementation work +5. 
the stop rules that prevent the project from drifting into unbounded plan recursion + +This design is meant to become the single parent framework for later bounded plans. + +## Current State + +### Raw Current State + +From the latest integrated assets: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 47 | +| `adjudicated-valid-host-bridge` | 4 | +| raw `source-unreadable` | 3 | +| Total | 102 | + +### Timeout Hygiene Overlay + +The timeout hygiene layer shows that the raw `3` timeout records are not all hard unreadable records: + +| Hygiene-aware timeout interpretation | Count | +| --- | ---: | +| `timeout-as-pass-candidate` | 2 | +| `timeout-as-fail-closed-candidate` | 1 | +| `timeout-still-unreadable` | 0 | +| `timeout-rerun-error` | 0 | + +### Interpretation + +This means the framework has already reached these milestones: + +1. there are no `unsupported-family` scenes in the current `102` sweep +2. there are no unresolved route conflicts left in the current `102` sweep +3. the remaining gap is no longer “framework cannot classify this scene” +4. the remaining gap is “contract does not close” or “timeout budget/hygiene distorts the raw reading” + +## What Is Still Missing Before 100% Coverage + +`100%` coverage does not mean all `102` scenes must become direct `auto-pass`. + +For this framework, `100% bounded coverage` means: + +1. every scene is classified into a supported framework path +2. every non-pass result is either: + - structured fail-closed with named blocker + - valid host-bridge workflow adjudication + - hygiene-aware timeout interpretation +3. there are no unresolved buckets like: + - unsupported family + - unresolved route conflict + - opaque no-report failure + - unexplained timeout + +Under that definition, the missing gap is: + +### Missing Gap A: Structured Contract Closure + +There are still `47` structured fail-closed records. 
+ +Current distribution: + +| Archetype | Count | +| --- | ---: | +| `paginated_enrichment` | 34 | +| `local_doc_pipeline` | 5 | +| `multi_mode_request` | 4 | +| `single_request_enrichment` | 2 | +| `host_bridge_workflow` | 1 | +| `page_state_eval` | 1 | + +This is the largest remaining implementation gap. + +### Missing Gap B: Timeout Hygiene Integration into Main Reporting + +The timeout hygiene layer now exists, but it is still a reporting-side overlay. It has not yet been folded into the primary current-state narrative used by later roadmap decisions. + +### Missing Gap C: Current-State Overlay vs Execution Board + +The project intentionally did not update `scene_execution_board_2026-04-18.json` during these bounded plans. That is correct, but it means the official board is still behind the latest integrated view. + +### Missing Gap D: Promotion Policy + +The project still lacks a single parent rule that says when a structured fail-closed scene may be promoted from: + +1. fail-closed +2. fail-closed with stronger evidence +3. bounded rerun pass candidate + +into a stronger scene-level coverage status. + +## Framework Layers + +All future work must land in exactly one of these layers. + +### Layer A: Source Scan and Budget Layer + +Purpose: + +1. source directory size handling +2. file filtering +3. timeout budget policy +4. rerun hygiene + +Owned concerns: + +1. source scan volume +2. timeout policy +3. rerun interpretation + +Must not own: + +1. archetype routing +2. contract closure logic +3. scene promotion + +Primary code area: + +1. `src/generated_scene/analyzer.rs` +2. reporting JSON and sweep scripts + +### Layer B: Archetype Routing Layer + +Purpose: + +1. decide the correct framework path: + - `single_request_table` + - `single_request_enrichment` + - `multi_mode_request` + - `paginated_enrichment` + - `host_bridge_workflow` + - `multi_endpoint_inventory` + - `local_doc_pipeline` + +Owned concerns: + +1. route precedence +2. 
mixed-evidence routing boundaries +3. route adjudication support + +Must not own: + +1. timeout policy +2. contract synthesis beyond routing evidence +3. board reconciliation + +Primary code area: + +1. `src/generated_scene/analyzer.rs` + +### Layer C: Contract Recovery Layer + +Purpose: + +Recover the minimum business contract fields needed by each supported archetype. + +Owned concerns: + +1. request contract recovery +2. response contract recovery +3. pagination plan recovery +4. enrichment request recovery +5. join key recovery +6. export plan recovery +7. mode matrix recovery + +Must not own: + +1. timeout policy +2. execution board updates +3. status promotion + +Primary code area: + +1. `src/generated_scene/generator.rs` +2. `src/generated_scene/ir.rs` + +### Layer D: Structured Fail-Closed and Reporting Layer + +Purpose: + +Make every incomplete scene fail in an explainable and structured way. + +Owned concerns: + +1. readiness-before-report classification +2. blocker naming +3. `contractSnapshot` +4. generation-report completeness + +Must not own: + +1. route preference +2. source scan budget +3. promotion policy + +Primary code area: + +1. `src/generated_scene/generator.rs` +2. reporting assets under `tests/fixtures/generated_scene/` + +### Layer E: Sweep, Reconciliation, and Coverage Layer + +Purpose: + +Measure the whole `102` scene set, reconcile multiple interpretation layers, and report trustworthy coverage. + +Owned concerns: + +1. full sweep outputs +2. route adjudication overlay +3. timeout hygiene overlay +4. integrated coverage reporting +5. board reconciliation planning + +Must not own: + +1. analyzer implementation changes +2. generator implementation changes + +Primary assets: + +1. `tests/fixtures/generated_scene/*full_sweep*` +2. `tests/fixtures/generated_scene/*reconciliation*` +3. `tests/fixtures/generated_scene/*timeout*hygiene*` +4. 
`docs/superpowers/reports/*coverage*` + +## Coverage Definitions + +This framework uses four explicit coverage concepts. + +### Coverage 1: Direct Pass Coverage + +Scenes with direct `auto-pass`. + +Current count: + +`48 / 102` + +### Coverage 2: Framework-Resolved Coverage + +Scenes in one of: + +1. `auto-pass` +2. `adjudicated-valid-host-bridge` +3. structured `fail-closed-known` +4. hygiene-aware timeout interpretation + +This is the best measure of whether the framework has “caught” the scene set. + +### Coverage 3: Promotion Coverage + +Scenes already represented as promoted or boundary family assets in current project assets. + +This is lower than framework-resolved coverage because promotion is intentionally conservative. + +### Coverage 4: Real-Sample Execution Coverage + +Scenes that have actual selected and executed real-sample validation records. + +This is the strictest coverage metric. + +## Fixed Route Order for Future Work + +Future work must follow this order. + +### Route 1: Finish Layer E Hygiene Integration + +Goal: + +Make sweep and reconciliation reporting hygiene-aware by default. + +This route is nearly finished and should be closed first. + +### Route 2: `G3 / paginated_enrichment` Contract Closure + +Goal: + +Work down the largest remaining structured fail-closed bucket. + +Why first: + +1. largest bucket by count +2. most important for closing the remaining `102` gap +3. already split into repeated missing-contract patterns + +Expected sub-order: + +1. `enrichment_request_missing` +2. `export_plan_missing` +3. then any remaining `join_key` or runtime-scope style gaps + +### Route 3: `G2 / multi_mode_request` Small-Bucket Closure + +Goal: + +Close the remaining `4` multi-mode structured fail-closed records. + +Why third: + +1. clear archetype +2. relatively small bucket +3. 
mainline family already has real-sample pass anchor + +### Route 4: `G1-E / single_request_enrichment` Small-Bucket Closure + +Goal: + +Close the remaining `2` G1-E structured fail-closed records. + +Why fourth: + +1. smallest mainline bucket +2. framework anchor already exists +3. lower leverage than G3 and G2 + +### Route 5: Decide on `local_doc_pipeline` and `host_bridge_workflow` + +Goal: + +Handle the remaining boundary-family fail-closed records only after the mainline buckets are reduced. + +This route must not start before Routes 2–4 have completed or been explicitly deferred. + +### Route 6: Reconciliation and Board Promotion Policy + +Goal: + +Define how stronger framework-resolved statuses can update the execution board without over-promoting scenes. + +This must be done only after contract-closure routes have produced stable deltas. + +## What Future Plans Must Contain + +Every later bounded implementation plan must explicitly declare: + +1. which framework layer it belongs to +2. which route from this design it belongs to +3. which code modules it is allowed to touch +4. which code modules it must not touch +5. how it protects current real-sample and canonical passes +6. what exact delta it expects to produce in the `102` scene state + +If a future plan cannot answer those six items, it is out of framework and should not start. + +## Stop Rules + +The framework forbids: + +1. starting a new micro-plan that only renames a narrower semantics problem without moving toward a route completion +2. treating timeout rerun success as promotion +3. updating execution board state inside a diagnostic plan +4. opening `G4/G5` before the current structured fail-closed mainline is reduced +5. using prompt-only tuning as a substitute for contract recovery + +## What 100% Looks Like + +This framework considers `100% bounded coverage` achieved when: + +1. `unsupported-family = 0` +2. `missing-source = 0` +3. `misclassified-unresolved = 0` +4. 
`timeout-still-unreadable = 0` +5. every remaining non-pass scene is structured and attributable to a supported framework path +6. execution board and reconciliation reporting can express the current scene state without ambiguity + +This is different from `100% auto-pass`. + +`100% auto-pass` is not the immediate target. + +`100% bounded framework coverage` is the immediate target. diff --git a/docs/superpowers/specs/2026-04-19-structured-fail-closed-improvement-roadmap-design.md b/docs/superpowers/specs/2026-04-19-structured-fail-closed-improvement-roadmap-design.md new file mode 100644 index 0000000..daccf36 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-structured-fail-closed-improvement-roadmap-design.md @@ -0,0 +1,130 @@ +# Structured Fail-Closed Improvement Roadmap Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Reconciliation: `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` + +## Intent + +Turn the `48` structured fail-closed records from the reconciled `102` sweep into a governed improvement roadmap. + +The objective is not to weaken gates or inflate `auto-pass`. The objective is to classify contract gaps, identify the highest-value bounded correction slices, and then improve generic scene-to-skill conversion where evidence can be recovered safely. + +## Current Reconciled Baseline + +After status reconciliation, the `102` scene set is: + +| Reconciled status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 48 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 2 | +| `missing-source` | 0 | +| `unsupported-family` | 0 | + +The `4` raw route conflicts are no longer unresolved route bugs. They are valid host-bridge workflows. + +This roadmap therefore focuses on `fail-closed-known = 48`. 
+ +## Fail-Closed Buckets + +| Inferred archetype | Reason | Count | +| --- | --- | ---: | +| `paginated_enrichment` | `workflow evidence is incomplete before package generation` | 35 | +| `local_doc_pipeline` | `workflow evidence is incomplete before package generation` | 5 | +| `multi_mode_request` | `workflow evidence is incomplete before package generation` | 4 | +| `single_request_enrichment` | `workflow evidence is incomplete before package generation` | 2 | +| `host_bridge_workflow` | `workflow evidence is incomplete before package generation` | 1 | +| `page_state_eval` | `bootstrap_target` | 1 | + +The first priority is the `35` `paginated_enrichment` records because they are the largest bucket and map to the most important currently generic workflow family. + +## Scope Guardrails + +In scope: + +1. classify the `48` structured fail-closed records by missing contract piece +2. prioritize bounded correction slices +3. implement bounded evidence recovery only after classification shows repeated recoverable patterns +4. keep all fail-closed semantics intact +5. rerun a bounded follow-up sweep after corrections + +Out of scope: + +1. adding new scene families +2. starting `G4/G5` +3. login recovery +4. full browser host runtime transport +5. local document attachment runtime +6. auto-promoting scenes into the execution board +7. weakening readiness gates to increase pass counts +8. reopening the already adjudicated `4` valid-host-bridge workflows +9. handling the `2` remaining timeout records in this roadmap + +## Workstreams + +1. `WS1` Fail-Closed Inventory and Gap Taxonomy +2. `WS2` G3 Paginated Enrichment Contract Recovery +3. `WS3` Small-Bucket Contract Recovery +4. `WS4` Bootstrap Target Isolation +5. `WS5` Follow-Up Sweep and Coverage Delta + +## Gap Taxonomy + +Every structured fail-closed record must receive one primary missing-contract label: + +1. `main_request_missing` +2. `pagination_plan_missing` +3. `enrichment_request_missing` +4. 
`join_key_missing` +5. `export_plan_missing` +6. `mode_matrix_missing` +7. `mode_request_contract_missing` +8. `single_request_enrichment_contract_missing` +9. `host_bridge_contract_missing` +10. `local_doc_contract_missing` +11. `bootstrap_target_unresolved` +12. `mixed_or_ambiguous_contract_gap` + +Secondary labels may be added, but every record must have exactly one primary label. + +## Correction Strategy + +Corrections must be pattern-based, not scene-by-scene. + +Allowed correction types: + +1. bounded evidence extraction for repeated field names or workflow structures +2. bounded IR fallback only when evidence is explicit and traceable +3. more specific fail-closed reason reporting +4. regression tests for each recovered pattern + +Forbidden correction types: + +1. hard-coding a scene name to pass +2. converting fail-closed records to pass without closing the contract +3. broad route-precedence rewrites +4. disabling or relaxing gates + +## Expected Outputs + +1. `tests/fixtures/generated_scene/structured_fail_closed_inventory_2026-04-19.json` +2. `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` +3. `docs/superpowers/reports/2026-04-19-structured-fail-closed-inventory-report.md` +4. `docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-coverage-delta-report.md` +5. `docs/superpowers/reports/2026-04-19-structured-fail-closed-improvement-roadmap-closure-report.md` + +## Acceptance Criteria + +1. all `48` fail-closed records are inventoried +2. all `48` records have exactly one primary missing-contract label +3. the `35` `paginated_enrichment` records are split into actionable G3 gap groups +4. implementation, if performed, is limited to repeated recoverable patterns +5. no adjudicated host-bridge record is reopened +6. follow-up results are measured against the reconciled baseline +7. 
execution board status remains unchanged + +## Completion Signal + +The roadmap is complete when the `48` structured fail-closed records are no longer a single broad bucket and the follow-up sweep quantifies whether bounded evidence recovery improved safe conversion coverage. diff --git a/docs/superpowers/specs/2026-04-19-structured-fail-closed-residual-13-closure-design.md b/docs/superpowers/specs/2026-04-19-structured-fail-closed-residual-13-closure-design.md new file mode 100644 index 0000000..77d83a8 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-structured-fail-closed-residual-13-closure-design.md @@ -0,0 +1,86 @@ +# Structured Fail-Closed Residual 13 Closure Design + +> Date: 2026-04-19 +> Status: Draft +> Parent Framework: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Parent Layer: `Layer C` and `Layer D` +> Upstream Asset: `tests/fixtures/generated_scene/full_coverage_reconciliation_candidates_2026-04-19.json` + +## Intent + +Close or explicitly defer the remaining `13` `framework-structured-fail-closed` scenes after the 102 full-coverage follow-up sweep. + +This design does not update the official execution board. It defines how the remaining residual bucket must be split before any implementation work starts. 
+ +## Current Residual Bucket + +The current residual bucket contains `13` scenes: + +| Archetype | Count | Direction | +| --- | ---: | --- | +| `paginated_enrichment` | 4 | mainline residual closure | +| `multi_mode_request` | 2 | mainline residual closure | +| `local_doc_pipeline` | 5 | boundary hold or future G8 runtime roadmap | +| `host_bridge_workflow` | 1 | boundary hold or future G6 runtime roadmap | +| `page_state_eval` | 1 | bootstrap target isolation | + +## Fixed Residual Scenes + +| Scene ID | Scene | Archetype | Reason | +| --- | --- | --- | --- | +| `sweep-007-scene` | `95598供电服务月报` | `paginated_enrichment` | workflow evidence incomplete | +| `sweep-018-scene` | `白银线损周报` | `multi_mode_request` | readiness not A/B | +| `sweep-033-scene` | `供电可靠率指标统计表` | `local_doc_pipeline` | workflow evidence incomplete | +| `sweep-034-scene` | `供电可靠性数据质量自查报告月报` | `local_doc_pipeline` | workflow evidence incomplete | +| `sweep-039-scene` | `故障报修工单信息统计表` | `paginated_enrichment` | workflow evidence incomplete | +| `sweep-042-scene` | `国网金昌供电公司营商环境周例会报告` | `local_doc_pipeline` | workflow evidence incomplete | +| `sweep-051-scene` | `嘉峪关可靠性分析报告` | `local_doc_pipeline` | workflow evidence incomplete | +| `sweep-068-scene` | `输变电设备运行分析报告` | `paginated_enrichment` | workflow evidence incomplete | +| `sweep-071-scene` | `台区线损大数据-月_周累计线损率统计分析` | `multi_mode_request` | readiness not A/B | +| `sweep-074-scene` | `同兴智能安全督查日报` | `local_doc_pipeline` | workflow evidence incomplete | +| `sweep-084-scene` | `巡视计划完成情况自动检索` | `paginated_enrichment` | workflow evidence incomplete | +| `sweep-085-scene` | `业扩报装管理制度` | `host_bridge_workflow` | workflow evidence incomplete | +| `sweep-091-scene` | `用户停电频次分析监测` | `page_state_eval` | readiness not A/B | + +## Route Mapping + +This residual bucket must be split into bounded child work: + +1. `Residual Route A`: `G3 / paginated_enrichment` residual closure for 4 scenes. +2. 
`Residual Route B`: `G2 / multi_mode_request` residual closure for 2 scenes. +3. `Residual Route C`: boundary hold decision for local-doc and host-bridge residuals. +4. `Residual Route D`: bootstrap target isolation for the page-state residual. +5. `Residual Route E`: follow-up mini sweep and reconciliation candidate refresh. + +## Guardrails + +1. Do not update `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json`. +2. Do not add new families. +3. Do not start `G4/G5`. +4. Do not relax readiness gates. +5. Do not merge boundary runtime work with mainline contract closure. +6. Do not classify local-doc residuals as mainline pass without a separate G8 runtime roadmap. +7. Do not classify host-bridge residuals as mainline pass without a separate G6 runtime roadmap. + +## Implementation Boundaries + +Allowed implementation routes: + +1. `G3` residual workflow evidence recovery. +2. `G2` residual request/response/mode readiness correction. + +Decision-only routes: + +1. `local_doc_pipeline` residuals. +2. `host_bridge_workflow` residual. +3. `page_state_eval/bootstrap_target` residual. + +## Completion Criteria + +This residual design is complete when: + +1. all 13 scenes are assigned to one residual route; +2. the mainline residual routes have bounded implementation plans; +3. boundary and bootstrap residuals have explicit hold/isolate decisions; +4. a follow-up mini sweep plan exists to measure the residual closure delta. 
+ diff --git a/docs/superpowers/specs/2026-04-19-timeout-budget-rerun-hygiene-design.md b/docs/superpowers/specs/2026-04-19-timeout-budget-rerun-hygiene-design.md new file mode 100644 index 0000000..d810f05 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-timeout-budget-rerun-hygiene-design.md @@ -0,0 +1,99 @@ +# Timeout Budget and Rerun Hygiene Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Diagnostic: `docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md` + +## Intent + +Prevent budget-sensitive scenes from being miscounted as `source-unreadable` when they can resolve into: + +1. `executed-pass` +2. structured `fail-closed` + +under a bounded rerun budget. + +This design does not attempt to improve scene understanding. It only changes timeout handling and rerun classification hygiene. + +## Problem Statement + +The timeout regression diagnostic produced: + +| Scene id | Diagnostic label | Actual behavior under `90s` | +| --- | --- | --- | +| `sweep-015-scene` | `timeout-rerun-pass` | completed successfully | +| `sweep-025-scene` | `timeout-rerun-pass` | completed successfully | +| `sweep-040-scene` | `timeout-rerun-fail-closed` | resolved into structured fail-closed | + +This means the current fixed `45s` budget is too coarse for a subset of scenes. It collapses: + +1. budget-sensitive success +2. budget-sensitive fail-closed +3. true unreadable or hanging cases + +into the same `source-unreadable` bucket. + +## Scope + +In scope: + +1. define a bounded timeout-budget policy +2. define when a diagnostic rerun is allowed +3. define how rerun results should be classified +4. define output JSON and report for timeout hygiene verification + +Out of scope: + +1. analyzer logic changes +2. generator contract recovery changes +3. scene promotion +4. execution board updates +5. full `102` sweep improvement work +6. 
timeout implementation fixes unrelated to rerun hygiene + +## Policy + +### Primary Sweep Budget + +The initial sweep still runs with the fixed primary budget. + +### Secondary Diagnostic Budget + +When a scene ends with: + +1. `source-unreadable` +2. reason `generator timeout after 45s` + +it becomes eligible for one bounded rerun under a secondary timeout budget. + +### Rerun Result Mapping + +A bounded rerun may only map to: + +1. `timeout-rerun-pass` +2. `timeout-rerun-fail-closed` +3. `timeout-rerun-timeout` +4. `timeout-rerun-error` + +These are hygiene classifications, not promoted scene statuses. + +### Board and Promotion Boundary + +Even when rerun succeeds: + +1. do not update `scene_execution_board_2026-04-18.json` +2. do not convert the scene to promoted status +3. do not silently merge the rerun result into canonical scene status + +## Output + +1. `tests/fixtures/generated_scene/timeout_budget_rerun_hygiene_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-timeout-budget-rerun-hygiene-report.md` + +## Success Criteria + +1. timeout scenes are no longer treated as a single unreadable bucket in the hygiene layer +2. rerun-pass and rerun-fail-closed are distinguishable +3. true timeout cases remain distinguishable +4. no analyzer or generator implementation changes are made +5. 
no execution board updates are made diff --git a/docs/superpowers/specs/2026-04-19-timeout-regression-diagnostic-design.md b/docs/superpowers/specs/2026-04-19-timeout-regression-diagnostic-design.md new file mode 100644 index 0000000..c9e8652 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-timeout-regression-diagnostic-design.md @@ -0,0 +1,114 @@ +# Timeout Regression Diagnostic Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Plan: `docs/superpowers/plans/2026-04-19-structured-fail-closed-improvement-roadmap-plan.md` +> Upstream Follow-up: `tests/fixtures/generated_scene/structured_fail_closed_improvement_followup_2026-04-19.json` + +## Intent + +Diagnose the three timeout records visible after the structured fail-closed improvement follow-up sweep. + +This design is diagnostic-only. It does not change analyzer or generator logic, promote scenes, update the execution board, or treat a longer rerun success as a validated pass. + +## Problem Statement + +The structured fail-closed improvement follow-up sweep produced: + +| Status | Count | +| --- | ---: | +| `auto-pass` | 48 | +| `fail-closed-known` | 47 | +| `adjudicated-valid-host-bridge` | 4 | +| `source-unreadable` | 3 | + +The three `source-unreadable` records are timeout records: + +| Scene id | Scene | Timeout type | +| --- | --- | --- | +| `sweep-015-scene` | `任务报表` | persistent timeout | +| `sweep-025-scene` | `力禾动环系统巡视记录` | persistent timeout | +| `sweep-040-scene` | `嘉峪关日报` | new regression timeout | + +`sweep-040-scene` is the most important record because it regressed from `fail-closed-known` in the reconciled baseline to `source-unreadable` in the follow-up sweep. + +## Scope + +In scope: + +1. identify the three timeout records from the follow-up sweep +2. collect source directory diagnostics +3. run bounded diagnostic reruns with longer timeout budgets +4. classify each timeout into a secondary timeout reason +5. publish diagnostic JSON and report + +Out of scope: + +1. 
analyzer or generator implementation changes +2. readiness gate changes +3. execution board updates +4. scene promotion +5. family baseline changes +6. handling the remaining `47` structured fail-closed records +7. handling the `4` adjudicated host-bridge records + +## Diagnostic Labels + +Each timeout must receive exactly one final diagnostic label: + +1. `timeout-rerun-pass` +2. `timeout-rerun-fail-closed` +3. `timeout-large-source` +4. `timeout-command-hang` +5. `timeout-nondeterministic` +6. `timeout-source-scan-heavy` +7. `timeout-unknown` + +Secondary labels may be attached for: + +1. large file count +2. large total size +3. many HTML or JS files +4. generated report present after rerun +5. stderr decode noise +6. elapsed time near budget + +## Required Evidence + +For each timeout record, collect: + +1. scene id +2. scene name +3. source directory +4. previous reconciled status +5. follow-up status +6. file count +7. total source bytes +8. HTML file count +9. JavaScript file count +10. largest files +11. diagnostic rerun exit code +12. diagnostic rerun elapsed seconds +13. diagnostic rerun timed out flag +14. generation report path if produced +15. generation status if produced +16. final diagnostic label + +## Output + +Diagnostic output: + +`tests/fixtures/generated_scene/timeout_regression_diagnostic_2026-04-19.json` + +Report output: + +`docs/superpowers/reports/2026-04-19-timeout-regression-diagnostic-report.md` + +## Success Criteria + +1. exactly three timeout records are diagnosed +2. `sweep-040-scene` is explicitly marked as the regression timeout +3. the two persistent timeout records remain distinguishable from the regression timeout +4. each record has one final diagnostic label +5. no implementation changes are made +6. 
no execution board state is updated diff --git a/docs/superpowers/specs/2026-04-19-timeout-rerun-hygiene-integration-design.md b/docs/superpowers/specs/2026-04-19-timeout-rerun-hygiene-integration-design.md new file mode 100644 index 0000000..6e5b131 --- /dev/null +++ b/docs/superpowers/specs/2026-04-19-timeout-rerun-hygiene-integration-design.md @@ -0,0 +1,99 @@ +# Timeout Rerun Hygiene Integration Design + +> Date: 2026-04-19 +> Status: Draft +> Upstream Hygiene: `tests/fixtures/generated_scene/timeout_budget_rerun_hygiene_2026-04-19.json` +> Upstream Reconciliation: `tests/fixtures/generated_scene/full_sweep_status_reconciliation_2026-04-19.json` + +## Intent + +Integrate timeout rerun hygiene into future sweep interpretation and reconciliation reporting so budget-sensitive timeout scenes are not miscounted as a single `source-unreadable` bucket. + +This is a reporting and classification integration only. It does not change analyzer or generator behavior. + +## Problem Statement + +The timeout hygiene layer now distinguishes: + +1. `rerun-resolved-pass` +2. `rerun-resolved-fail-closed` +3. `rerun-still-timeout` +4. `rerun-error` + +Without integration, a future sweep or reconciliation still risks reporting: + +`source-unreadable = 3` + +when the actual hygiene-aware interpretation is: + +1. `2` budget-sensitive pass candidates +2. `1` budget-sensitive fail-closed candidate +3. `0` persistent timeout after rerun + +## Scope + +In scope: + +1. define a hygiene-aware reporting schema +2. define how raw timeout status and rerun hygiene status coexist +3. define reconciliation-layer summary fields +4. define output JSON and report for the integrated view + +Out of scope: + +1. analyzer changes +2. generator changes +3. execution board updates +4. scene promotion +5. full `102` sweep rerun +6. timeout implementation fixes + +## Integration Rules + +### Raw Status Preservation + +The raw sweep status is preserved. 
+ +Example: + +`source-unreadable` + +remains stored as the raw sweep result. + +### Hygiene Overlay + +When a timeout record has a hygiene record, the integrated layer adds: + +1. `hygieneStatus` +2. `hygieneInterpretation` + +### Hygiene Interpretation + +Allowed integrated timeout interpretations: + +1. `timeout-as-pass-candidate` +2. `timeout-as-fail-closed-candidate` +3. `timeout-still-unreadable` +4. `timeout-rerun-error` + +### Summary Output + +The integrated summary must report both: + +1. raw status counts +2. hygiene-aware timeout interpretation counts + +This prevents lossy reporting. + +## Output + +1. `tests/fixtures/generated_scene/timeout_rerun_hygiene_integration_2026-04-19.json` +2. `docs/superpowers/reports/2026-04-19-timeout-rerun-hygiene-integration-report.md` + +## Success Criteria + +1. raw timeout counts remain visible +2. hygiene-aware timeout interpretation counts are added +3. the three timeout records become distinguishable in reconciliation reporting +4. no analyzer or generator code is changed +5. no execution board state is updated diff --git a/docs/superpowers/specs/2026-04-20-deterministic-keyword-scoring-refinement-design.md b/docs/superpowers/specs/2026-04-20-deterministic-keyword-scoring-refinement-design.md new file mode 100644 index 0000000..0bee1b2 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-deterministic-keyword-scoring-refinement-design.md @@ -0,0 +1,56 @@ +# Deterministic Keyword Scoring Refinement Design + +> Date: 2026-04-20 +> Parent Plan: `2026-04-20-scene-skill-102-deterministic-invocation-readiness-plan.md` + +## Intent + +Resolve the 9 deterministic dispatch ambiguity gaps found after normalizing the final materialized scene skills for `。。。` invocation. + +The previous readiness pass proved that 92 complete packages can be selected by full-scene-name samples, but 9 scenes collide because generated include keywords are too broad for current score rules. + +## Fixed Gap Set + +1. 
`sweep-026-scene / 县区公司故障明细` +2. `sweep-034-scene / 售电收入日统计排程预测` +3. `sweep-037-scene / 嘉峪关可靠性分析报告` +4. `sweep-038-scene / 嘉峪关周报` +5. `sweep-039-scene / 嘉峪关故障明细` +6. `sweep-040-scene / 嘉峪关日报` +7. `sweep-041-scene / 嘉峪关月报` +8. `sweep-044-scene / 国网金昌供电公司指挥中心生产例会报告` +9. `sweep-045-scene / 国网金昌供电公司营商环境周例会报告` + +## Scope + +Allowed: + +1. Refine deterministic include/exclude keywords for the fixed 9 scenes and direct collision partners when needed. +2. Run dispatch dry-run checks without browser execution. +3. Publish refinement decisions and readiness delta. + +Forbidden: + +1. Do not execute browser scripts. +2. Do not repair `sweep-012-scene`. +3. Do not change generated scripts. +4. Do not update official execution board. +5. Do not modify runtime dispatch code unless this design is superseded by a separate runtime-scoring implementation plan. + +## Strategy + +Prefer manifest-level disambiguation first: + +1. keep full scene names as primary keywords; +2. remove overly broad standalone tokens from colliding scenes where they create ties; +3. add distinctive phrase keywords only when present in the scene name; +4. use `exclude_keywords` only for direct mutually exclusive cases. + +Runtime scoring changes are out of scope for this plan unless manifest refinement cannot make all 9 gaps uniquely selectable. + +## Completion Criteria + +1. All 9 fixed gaps have final decisions. +2. Full-scene-name dispatch dry-run uniquely selects the expected scene for each fixed gap. +3. No new ambiguity is introduced for the complete 101-package set. +4. `sweep-012-scene` remains excluded. 
diff --git a/docs/superpowers/specs/2026-04-20-final-skill-human-readable-index-design.md b/docs/superpowers/specs/2026-04-20-final-skill-human-readable-index-design.md new file mode 100644 index 0000000..1950eed --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-final-skill-human-readable-index-design.md @@ -0,0 +1,68 @@ +# Final Skill Human-Readable Index Design + +> Date: 2026-04-20 +> Parent Plan: `2026-04-19-scene-skill-102-final-materialization-plan.md` + +## Intent + +Make the final materialized skill set usable by humans without changing canonical scene ids, generation logic, or generated scripts. + +The final skill directories intentionally use stable ids such as `sweep-001-scene`, but this is not enough for review, validation, or handoff. This design adds a human-readable index and normalizes metadata for already materialized skill packages. + +## Scope + +This design only addresses readability and metadata. + +Allowed: + +1. Create `SCENE_INDEX.md` under the final materialization root. +2. Create `scene_skill_102_index.json` under the final materialization root. +3. Update existing materialized `SKILL.toml` files with `display_name`, `scene_id`, and `scene_name`. +4. Update existing materialized `SKILL.md` files with readable scene metadata. +5. Publish a superpowers report. + +Forbidden: + +1. Do not rerun `sg_scene_generate`. +2. Do not rename `sweep-xxx-scene` directories. +3. Do not modify generated scripts. +4. Do not modify `src/generated_scene/analyzer.rs`, `src/generated_scene/generator.rs`, or `src/generated_scene/ir.rs`. +5. Do not repair `sweep-012-scene` package generation failure. +6. Do not update the official execution board. 
+ +## Data Sources + +Authoritative scene names come from: + +`tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` + +Materialization state comes from: + +`tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` + +Failure state comes from: + +`tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` + +## Output Shape + +`SCENE_INDEX.md` must include: + +1. scene id; +2. scene name; +3. archetype; +4. readiness; +5. materialization status; +6. skill directory; +7. failure reason when applicable. + +`scene_skill_102_index.json` must contain the same mapping in machine-readable form. + +Each complete skill package should expose the readable name in `SKILL.toml` and `SKILL.md` while preserving the stable `name = "sweep-xxx-scene"` identifier. + +## Completion Criteria + +1. The index contains exactly 102 rows. +2. The failed `sweep-012-scene` is present and explicitly marked as failed. +3. Complete packages have readable metadata. +4. No generation or recovery work is performed. diff --git a/docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-design.md b/docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-design.md new file mode 100644 index 0000000..deedcf9 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-design.md @@ -0,0 +1,180 @@ +# Generated Scene Rule Hardening Route Design + +> Date: 2026-04-20 +> Status: Draft +> Parent roadmap: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` +> Upstream ledger: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-plan.md` + +## Intent + +Define the bounded route-design stage that converts the completed source-first runtime-semantics ledger into reusable hardening routes. + +This design does not implement analyzer/generator changes. It only decides: + +1. 
which reusable hardening routes exist +2. how large each route is +3. which routes should be executed first +4. how downstream implementation slices should be decomposed + +## Why This Stage Exists + +The ledger proved that the current 102-scene set contains reusable generator-level gaps, not just isolated scene-specific defects. + +The project now needs a route map that: + +1. groups scenes by reusable rule fixes +2. avoids scene-by-scene patching +3. defines a strict implementation order +4. makes full rematerialization and validation refresh possible after hardening + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md` + +## Scope + +In scope: + +1. route clustering from the completed ledger +2. route prioritization by coverage and reuse +3. bounded child-plan sequencing for implementation +4. rematerialization dependency declaration +5. validation refresh dependency declaration + +Out of scope: + +1. any `src/` change +2. any skill manifest edit +3. any rematerialization +4. any validation rerun +5. any inner-network execution + +## Route Candidates + +The ledger already establishes five generator-level hardening routes. + +### Route 1: Resolver-to-Request Mapping Hardening + +Goal: + +Recover reusable mapping metadata between resolver outputs and request payload fields. + +Examples: + +1. `org_code -> orgno` +2. `period_payload.fdate -> fdate` +3. `period_payload.weekSfdate -> weekSfdate` +4. `period_payload.weekEfdate -> weekEfdate` + +Reason for high priority: + +This route spans the full 102-scene set and directly blocks runtime equivalence. + +### Route 2: Runtime URL Classification Hardening + +Goal: + +Separate URL roles during generation: + +1. runtime context URL +2. module route URL +3. API endpoint URL +4. 
entry URL hints + +Reason for high priority: + +This route also spans the full set and is required to stop callers from guessing `page_url` semantics at execution time. + +### Route 3: Embedded Dictionary Extraction Hardening + +Goal: + +Recover richer dictionaries and trees from source-side JS/HTML assets instead of shipping starter subsets only. + +Reason for high priority: + +This route also spans the full set and is the only scalable answer to sweep-030-style organization coverage failures. + +### Route 4: Parameter Default Semantics Recovery Hardening + +Goal: + +Recover page-native default period/date/mode behavior from source evidence into generated parameter metadata. + +Reason for priority: + +This route is slightly narrower than the first three but still affects the majority of scenes and is highly visible in runtime invocation. + +### Route 5: Invocation Alias Generation Hardening + +Goal: + +Broaden deterministic invocation coverage so user wording is not forced to match canonical scene names exactly. + +Reason for priority: + +This route is selective rather than universal, so it should follow structural hardening routes unless a route-local blocker proves otherwise. + +## Prioritization Rules + +Route order must be based on: + +1. breadth of scene coverage +2. generator-level reuse +3. ability to reduce repeat inner-network rediscovery +4. dependency order between routes + +It must not be based on: + +1. anecdotal scene debugging order +2. whichever single scene was most recently tested + +## Fixed Route Order + +The fixed route order is: + +1. `resolver_request_mapping_hardening` +2. `runtime_url_classification_hardening` +3. `embedded_dictionary_extraction_hardening` +4. `parameter_default_semantics_recovery_hardening` +5. `alias_generation_hardening` + +## Implementation-Slice Policy + +No route should be implemented as one unbounded giant patch. + +Each route must later be split into bounded child plans with: + +1. a fixed scene bucket +2. 
explicit allowed files +3. explicit forbidden files +4. an expected coverage delta +5. a stop statement + +## Required Downstream Outputs + +This route-design stage must yield: + +1. one route-sequencing plan +2. one bounded implementation plan per top route +3. one full rematerialization refresh plan after route execution +4. one validation refresh plan after rematerialization + +## Acceptance Criteria + +This design is complete when: + +1. all five reusable routes are explicit +2. route order is fixed +3. route ordering is justified by ledger evidence, not anecdotes +4. downstream implementation decomposition rules are explicit +5. this stage remains design-only + +## Stop Statement + +Stop after publishing the route design and its child sequencing plan. + +Do not implement any hardening route inside this design. diff --git a/docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-sequence-design.md b/docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-sequence-design.md new file mode 100644 index 0000000..f743ad5 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-generated-scene-rule-hardening-route-sequence-design.md @@ -0,0 +1,53 @@ +# Generated Scene Rule Hardening Route Sequence Design + +> Date: 2026-04-20 +> Status: Draft +> Parent plan: +> - `docs/superpowers/plans/2026-04-20-generated-scene-rule-hardening-route-plan.md` + +## Intent + +Freeze the downstream execution sequence after the completed runtime-semantics ledger. + +This design does not implement hardening. It only turns the route order into concrete bounded child plans. + +## Fixed Route Order + +1. `resolver_request_mapping_hardening` +2. `runtime_url_classification_hardening` +3. `embedded_dictionary_extraction_hardening` +4. `parameter_default_semantics_recovery_hardening` +5. `alias_generation_hardening` + +## Child-Plan Policy + +Each child plan below is a first reusable implementation slice, not full route closure. + +That is intentional: + +1. 
each slice must stay bounded +2. each slice must operate on a route-local bucket +3. residual route expansion should happen only after implementation results are known + +## Required Downstream Plans + +1. `2026-04-20-generated-scene-resolver-request-mapping-hardening-plan.md` +2. `2026-04-20-generated-scene-runtime-url-classification-hardening-plan.md` +3. `2026-04-20-generated-scene-embedded-dictionary-extraction-hardening-plan.md` +4. `2026-04-20-generated-scene-parameter-default-semantics-hardening-plan.md` +5. `2026-04-20-generated-scene-invocation-alias-generation-hardening-plan.md` +6. `2026-04-20-generated-scene-runtime-semantics-rematerialization-refresh-plan.md` +7. `2026-04-20-generated-scene-runtime-semantics-validation-refresh-plan.md` + +## Acceptance Criteria + +This design is complete when: + +1. route order is fixed +2. child-plan list is explicit +3. rematerialization and validation refresh are frozen as mandatory dependencies +4. implementation still has not started + +## Stop Statement + +Stop after publishing the route sequence design and route sequence plan. diff --git a/docs/superpowers/specs/2026-04-20-generated-scene-runtime-semantics-gap-analysis-design.md b/docs/superpowers/specs/2026-04-20-generated-scene-runtime-semantics-gap-analysis-design.md new file mode 100644 index 0000000..2ba025a --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-generated-scene-runtime-semantics-gap-analysis-design.md @@ -0,0 +1,203 @@ +# Generated Scene Runtime Semantics Gap Analysis Design + +> Status: Superseded by `docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-design.md` + +## Objective + +Produce a bounded, implementation-free analysis of runtime semantics gaps across the final 102 generated scene skills, using `sweep-030-scene` as the anchor case that exposed five concrete gap classes during inner-network validation. 
+ +This design does **not** modify analyzer, generator, runtime, skill manifests, or execution assets. It only defines how to analyze and classify the gaps that remain between: + +- `generated_scene` framework-level success +- real inner-network invocation / execution equivalence + +## Anchor Case + +The anchor case is: + +- `sweep-030-scene / 台区线损大数据-月_周累计线损率统计分析` + +Inner-network debugging exposed the following gap classes: + +1. `invocation_alias_gap` +2. `dictionary_recovery_gap` +3. `parameter_default_semantics_gap` +4. `resolver_to_request_mapping_gap` +5. `runtime_url_semantics_gap` + +The analysis generalizes these five classes across the full 102-scene final materialization set. + +## Scope + +In scope: + +- Analyze the final 102 generated skills under: + - `examples/scene_skill_102_final_materialization_2026-04-19/skills` +- Inspect: + - `scene.toml` + - `SKILL.toml` + - `references/generation-report.json` + - `references/org-dictionary.json` where present + - generated browser scripts where needed for request mapping evidence +- Compare generated assets against source-scene evidence when required to validate dictionary and runtime-url semantics +- Produce a 102-scene gap inventory and summary report + +Out of scope: + +- Any code change in `src/` +- Any edit to generated skill packages +- Any update to execution board / official board +- Any new pseudo-production execution +- Any new inner-network fix for a specific scene + +## Problem Statement + +The repository has already reached: + +- `102 / 102` framework auto-pass +- `102 / 102` final materialized skills +- deterministic invocation readiness + +But `sweep-030-scene` demonstrated that generated skills can still diverge from real runtime semantics in ways not captured by framework-level closure: + +- user phrasing differs from canonical scene name +- source scene contains complete org dictionaries not fully recovered into the generated skill +- source page defaults dates / periods while generated 
invocation initially required explicit period values +- resolver outputs and request field names do not align 1:1 +- runtime context URL semantics differ from module-route URL semantics + +Therefore the next bounded step is analysis, not implementation. + +## Gap Taxonomy + +Each scene may be tagged with zero or more of the following gap classes: + +### 1. `invocation_alias_gap` + +Definition: + +- Natural operator phrasing is likely not covered by current deterministic `include_keywords` + +Indicators: + +- Deterministic keywords only contain canonical scene title +- Scene title includes punctuation / separators / compound mode phrases +- Existing reports already required alias normalization + +### 2. `dictionary_recovery_gap` + +Definition: + +- Source scene contains embedded dictionaries / trees / option arrays, but generated skill only carries a starter subset or no dictionary at all + +Indicators: + +- Source contains files like `city.js`, `dict.js`, `enum.js`, `options.js` +- Source JS includes tree/option structures with labels/codes/children +- Generated `references/org-dictionary.json` is empty or much smaller than source evidence + +### 3. `parameter_default_semantics_gap` + +Definition: + +- Source page applies default values (date, period, mode, range, org) when user omits them, but generated skill currently treats them as required or unresolved + +Indicators: + +- Source contains `moment()` / date defaulting / initial query payloads +- Generated parameter readiness previously required explicit user input + +### 4. 
`resolver_to_request_mapping_gap` + +Definition: + +- Resolved semantic parameters do not align directly with actual request field names or payload layout used by the source page + +Indicators: + +- Resolver outputs `org_code` while request uses `orgno`, or analogous mismatches +- Generated request template uses placeholders not directly populated by resolver outputs +- Source request payload structure differs from generated request mapping + +### 5. `runtime_url_semantics_gap` + +Definition: + +- Generated skill does not clearly distinguish between app-entry URL, module-route URL, and API endpoint URL for runtime binding + +Indicators: + +- `scene.toml` only stores one `bootstrap.target_url` +- Inner-network execution shows app-entry URL succeeds while module-route URL fails, or vice versa +- Generation report contains both an app entry and a deeper route candidate + +## Inputs + +Primary inputs: + +- `examples/scene_skill_102_final_materialization_2026-04-19/skills` +- `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` + +Anchor-case source evidence: + +- `D:/desk/智能体资料/全量业务场景/一平台场景/台区线损大数据-月_周累计线损率统计分析` + +## Output Artifacts + +### 1. JSON inventory + +- `tests/fixtures/generated_scene/generated_scene_runtime_semantics_gap_analysis_2026-04-20.json` + +Required structure: + +- top-level summary counts by gap class +- per-scene records +- per-risk-bucket grouping + +Each scene record should include: + +- `sceneId` +- `sceneName` +- `archetype` +- `riskLevel` +- `gaps` +- `evidence` +- `recommendedFixRoutes` + +### 2. Human-readable report + +- `docs/superpowers/reports/2026-04-20-generated-scene-runtime-semantics-gap-analysis-report.md` + +The report must answer: + +1. 
How many scenes likely have each gap type +2. Which families / archetypes are most affected +3. Which gaps are generator-level +4. Which gaps are runtime-only and should not be pushed back into generation +5. Which next implementation routes should be prioritized + +## Risk Buckets + +Scenes should be grouped into: + +- `high`: multi-parameter or runtime-sensitive scenes where inner-network invocation is likely to diverge without further hardening +- `medium`: scenes with likely alias / dictionary / default-semantics issues but lower execution sensitivity +- `low`: scenes with no immediate evidence of these five gap classes + +## Acceptance Criteria + +This analysis is complete when: + +1. All 102 final materialized scenes have a runtime-semantics record +2. `sweep-030-scene` is explicitly analyzed under all applicable gap classes +3. Summary counts exist for all five gap classes +4. Dictionary recovery gap is supported by direct source-vs-generated evidence for the anchor case +5. The report recommends next implementation routes without changing code + +## Stop Statement + +Stop after publishing the JSON inventory and report. + +Do not open implementation work from this design. 
diff --git a/docs/superpowers/specs/2026-04-20-generated-scene-source-evidence-cross-scan-design.md b/docs/superpowers/specs/2026-04-20-generated-scene-source-evidence-cross-scan-design.md new file mode 100644 index 0000000..86c6929 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-generated-scene-source-evidence-cross-scan-design.md @@ -0,0 +1,165 @@ +# Generated Scene Source Evidence Cross-Scan Design + +> Date: 2026-04-20 +> Status: Draft +> Parent roadmap: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` + +## Intent + +Execute the first bounded child step of the source-first runtime semantics hardening roadmap: + +`scan the original 102 source scenes for high-signal runtime-semantics evidence` + +This design does not change analyzer, generator, manifests, or runtime behavior. It only defines how to scan original source-scene evidence so later rule-hardening routes can be derived from source truth rather than from already-generated skills alone. + +## Objective + +For every scene in the current 102-scene set: + +1. locate the original source directory +2. perform a bounded source evidence scan +3. record whether source-side evidence exists for the five anchor gap classes: + - `invocation_alias_gap` + - `dictionary_recovery_gap` + - `parameter_default_semantics_gap` + - `resolver_to_request_mapping_gap` + - `runtime_url_semantics_gap` + +## Scope + +In scope: + +1. source directories under: + - `D:/desk/智能体资料/全量业务场景/一平台场景` +2. current 102-scene mapping from existing materialization / board assets +3. bounded file-content scanning over high-signal files +4. JSON ledger + human-readable report + +Out of scope: + +1. any code change in `src/` +2. any generated skill change +3. any rematerialization +4. any execution board update +5. any pseudo-production execution + +## Required Scan Targets + +The scan should prioritize only high-signal evidence sources. + +### 1. 
Invocation alias evidence + +Signals: + +1. scene name variants +2. menu labels +3. button labels +4. route names +5. report titles +6. user-facing Chinese phrases in HTML / JS + +### 2. Dictionary recovery evidence + +Signals: + +1. `city.js` +2. `dict.js` +3. `enum.js` +4. `options*.js` +5. tree / option arrays with `label`, `value`, `code`, `children` + +### 3. Parameter default semantics evidence + +Signals: + +1. `moment(` +2. `dayjs(` +3. default query parameter assignment +4. implicit month/week/date initialization + +### 4. Resolver-to-request mapping evidence + +Signals: + +1. `$.ajax` +2. `fetch` +3. `contentType` +4. request `data` +5. request body field names +6. mode-specific request payloads + +### 5. Runtime URL semantics evidence + +Signals: + +1. app entry URLs +2. module route URLs +3. API endpoint URLs +4. host runtime / bootstrap page hints + +## Scan Strategy + +This is not a full source index. + +The scan should: + +1. use bounded heuristics and targeted filename/content patterns +2. avoid exhaustive deep parsing of every file +3. record evidence flags and representative evidence paths +4. be sufficient to classify scenes for later hardening routes + +## Inputs + +Primary inputs: + +1. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +2. `tests/fixtures/generated_scene/scene_execution_board_2026-04-18.json` +3. source root: + - `D:/desk/智能体资料/全量业务场景/一平台场景` + +Anchor validation source: + +1. `D:/desk/智能体资料/全量业务场景/一平台场景/台区线损大数据-月_周累计线损率统计分析` + +## Output Artifacts + +### JSON + +- `tests/fixtures/generated_scene/generated_scene_source_evidence_cross_scan_2026-04-20.json` + +Each scene record should include: + +1. `sceneId` +2. `sceneName` +3. `sourceDir` +4. `evidenceFlags` +5. `evidenceFiles` +6. `riskHints` + +### Report + +- `docs/superpowers/reports/2026-04-20-generated-scene-source-evidence-cross-scan-report.md` + +The report must answer: + +1. how many scenes show dictionary evidence +2. 
how many scenes show default parameter semantics +3. how many scenes show request field aliasing +4. how many scenes show multi-URL semantics +5. which scenes look most similar to `sweep-030-scene` + +## Acceptance Criteria + +This design is complete when: + +1. all 102 scenes are included in the cross-scan +2. the five evidence families are explicit +3. the output JSON structure is defined +4. the scan remains analysis-only + +## Stop Statement + +Stop after publishing the child design and child plan. + +Do not execute the scan inside this design. diff --git a/docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-design.md b/docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-design.md new file mode 100644 index 0000000..48d9c52 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-design.md @@ -0,0 +1,298 @@ +# Generated Scene Source-First Runtime Semantics Hardening Design + +> Date: 2026-04-20 +> Status: Draft +> Supersedes: +> - `docs/superpowers/specs/2026-04-20-generated-scene-runtime-semantics-gap-analysis-design.md` +> Upstream Parent: +> - `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Upstream Materialization: +> - `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` + +## Intent + +Define the next parent roadmap for `generated_scene` after framework closure has already been achieved. + +The purpose is no longer: + +- whether the `102` scenes can be generated into skills + +That has already been proven. 
+ +The purpose is now: + +- scan the original `102` source scenes for runtime-semantics evidence +- identify all scenes that can reproduce the same class of divergence exposed by `sweep-030-scene` +- harden analyzer / generator / manifest rules at the rule level rather than scene-by-scene +- regenerate the full `102` skill set from the hardened rules +- rerun validation assets so future inner-network execution does not rediscover the same class of defects one scene at a time + +This design deliberately moves from a weak `generated-skill-first` analysis to a stronger `source-first` analysis and regeneration program. + +## Why the Previous Analysis Was Not Enough + +The superseded analysis-only design focused mainly on the already-generated skill assets. + +That is insufficient for the actual project goal, because the goal is not simply to describe gaps that already surfaced in generated skills. The goal is to: + +1. proactively find other source scenes with the same latent runtime-semantics risks as `sweep-030-scene` +2. correct the generation rules once +3. regenerate the full 102-scene bundle +4. avoid repeated inner-network rediscovery of the same class of defects + +Therefore the correct parent approach must be source-first. + +## Anchor Problem Family + +`sweep-030-scene / 台区线损大数据-月_周累计线损率统计分析` exposed five reusable gap classes: + +1. `invocation_alias_gap` +2. `dictionary_recovery_gap` +3. `parameter_default_semantics_gap` +4. `resolver_to_request_mapping_gap` +5. `runtime_url_semantics_gap` + +The roadmapping problem is no longer “fix sweep-030”. + +It is: + +`find every source scene in the current 102 set that can reproduce one or more of these five gap classes, then harden generation rules and rematerialize the whole set` + +## Source-First Principle + +For this roadmap, the original source scenes are the primary truth. + +Generated skills are secondary, derived artifacts used for comparison. + +This means: + +1. 
risk discovery starts from original source-scene files, not from generated output alone +2. generated skills are used to measure what is missing compared with source evidence +3. implementation targets rule-level recovery, not scene-name patching +4. the roadmap is incomplete until the full 102 skills are regenerated from hardened rules + +## Scope + +In scope: + +1. Scan the original 102 source-scene directories under: + - `D:/desk/智能体资料/全量业务场景/一平台场景` +2. Cross-map each source scene to the current final generated skill +3. Detect source-side evidence for the five runtime-semantics gap classes +4. Produce a full risk ledger for all 102 scenes +5. Define the bounded implementation routes required to harden generation rules +6. Define the required full rematerialization and validation refresh after rule changes + +Out of scope: + +1. Inner-network execution itself +2. Login / credential handling +3. Host-bridge runtime hardening outside current generated-scene semantics +4. Scene-by-scene ad hoc inner-network patching as the primary method + +## Problem Restatement + +The repository already reached: + +1. `102 / 102` framework auto-pass +2. `102 / 102` materialized skills +3. deterministic invocation readiness +4. full direct mock pass + +But `sweep-030-scene` proved that generated skills can still diverge from original scene runtime semantics in ways that only surface when actually invoked in a browser-attached environment. + +The project cannot sustainably close that gap by waiting for each scene to fail in inner-network execution. + +The missing capability is: + +`source-first runtime semantics extraction and rule hardening` + +## Runtime-Semantics Gap Taxonomy + +The five anchor gap classes remain the canonical taxonomy. + +### 1. `invocation_alias_gap` + +The original scene affords natural operator phrasing, but the generated deterministic manifest is too narrow. + +### 2. 
`dictionary_recovery_gap` + +The original scene contains embedded dictionaries, trees, or option structures, but the generated skill only restores a starter subset or no dictionary. + +### 3. `parameter_default_semantics_gap` + +The original page supplies default time / mode / org semantics, but the generated skill initially treats the parameter as explicitly required. + +### 4. `resolver_to_request_mapping_gap` + +The generated resolver output names are not the actual request payload field names used by the original page. + +### 5. `runtime_url_semantics_gap` + +The generated skill does not properly separate: + +1. app-entry URL +2. module-route URL +3. API endpoint URL +4. runtime browser context URL + +## New Required Source-Side Scan + +The new parent roadmap must explicitly scan the original source scenes for high-signal evidence. + +### Evidence families to scan + +1. Dictionary files + - `city.js` + - `dict.js` + - `enum.js` + - `options*.js` + - tree / option / label-code-value arrays + +2. Default-parameter semantics + - `moment(` + - `dayjs(` + - month/week defaulting + - implicit query payload initialization + +3. Request payload semantics + - `$.ajax` + - `fetch` + - `contentType` + - `data` + - request body field names + +4. Runtime URL semantics + - app entry URLs + - module route URLs + - menu navigation targets + - bootstrap candidates + +5. Invocation alias evidence + - titles + - menu labels + - button text + - route names + - report names + - operator-facing wording + +### Required output of the scan + +For each source scene: + +1. whether embedded dictionaries exist +2. whether page defaults exist +3. whether request-field aliasing exists +4. whether multiple URL kinds exist +5. whether natural alias variation is likely + +## Work Product Hierarchy + +The roadmap should produce three layers of output. + +### Layer 1: Source-Side Risk Ledger + +A full 102-scene ledger that starts from original source evidence. 
+ +### Layer 2: Rule-Hardening Route Map + +A route map that groups scenes by reusable rule fixes rather than by scene name. + +### Layer 3: Rematerialization + Validation Refresh Plan + +A controlled plan for regenerating all 102 skills and refreshing validation assets after the rule changes land. + +## Core Routes + +The source-first roadmap must be split into these fixed routes: + +### Route A: Source Cross-Scan and Evidence Ledger + +Goal: + +Build a full 102-scene source-first runtime-semantics risk inventory. + +### Route B: Rule-Level Hardening Design + +Goal: + +Translate the source-first gaps into rule-level changes for analyzer/generator/manifest output. + +Primary targets: + +1. alias generation +2. dictionary extraction +3. parameter default recovery +4. resolver-to-request field mapping +5. runtime URL classification + +### Route C: Bounded Implementation Slices + +Goal: + +Implement the rule-level hardening in bounded slices organized by reusable fix route, not by single scene. + +### Route D: Full 102 Rematerialization + +Goal: + +Regenerate all 102 skills after hardening so the new rules actually propagate to the released skill bundle. + +### Route E: Validation Refresh + +Goal: + +Refresh: + +1. deterministic invocation readiness +2. parameter readiness +3. static validation +4. direct mock execution +5. 
offline / pseudo-production handoff assets + +## Inputs + +Primary source inventory: + +- `D:/desk/智能体资料/全量业务场景/一平台场景` + +Primary generated comparison inventory: + +- `examples/scene_skill_102_final_materialization_2026-04-19/skills` + +Supporting assets: + +- `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +- `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +- `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` + +## Deliverables + +### 1. Source-first risk ledger + +- `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +### 2. Source-first analysis report + +- `docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-report.md` + +### 3. Rule-hardening roadmap outputs + +Not implemented in this design, but this design must define the bounded next plans that follow the ledger. + +## Acceptance Criteria + +This design is successful when: + +1. it explicitly requires source-scene cross-scan over the full 102 set +2. it no longer relies on generated-skill-only inspection as the main discovery method +3. it makes full rematerialization a required downstream step +4. it treats `sweep-030-scene` as an anchor case, not a one-off patch +5. it defines a route from source scan to rule hardening to regeneration + +## Stop Rule + +Stop after publishing the parent design and parent plan. + +Do not begin source scanning or implementation inside this design document. 
diff --git a/docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-design.md b/docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-design.md new file mode 100644 index 0000000..0a629e1 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-design.md @@ -0,0 +1,200 @@ +# Generated Scene Source-First Runtime Semantics Ledger Design + +> Date: 2026-04-20 +> Status: Draft +> Parent roadmap: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-first-runtime-semantics-hardening-plan.md` +> Upstream scan: +> - `docs/superpowers/plans/2026-04-20-generated-scene-source-evidence-cross-scan-plan.md` + +## Intent + +Define the second bounded child step of the source-first runtime semantics hardening roadmap: + +`merge source-side evidence with generated-skill evidence into a full 102-scene runtime-semantics ledger` + +This design is still analysis-only. It does not modify `src/`, generated skills, validation assets, or execution-board state. + +## Objective + +For every scene in the current 102-scene set: + +1. merge source-side evidence from the completed cross-scan +2. compare that evidence against current generated skill manifests and references +3. assign one or more canonical runtime-semantics gap classes +4. assign a bounded `riskLevel` +5. distinguish: + - reusable generator-level rule gap + - runtime-only residual +6. publish a source-first runtime-semantics ledger that becomes the only valid input for later hardening-route design + +## Fixed Gap Taxonomy + +The ledger must continue using the five gap classes already anchored by `sweep-030-scene`: + +1. `invocation_alias_gap` +2. `dictionary_recovery_gap` +3. `parameter_default_semantics_gap` +4. `resolver_to_request_mapping_gap` +5. 
`runtime_url_semantics_gap` + +No additional gap class should be invented inside this ledger stage unless the evidence is clearly outside these five and cannot be expressed as a subtype. + +## Scope + +In scope: + +1. the completed source cross-scan asset +2. the current final generated skills under `examples/scene_skill_102_final_materialization_2026-04-19/skills` +3. current deterministic invocation readiness assets +4. current natural-language parameter readiness assets +5. current parameter dictionary normalization assets +6. source-to-generated comparison for all 102 scenes +7. JSON ledger + human-readable report + +Out of scope: + +1. any change in `src/` +2. any skill manifest or script edit +3. any rematerialization +4. any validation rerun +5. any inner-network execution + +## Required Comparisons + +The ledger stage must compare source evidence with generated output along these axes. + +### 1. Invocation alias comparison + +Check whether source-side operator wording, labels, route names, or titles imply broader natural-language coverage than the current generated `include_keywords`. + +### 2. Dictionary comparison + +Check whether source-side dictionaries, trees, or option arrays imply a richer entity dictionary than the generated `references/*dictionary*.json` assets currently expose. + +### 3. Parameter default semantics comparison + +Check whether source-side date / period / mode initialization implies a default-value policy that the generated manifest or resolver metadata does not currently preserve. + +### 4. Resolver-to-request mapping comparison + +Check whether source-side request field names differ from generated resolver output names and whether the generated skill currently encodes an explicit mapping. + +### 5. Runtime URL comparison + +Check whether source-side evidence implies multiple URL roles: + +1. app entry URL +2. module route URL +3. API endpoint URL +4. 
runtime browser context URL + +and whether the generated skill currently collapses those roles into a single ambiguous target. + +## Ledger Schema + +Each scene record in the runtime-semantics ledger should include: + +1. `sceneId` +2. `sceneName` +3. `sourceDir` +4. `archetype` +5. `readiness` +6. `riskLevel` +7. `gaps` +8. `generatorLevelGap` +9. `runtimeOnlyResidual` +10. `recommendedFixRoutes` +11. `sourceEvidenceSummary` +12. `generatedEvidenceSummary` +13. `comparisonNotes` + +## Risk-Level Rules + +The ledger should use bounded, reproducible risk levels: + +### `high` + +Use when the scene has strong source evidence for one or more gap classes and the current generated skill visibly lacks equivalent semantics. + +### `medium` + +Use when the scene has source evidence for one or more gap classes, but current generated output appears partially aligned or the mismatch is plausible rather than explicit. + +### `low` + +Use when source evidence exists but generated output already appears materially aligned, or when the residual is likely runtime-only rather than generator-level. + +## Generator-Level vs Runtime-Only + +The ledger must classify whether a scene's residuals should later drive generator hardening or should remain runtime-only. + +### `generatorLevelGap = true` + +Use when source evidence proves the generated skill is missing semantics that should be recoverable during generation. + +### `runtimeOnlyResidual = true` + +Use when the remaining risk is primarily: + +1. login / session +2. host runtime behavior +3. local-doc / host-bridge environment +4. inner-network-only execution context + +and not a generation-semantic omission. + +These two flags are not always mutually exclusive, but the ledger must explain why. + +## Inputs + +Primary inputs: + +1. `tests/fixtures/generated_scene/generated_scene_source_evidence_cross_scan_2026-04-20.json` +2. `examples/scene_skill_102_final_materialization_2026-04-19/skills` +3. 
`tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` +4. `tests/fixtures/generated_scene/scene_skill_102_natural_language_parameter_readiness_2026-04-20.json` +5. `tests/fixtures/generated_scene/scene_skill_102_parameter_dictionary_template_normalization_2026-04-20.json` + +Anchor runtime findings: + +1. the confirmed `sweep-030-scene` inner-network findings: + - alias mismatch + - starter-subset org dictionary + - page-semantic default period behavior + - request-field mismatch + - runtime context URL ambiguity + +## Output Artifacts + +### JSON + +- `tests/fixtures/generated_scene/generated_scene_source_first_runtime_semantics_ledger_2026-04-20.json` + +### Report + +- `docs/superpowers/reports/2026-04-20-generated-scene-source-first-runtime-semantics-ledger-report.md` + +The report must answer: + +1. how many scenes are `high`, `medium`, `low` +2. how many scenes carry each gap class +3. how many scenes appear to require generator-level fixes +4. how many scenes look runtime-only +5. which route clusters are likely to yield the highest reuse + +## Acceptance Criteria + +This design is complete when: + +1. it defines a full-scene ledger stage rather than scene-by-scene notes +2. it binds the ledger to the fixed five-gap taxonomy +3. it defines how source evidence and generated evidence are compared +4. it defines `riskLevel`, `generatorLevelGap`, and `runtimeOnlyResidual` +5. it remains analysis-only + +## Stop Statement + +Stop after publishing this ledger design and its child plan. + +Do not execute the ledger build inside this design. 
diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-deterministic-invocation-readiness-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-deterministic-invocation-readiness-design.md new file mode 100644 index 0000000..a22ac56 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-deterministic-invocation-readiness-design.md @@ -0,0 +1,59 @@ +# Scene Skill 102 Deterministic Invocation Readiness Design + +> Date: 2026-04-20 +> Parent Asset: `examples/scene_skill_102_final_materialization_2026-04-19` + +## Intent + +Prepare the final materialized scene skills for deterministic natural-language invocation through sgClaw. + +This design addresses the gap between a generated skill package and a deterministic callable scene skill. The sgClaw runtime already has a registry-backed deterministic dispatch path for instructions ending with `。。。`, but the final materialized `scene.toml` files are not yet normalized for that convention. + +## Current Findings + +1. `101 / 102` scenes have complete skill packages and `scene.toml`. +2. `1 / 102` scene, `sweep-012-scene / 业扩报装管理制度`, is still a materialization failure. +3. `0 / 101` complete generated scene manifests currently use the deterministic suffix `。。。`. +4. `101 / 101` complete generated scene manifests currently use exactly one include keyword, usually the full scene name. +5. `10 / 101` complete generated scene manifests currently define runtime-supported params. + +## Scope + +Allowed: + +1. Analyze deterministic invocation readiness for the complete final skill set. +2. Normalize `scene.toml` deterministic metadata for complete packages. +3. Generate invocation samples and dispatch dry-run evidence. +4. Publish readiness assets and reports. + +Forbidden: + +1. Do not execute browser scripts. +2. Do not change generated JavaScript scripts. +3. Do not modify sgClaw runtime dispatch code unless a later dedicated implementation plan is created. +4. Do not repair `sweep-012-scene`. +5. 
Do not start static, mock, or production validation. +6. Do not rename skill directories. + +## Readiness Model + +A skill is deterministic-invocation-ready when: + +1. it has a valid `scene.toml`; +2. `[deterministic].suffix = "。。。"`; +3. `include_keywords` can match at least the readable scene name; +4. the scene can be uniquely selected by dispatch dry-run using an expected invocation sample; +5. required params either resolve or produce a structured prompt. + +## Expected Outputs + +1. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_2026-04-20.json` +2. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_samples_2026-04-20.json` +3. `docs/superpowers/reports/2026-04-20-scene-skill-102-deterministic-invocation-readiness-report.md` + +## Completion Criteria + +1. All 101 complete packages have deterministic suffix normalized to `。。。`. +2. Dispatch dry-run results are available for all complete packages. +3. The failed `sweep-012-scene` remains explicitly excluded and listed as not ready. +4. No browser execution or production validation is performed. diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-full-direct-mock-execution-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-full-direct-mock-execution-design.md new file mode 100644 index 0000000..6d7d736 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-full-direct-mock-execution-design.md @@ -0,0 +1,76 @@ +# Scene Skill 102 Full Direct Mock Execution Design + +> Date: 2026-04-20 +> Status: Draft +> Upstream Mock Harness: `docs/superpowers/plans/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-plan.md` +> Input Harness Results: `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_harness_results_2026-04-20.json` + +## Intent + +Extend mock runtime validation from representative execution to direct execution of all `102` materialized scene skills. 
+ +This design remains strictly local and mock-only. It does not perform real browser execution, production system access, or business-data validation. + +## Current Baseline + +The previous mock runtime harness run produced: + +| Status | Count | +| --- | ---: | +| representative `mock-runtime-pass` | 19 | +| representative failures | 0 | + +That result proves archetype-level representative viability, but it does not prove every generated script can directly execute in a mock runtime. + +## Goal + +Produce a direct mock runtime result for all `102` materialized skills. + +Each scene must receive exactly one of: + +1. `direct-mock-pass` +2. `direct-mock-partial` +3. `direct-mock-fail` + +## Validation Boundary + +Allowed: + +1. read final generated skill packages +2. load generated scripts in Node +3. inject fake runtime dependencies +4. invoke `buildBrowserEntrypointResult` +5. write direct mock result assets and report + +Forbidden: + +1. do not modify generated skill packages +2. do not modify `src/generated_scene/analyzer.rs` +3. do not modify `src/generated_scene/generator.rs` +4. do not rematerialize skills +5. do not update official board +6. do not open a real browser +7. do not access real network or production systems +8. do not claim production pass + +## Expected Output + +1. `tests/fixtures/generated_scene/scene_skill_102_full_direct_mock_execution_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-full-direct-mock-execution-report.md` + +## Interpretation + +If all `102` scenes pass direct mock execution, the project can say: + +`102 / 102 generated skills can load and execute their primary entrypoint under controlled fake dependencies.` + +It still cannot say: + +`102 / 102 generated skills are production-ready.` + +## Stop Rule + +Stop after direct mock results and report are published. + +Do not start pseudo-production batch selection under this design. 
+ diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-design.md new file mode 100644 index 0000000..e239a8a --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-mock-runtime-harness-implementation-design.md @@ -0,0 +1,150 @@ +# Scene Skill 102 Mock Runtime Harness Implementation Design + +> Date: 2026-04-20 +> Status: Draft +> Upstream Validation: `docs/superpowers/plans/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-plan.md` +> Input Matrix: `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json` + +## Intent + +Define a bounded implementation stage for mock runtime harnesses after the `102` materialized skill set has passed static package validation and deterministic dispatch dry-run. + +This design is not production validation. It exists to prove that generated skill scripts can be loaded and exercised against controlled fake dependencies before any real browser, host bridge, or production system is touched. 
+ +## Current Baseline + +From `scene_skill_102_mock_runtime_validation_matrix_2026-04-20.json`: + +| Archetype | Count | Representative scenes | +| --- | ---: | --- | +| `paginated_enrichment` | 51 | `sweep-001-scene`, `sweep-002-scene`, `sweep-003-scene` | +| `host_bridge_workflow` | 26 | `sweep-007-scene`, `sweep-009-scene`, `sweep-010-scene` | +| `multi_mode_request` | 10 | `sweep-020-scene`, `sweep-023-scene`, `sweep-030-scene` | +| `local_doc_pipeline` | 6 | `sweep-012-scene`, `sweep-017-scene`, `sweep-019-scene` | +| `single_request_enrichment` | 5 | `sweep-013-scene`, `sweep-016-scene`, `sweep-068-scene` | +| `multi_endpoint_inventory` | 2 | `sweep-084-scene`, `sweep-085-scene` | +| `page_state_eval` | 2 | `sweep-066-scene`, `sweep-094-scene` | + +Current matrix status: + +| Status | Count | +| --- | ---: | +| `mock-covered-by-representative` | 19 | +| `mock-needs-harness` | 83 | + +Important interpretation: + +`mock-covered-by-representative` currently means representative selection only. It does not mean scripts have been executed in a mock runtime. + +## Harness Layers + +### Layer 1: Script Load Harness + +Purpose: + +1. load generated browser scripts in a controlled JavaScript runtime +2. verify the entry module does not fail during parse/load +3. verify referenced helper files are present + +Output status: + +`script-load-pass` or `script-load-fail` + +### Layer 2: Fake Dependency Harness + +Purpose: + +Provide controlled fake implementations for: + +1. `fetch` +2. browser DOM +3. host bridge action/callback +4. local document service +5. artifact writer + +Output status: + +`mock-dependency-ready` or `mock-dependency-missing` + +### Layer 3: Representative Flow Harness + +Purpose: + +Run representative scene scripts far enough to prove control-flow integrity. + +Checks: + +1. expected request or host action is attempted +2. controlled empty-data response is handled +3. controlled non-empty response is normalized +4. 
artifact metadata is produced when declared +5. error response does not crash outside structured failure path + +Output status: + +`mock-runtime-pass`, `mock-runtime-fail`, or `mock-runtime-partial` + +### Layer 4: Matrix Propagation + +Purpose: + +Propagate representative results to same-archetype scenes without claiming direct execution for every scene. + +Output statuses: + +1. `mock-runtime-representative-pass` +2. `mock-runtime-covered-by-representative` +3. `mock-runtime-needs-direct-run` +4. `mock-runtime-fail` + +## Route Order + +The route order is fixed: + +1. `Route 1`: `paginated_enrichment` mock harness +2. `Route 2`: `multi_mode_request` and `single_request_enrichment` mock harnesses +3. `Route 3`: `multi_endpoint_inventory` and `page_state_eval` mock harnesses +4. `Route 4`: `local_doc_pipeline` and `host_bridge_workflow` mock harnesses +5. `Route 5`: publish integrated mock runtime validation report + +Rationale: + +1. `paginated_enrichment` is the largest bucket and should validate the most reused generated flow first. +2. `multi_mode_request` and `single_request_enrichment` are mainline API flows and can share fake fetch infrastructure. +3. `multi_endpoint_inventory` and `page_state_eval` are small buckets and should be validated after the mainline fetch harness exists. +4. `local_doc_pipeline` and `host_bridge_workflow` require more specialized fakes and must not drive the first harness implementation. + +## Scope Guardrails + +Allowed: + +1. add mock validation harness files +2. add mock validation tests +3. read generated final skill packages +4. execute generated scripts only inside a mock runtime +5. publish mock runtime validation result assets and reports + +Forbidden: + +1. do not modify generated skill scripts under `examples/scene_skill_102_final_materialization_2026-04-19/skills` +2. do not modify `src/generated_scene/analyzer.rs` +3. do not modify `src/generated_scene/generator.rs` +4. do not rematerialize the `102` skills +5. 
do not update `scene_execution_board_2026-04-18.json` +6. do not start real browser execution +7. do not connect to real business systems +8. do not require production credentials, VPN, SSO, or internal network access + +## Expected Assets + +1. `tests/fixtures/generated_scene/scene_skill_102_mock_runtime_harness_results_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-mock-runtime-harness-report.md` + +## Stop Rules + +Stop after representative mock runtime results and the integrated report are published. + +Do not continue into production validation under this plan. + +Do not claim `102 / 102` real runtime pass from mock results. + diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-natural-language-parameter-readiness-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-natural-language-parameter-readiness-design.md new file mode 100644 index 0000000..9194c8c --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-natural-language-parameter-readiness-design.md @@ -0,0 +1,99 @@ +# Scene Skill 102 Natural-Language Parameter Readiness Design + +> Date: 2026-04-20 +> Parent: `2026-04-20-scene-skill-102-deterministic-invocation-readiness-design.md` + +## Intent + +Clarify whether the final 102 materialized scene skills can be invoked with natural-language query conditions before pseudo-production testing. + +This design separates deterministic dispatch readiness from parameter readiness. A skill can be selected by an instruction ending with `。。。` while still not being able to parse query conditions such as organization, period, date, or report mode. + +## Current Baseline + +1. `102 / 102` scene skills are final-materialized. +2. `102 / 102` scene skills are deterministic dispatch ready for `。。。` suffix invocation. +3. `102 / 102` scene skills pass full direct mock execution. +4. Only a subset currently declares explicit `[[params]]` in `scene.toml`. 
+ +## Problem Statement + +Internal-network validation should not use only `场景名。。。` as the invocation pattern for every skill. + +Parameterized skills must be validated with representative natural-language query conditions. For example: + +```text +兰州公司 台区线损大数据 月累计线损率统计分析。。。 +``` + +This should resolve: + +1. `兰州公司` as organization; +2. `月累计` as period mode; +3. the scene keywords as deterministic skill selection evidence. + +If a skill has required params but lacks usable resolver resources, it must be flagged before pseudo-production execution. + +## Scope + +Allowed: + +1. Analyze all final 102 skill manifests. +2. Classify parameter readiness for each scene. +3. Generate recommended natural-language invocation samples. +4. Identify resolver-resource gaps. +5. Publish readiness JSON and report. + +Forbidden: + +1. Do not modify `src/compat/scene_platform/dispatch.rs`. +2. Do not modify `src/compat/scene_platform/resolvers.rs`. +3. Do not modify `src/generated_scene/analyzer.rs`. +4. Do not modify `src/generated_scene/generator.rs`. +5. Do not edit final generated skill packages. +6. Do not execute browser, host bridge, localhost services, or production network. +7. Do not update `scene_execution_board_2026-04-18.json`. + +## Readiness Classes + +### `parameter-ready` + +The skill declares required params and all required resolver resources are present and populated enough to support deterministic parsing. + +### `parameter-gap` + +The skill declares required params, but at least one required resolver cannot currently resolve real user input because of missing, empty, or unsupported resolver configuration. + +### `parameter-not-required` + +The skill has no declared required params. It may still accept descriptive natural language, but the current runtime will primarily use it for deterministic scene selection rather than structured argument extraction. 
+ +### `parameter-implicit-risk` + +The skill has no declared required params, but the scene name suggests likely query conditions such as month, week, day, company, county, report period, or business object. These scenes should be tested carefully because user wording may imply filters that current manifests do not parse. + +## Output Model + +Each scene record should include: + +1. `sceneId` +2. `sceneName` +3. `archetype` +4. `skillDir` +5. `hasParams` +6. `requiredParams` +7. `resolverStatus` +8. `parameterReadiness` +9. `recommendedInvocation` +10. `minimalInvocation` +11. `parameterizedInvocation` +12. `gaps` +13. `notes` + +## Completion Criteria + +1. All `102` scenes are classified. +2. The report states how many scenes require explicit query conditions. +3. The report states how many required-param scenes are actually resolver-ready. +4. The report states which scenes should not be validated with only `场景名。。。`. +5. The report does not claim production readiness. diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-design.md new file mode 100644 index 0000000..12203fd --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-parameter-dictionary-template-normalization-design.md @@ -0,0 +1,75 @@ +# Scene Skill 102 Parameter Dictionary And Invocation Template Normalization Design + +> Date: 2026-04-20 +> Parent: `2026-04-20-scene-skill-102-natural-language-parameter-readiness-design.md` + +## Intent + +Make the `10` required-param scene skills usable with natural-language query conditions before pseudo-production batch execution. + +This design does not broaden deterministic dispatch. It only closes the immediate parameter-readiness gap discovered in `scene_skill_102_natural_language_parameter_readiness_2026-04-20.json`. 
+ +## Current Gap + +`10` `multi_mode_request` skills declare required params: + +1. `org` via `dictionary_entity` +2. `period` via `month_week_period` + +However, each generated `references/org-dictionary.json` is currently an empty array. Therefore inputs such as: + +```text +兰州公司 台区线损大数据 月累计 2026-03。。。 +``` + +have the correct shape, but cannot resolve `兰州公司` into `org_label` and `org_code`. + +## Scope + +Allowed: + +1. Populate `references/org-dictionary.json` for the fixed `10` required-param skills. +2. Use the existing, already-tested minimal organization aliases from deterministic-submit fixtures: + - `国网兰州供电公司` / `62401` + - `城关供电分公司` / `6240108` + - `国网天水供电公司` / `62403` +3. Refresh natural-language parameter readiness assets. +4. Refresh invocation samples. +5. Refresh pseudo-production handoff inputs for selected scenes that require params. + +Forbidden: + +1. Do not modify `src/compat/scene_platform/dispatch.rs`. +2. Do not modify `src/compat/scene_platform/resolvers.rs`. +3. Do not modify `src/generated_scene/analyzer.rs`. +4. Do not modify `src/generated_scene/generator.rs`. +5. Do not execute browser, host bridge, localhost services, or production network. +6. Do not claim this starter dictionary is a full production organization dictionary. +7. Do not add params to the other `92` non-param skills under this plan. + +## Starter Dictionary Policy + +The populated dictionary is a pseudo-production starter dictionary. It is sufficient to validate the natural-language parameter plumbing for inputs such as `兰州公司` and `月累计 2026-03`. + +It is not a full province-wide organization dictionary. A later production hardening step may replace or expand it with the real unit tree. + +## Readiness Target + +After this plan: + +1. `10 / 10` required-param skills should move from `parameter-gap` to `parameter-ready`. +2. 
Their recommended invocation samples should include: + - organization alias; + - scene name; + - month/week mode; + - concrete period value; + - `。。。` suffix. +3. Pseudo-production handoff should not use bare scene names for these scenes. + +## Completion Criteria + +1. All fixed `10` dictionaries are non-empty and parseable. +2. Natural-language parameter readiness is refreshed. +3. Invocation samples are refreshed. +4. Pseudo-production handoff is refreshed for selected required-param scenes. +5. No runtime source files are modified. diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-design.md new file mode 100644 index 0000000..46736f3 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-design.md @@ -0,0 +1,90 @@ +# Scene Skill 102 Pseudo-Production Batch Execution Design + +> Date: 2026-04-20 +> Parent: `2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-plan.md` + +## Intent + +Execute the prepared 10-scene pseudo-production batch in an operator-provided quasi-production or production-like environment and collect structured evidence. + +This design defines execution boundaries and result recording. It does not embed credentials or require credentials to be stored in the repository. + +## Fixed Inputs + +1. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_handoff_2026-04-20.json` +2. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_evidence_checklist_2026-04-20.json` +3. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_execution_record_template_2026-04-20.json` +4. final materialized skill directory: + `examples/scene_skill_102_final_materialization_2026-04-19/skills` + +## Execution Boundary + +Pseudo-production execution may only run in an operator-approved environment where the operator has provided: + +1. 
browser or sgClaw runtime access +2. required network access +3. valid session state outside the repository +4. a local evidence output directory +5. approval to capture redacted logs/screenshots/artifacts + +## Credential Rule + +Never store these in the repository: + +1. passwords +2. cookies +3. access tokens +4. Authorization headers +5. private keys +6. VPN secrets +7. internal session dumps + +Execution records may reference that an operator-provided session was used, but must not include the session material. + +## Execution Result States + +Each selected scene must resolve to exactly one: + +1. `pseudo-prod-pass` +2. `login-blocked` +3. `network-blocked` +4. `host-bridge-blocked` +5. `local-doc-runtime-blocked` +6. `data-mismatch` +7. `artifact-mismatch` +8. `environment-unavailable` +9. `runtime-error` + +## Evidence Requirements + +Each selected scene must collect: + +1. `execution-record.json` +2. console log +3. network summary +4. screenshot when browser target page is required +5. exported artifact if produced +6. notes + +All evidence must be redacted before committing any summary to the repository. + +## Repository Outputs + +The repository should receive only redacted and structured execution summaries: + +1. `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_batch_execution_results_2026-04-20.json` +2. `docs/superpowers/reports/2026-04-20-scene-skill-102-pseudoprod-batch-execution-report.md` + +Raw evidence may remain outside the repository unless explicitly redacted. + +## Forbidden Scope + +This design does not allow: + +1. committing credentials +2. modifying generated skill packages +3. modifying analyzer/generator/runtime code +4. updating official board status +5. expanding beyond the selected 10 scenes +6. 
treating pseudo-production pass as full production certification + diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-design.md new file mode 100644 index 0000000..cc8e5b8 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-execution-preparation-design.md @@ -0,0 +1,100 @@ +# Scene Skill 102 Pseudo-Production Batch Execution Preparation Design + +> Date: 2026-04-20 +> Parent: `2026-04-20-scene-skill-102-pseudoprod-batch-selection-plan.md` + +## Intent + +Prepare the first pseudo-production execution batch without running it. + +This design defines environment handoff requirements, evidence collection rules, and execution record templates for the 10 selected scenes from the batch selection plan. + +## Fixed Batch + +The execution preparation batch is fixed by: + +- `tests/fixtures/generated_scene/scene_skill_102_pseudoprod_batch_selection_2026-04-20.json` + +The batch contains 10 scenes across: + +1. `paginated_enrichment`: 4 +2. `multi_mode_request`: 2 +3. `single_request_enrichment`: 2 +4. `multi_endpoint_inventory`: 1 +5. `page_state_eval`: 1 + +## Preparation Only + +This design does not allow real execution. + +It only prepares: + +1. environment handoff checklist +2. evidence package layout +3. per-scene execution record template +4. failure taxonomy mapping +5. operator instructions + +## Environment Handoff Requirements + +The operator must provide or confirm these outside the repository: + +1. target browser or quasi-production host +2. network access to required internal endpoints +3. valid login/session state where needed +4. allowed output directory for downloaded/exported artifacts +5. console log capture method +6. network log capture method +7. screenshot capture method + +No credentials, tokens, cookies, or secrets should be stored in the repository. 
+ +## Evidence Package Layout + +Each scene should use a local evidence folder outside tracked credentials: + +```text +pseudoprod_evidence/ + / + execution-record.json + console.log + network-summary.json + screenshot.png + exported-artifact.* + notes.md +``` + +The repository may store templates and redacted summaries, but not sensitive raw credentials or session material. + +## Execution Result States + +Each scene must resolve to one of: + +1. `pseudo-prod-pass` +2. `login-blocked` +3. `network-blocked` +4. `host-bridge-blocked` +5. `local-doc-runtime-blocked` +6. `data-mismatch` +7. `artifact-mismatch` +8. `environment-unavailable` +9. `runtime-error` + +## Forbidden Scope + +This design does not allow: + +1. running browser automation +2. invoking real target systems +3. storing credentials +4. modifying generated skills +5. modifying `analyzer.rs`, `generator.rs`, dispatch, or runtime code +6. updating official board status + +## Expected Outputs + +1. environment handoff checklist JSON +2. per-scene execution record template JSON +3. evidence checklist JSON +4. preparation report + diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-selection-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-selection-design.md new file mode 100644 index 0000000..44233ff --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-pseudoprod-batch-selection-design.md @@ -0,0 +1,97 @@ +# Scene Skill 102 Pseudo-Production Batch Selection Design + +> Date: 2026-04-20 +> Parent: `2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-plan.md` +> Upstream: `2026-04-20-sweep-015-direct-mock-partial-closure-plan.md` + +## Intent + +Select the first bounded pseudo-production validation batch after all 102 generated skills have passed local full direct mock execution. + +This design does not execute pseudo-production. 
It only defines the candidate selection rules, batch composition, evidence requirements, and stop conditions for the next execution stage. + +## Current Baseline + +1. Final materialized skills: `102 / 102` +2. Deterministic invocation readiness: `102 / 102` +3. Static validation: `102 / 102` +4. Dispatch dry-run: `102 / 102` +5. Full direct mock execution: `102 / 102` +6. Pseudo-production readiness: + - `pseudo-prod-ready`: `70` + - `real-env-required`: `32` + +## Selection Principle + +The first pseudo-production batch should be small, balanced, and low-risk. + +It should include only scenes that are: + +1. materialized +2. deterministic dispatch ready +3. static validated +4. direct mock pass +5. pseudo-prod-ready + +It should not include scenes that require host-bridge runtime, local-doc runtime, document export runtime, or other real-environment-only dependencies in the first batch. + +## Batch Shape + +The first batch should contain `10` scenes: + +1. `paginated_enrichment`: 4 +2. `multi_mode_request`: 2 +3. `single_request_enrichment`: 2 +4. `multi_endpoint_inventory`: 1 +5. `page_state_eval`: 1 + +`host_bridge_workflow` and `local_doc_pipeline` are explicitly excluded from the first pseudo-production batch because their readiness records require real environment dependencies. + +## Required Evidence Per Scene + +Each selected scene must produce or collect: + +1. console log +2. network log or request summary +3. screenshot if browser target page is required +4. exported file if an artifact is produced +5. generation report reference +6. deterministic invocation input used +7. final execution classification + +## Failure Taxonomy + +Pseudo-production execution results must classify failures as one of: + +1. `login-blocked` +2. `network-blocked` +3. `host-bridge-blocked` +4. `local-doc-runtime-blocked` +5. `data-mismatch` +6. `artifact-mismatch` +7. `environment-unavailable` +8. `runtime-error` + +## Forbidden Scope + +This design does not allow: + +1. 
executing browser automation +2. accessing production credentials +3. accessing real business systems +4. modifying generated skill packages +5. modifying `analyzer.rs`, `generator.rs`, or runtime dispatch +6. updating official board status +7. claiming production pass + +## Expected Output + +The output is a pseudo-production batch plan asset that names: + +1. selected scenes +2. deferred scenes +3. selection reasons +4. execution prerequisites +5. required evidence checklist +6. next execution plan input + diff --git a/docs/superpowers/specs/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-design.md b/docs/superpowers/specs/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-design.md new file mode 100644 index 0000000..83d3c49 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-scene-skill-102-static-mock-pseudoprod-validation-design.md @@ -0,0 +1,173 @@ +# Scene Skill 102 Static, Mock, And Pseudo-Production Validation Design + +> Date: 2026-04-20 +> Status: Draft +> Upstream Framework: `docs/superpowers/plans/2026-04-19-scene-skill-102-full-coverage-framework-plan.md` +> Upstream Materialization: `docs/superpowers/plans/2026-04-19-scene-skill-102-final-materialization-plan.md` +> Upstream Invocation Readiness: `docs/superpowers/plans/2026-04-20-scene-skill-102-deterministic-invocation-readiness-plan.md` + +## Intent + +Define the validation stage after the `102` scene set has reached: + +1. `102 / 102` final materialized skill packages +2. `102 / 102` deterministic invocation readiness using the `U+3002 x3` deterministic suffix +3. `0` materialization failures +4. `0` deterministic dispatch ambiguities + +This design does not extend the framework coverage work. It starts the next stage: proving that the materialized skill assets are structurally healthy, dispatchable, mock-runnable, and ready for a later real-environment validation campaign. + +## Current Baseline + +Fixed inputs: + +1. 
`examples/scene_skill_102_final_materialization_2026-04-19/skills` +2. `examples/scene_skill_102_final_materialization_2026-04-19/SCENE_INDEX.md` +3. `examples/scene_skill_102_final_materialization_2026-04-19/scene_skill_102_index.json` +4. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_manifest_2026-04-19.json` +5. `tests/fixtures/generated_scene/scene_skill_102_final_materialization_failures_2026-04-19.json` +6. `tests/fixtures/generated_scene/scene_skill_102_deterministic_invocation_readiness_after_keyword_refinement_2026-04-20.json` + +Current state: + +| Layer | Count | +| --- | ---: | +| materialized skill packages | 102 / 102 | +| deterministic invocation ready | 102 / 102 | +| known materialization failures | 0 | +| deterministic ambiguities | 0 | + +## Validation Layers + +This validation stage has four layers. Each layer answers a different question. + +### Layer 1: Static Package Validation + +Question: + +Can every materialized skill package be parsed, indexed, and inspected without executing browser or business runtime code? + +Checks: + +1. required files exist: + - `SKILL.toml` + - `SKILL.md` + - `scene.toml` + - `references/generation-report.json` + - at least one runtime script +2. TOML files parse successfully +3. JSON reports parse successfully +4. `scene.toml` references the expected `sceneId`, tool, suffix, and keyword fields +5. `SKILL.toml` contains stable machine name and human-readable display metadata +6. generated scripts are non-empty and referenced consistently + +Output status: + +`static-validated` or `static-invalid` + +### Layer 2: Deterministic Invocation Dry-Run + +Question: + +Can sgClaw select the correct skill for deterministic user input ending in the `U+3002 x3` suffix without using an LLM? + +Checks: + +1. full scene name plus the `U+3002 x3` suffix resolves to the expected skill +2. index sample utterance resolves to the expected skill +3. duplicate or ambiguous keyword matches are reported +4. 
scenes with parameter hints are flagged for later parameter validation + +This layer must not execute the selected skill. It only validates registry and dispatch behavior. + +Output status: + +`dispatch-dry-run-pass`, `dispatch-ambiguous`, or `dispatch-no-match` + +### Layer 3: Mock Runtime Validation + +Question: + +Can representative generated scripts execute their control flow against mocked browser, fetch, host-bridge, and local-doc dependencies? + +This layer is not full production validation. It only proves that generated scripts can run through their main control path with controlled fake responses. + +Checks: + +1. generated script module loads +2. entry function is callable +3. mock request paths are invoked in expected order +4. empty data and basic error data do not crash the script +5. artifact metadata path is produced when the archetype declares exports + +Scope: + +This layer should begin with archetype representatives, then expand only if the representative harness is stable. + +Output status: + +`mock-runtime-pass`, `mock-runtime-fail`, or `mock-runtime-not-covered` + +### Layer 4: Pseudo-Production Validation Plan + +Question: + +What must be true before moving from mock validation into real environment validation? + +This layer defines the pre-production checklist and evidence bundle. It does not require production credentials or real system access. + +Checklist: + +1. environment variable and runtime dependency inventory +2. browser or host-bridge dependency declaration +3. expected artifact type per skill +4. required screenshots, logs, HAR files, console logs, or generated artifacts for later real execution +5. pass/fail taxonomy for real-environment results + +Output status: + +`pseudo-prod-ready`, `pseudo-prod-blocked`, or `real-env-required` + +## Non-Goals + +This design does not: + +1. modify `src/generated_scene/analyzer.rs` +2. modify `src/generated_scene/generator.rs` +3. rematerialize the `102` skill packages +4. 
update `scene_execution_board_2026-04-18.json` +5. start browser-integrated production execution +6. require live credentials, VPN, SSO, or production network access +7. claim `102 / 102` real-sample executed-pass + +## Validation Status Model + +Each scene should eventually have independent statuses: + +1. `materializationStatus` +2. `deterministicDispatchStatus` +3. `staticValidationStatus` +4. `mockRuntimeStatus` +5. `pseudoProductionReadinessStatus` +6. `realEnvironmentExecutionStatus` + +This prevents the project from confusing generated skill availability with production correctness. + +## Expected Deliverables + +The implementation plan should produce: + +1. static validation result JSON +2. deterministic dry-run validation JSON +3. mock runtime readiness matrix +4. pseudo-production checklist +5. validation report +6. next-stage decision on whether to start real environment validation + +## Stop Rules + +Stop after publishing validation readiness assets and reports. + +Do not proceed into real production execution under this plan. + +Do not modify generated framework logic under this plan. diff --git a/docs/superpowers/specs/2026-04-20-sweep-012-materialization-recovery-design.md b/docs/superpowers/specs/2026-04-20-sweep-012-materialization-recovery-design.md new file mode 100644 index 0000000..60ef00a --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-sweep-012-materialization-recovery-design.md @@ -0,0 +1,37 @@ +# Sweep 012 Materialization Recovery Design + +> Date: 2026-04-20 +> Parent Plan: `2026-04-19-scene-skill-102-final-materialization-plan.md` + +## Intent + +Recover the single failed final materialization package: + +`sweep-012-scene / 业扩报装管理制度` + +The official framework board expects this scene to be `paginated_enrichment / A`, but final materialization fell into `host_bridge_workflow / C` and failed before writing required skill files. + +## Scope + +Allowed: + +1. Diagnose why this one scene materializes as `host_bridge_workflow`. +2. 
Apply the smallest bounded correction needed for this scene to materialize. +3. Re-run only `sweep-012-scene` into the existing final materialization root. +4. Refresh final materialization manifest, failures asset, human-readable index, and deterministic readiness for this scene. +5. Publish a recovery report. + +Forbidden: + +1. Do not rerun all 102 scenes. +2. Do not change other scene packages unless shared metadata refresh requires aggregate indexes. +3. Do not start static, mock, or production validation. +4. Do not update the official execution board. +5. Do not create a new family. + +## Completion Criteria + +1. `sweep-012-scene` has `SKILL.toml`, `SKILL.md`, `scene.toml`, and at least one script. +2. `sweep-012-scene` deterministic suffix is `。。。`. +3. Full-name deterministic dispatch selects `sweep-012-scene`. +4. Final materialization failure count becomes `0`. diff --git a/docs/superpowers/specs/2026-04-20-sweep-015-direct-mock-partial-closure-design.md b/docs/superpowers/specs/2026-04-20-sweep-015-direct-mock-partial-closure-design.md new file mode 100644 index 0000000..3ae77d6 --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-sweep-015-direct-mock-partial-closure-design.md @@ -0,0 +1,64 @@ +# Sweep 015 Direct Mock Partial Closure Design + +> Date: 2026-04-20 +> Parent: `2026-04-20-scene-skill-102-full-direct-mock-execution-plan.md` +> Scope: bounded mock-only closure for `sweep-015-scene` + +## Intent + +Close the single remaining `direct-mock-partial` from the 102 full direct mock run before starting pseudo-production batch selection. + +The only target scene is: + +- `sweep-015-scene / 任务报表` + +## Problem Statement + +The full direct mock execution produced: + +- `direct-mock-pass`: 101 +- `direct-mock-partial`: 1 + +The partial scene is `sweep-015-scene`. 
Its generated script loads and completes the mock runtime path, but returns artifact status `partial` because all mock rows are filtered out by the script-level business filter: + +- `FILTER_EXPR = "row.status == 5"` + +The full direct mock runner's generic fake row currently does not provide `status = 5`, so the mock data does not satisfy the generated skill's declared business filter. + +## Root Cause Classification + +This is a mock fixture contract gap, not a generator or generated-skill defect. + +Evidence: + +- `sweep-015-scene` generation report has readiness `A`. +- `sweep-015-scene` has complete `paginated_enrichment` workflow evidence. +- The script returns `partial` rather than throwing or failing to load. +- The mock runner's fake row lacks the field required by the script filter. + +## Allowed Changes + +1. Update the full direct mock runner fake data so the mock row satisfies `sweep-015-scene`'s filter contract. +2. Rerun full direct mock execution. +3. Refresh the full direct mock JSON/report. +4. Publish a closure report. + +## Forbidden Changes + +1. Do not modify generated skill packages. +2. Do not modify `src/generated_scene/analyzer.rs`. +3. Do not modify `src/generated_scene/generator.rs`. +4. Do not modify `src/generated_scene/ir.rs`. +5. Do not access real browser, real network, production credentials, or business systems. +6. Do not start pseudo-production batch selection under this design. + +## Expected Outcome + +The full direct mock result should become: + +- `direct-mock-pass`: 102 +- `direct-mock-partial`: 0 +- `direct-mock-fail`: 0 + +This only proves local mock runtime closure. It does not prove pseudo-production or production execution. 
+ diff --git a/docs/superpowers/specs/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-design.md b/docs/superpowers/specs/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-design.md new file mode 100644 index 0000000..6a6673b --- /dev/null +++ b/docs/superpowers/specs/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-design.md @@ -0,0 +1,86 @@ +# Sweep-030 Deterministic Keyword / Alias Normalization Design + +## Intent + +Provide a bounded fix so `sweep-030-scene` can be deterministically matched from the service console input form used in the inner-network environment, without changing sgClaw runtime, callback-host behavior, or resolver logic. + +This design only targets the deterministic manifest surface of: + +- `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-030-scene/scene.toml` + +## Problem + +The current `sweep-030-scene` deterministic manifest only exposes one `include_keywords` entry: + +- `台区线损大数据-月_周累计线损率统计分析` + +But the real operator input uses a more natural phrase: + +- `兰州公司 台区线损大数据 月累计线损率统计分析。。。` + +Current deterministic dispatch requires `instruction.contains(keyword)`. Because the manifest keyword is too narrow and punctuation-sensitive, `include_hits = 0`, and dispatch returns the unsupported-scene prompt before the skill is selected. 
+ +## Scope + +### In Scope + +- Update deterministic keyword / alias coverage for `sweep-030-scene` +- Preserve current suffix `。。。` +- Preserve current param declarations (`org`, `period`) +- Publish a route-local verification asset and report + +### Out Of Scope + +- Any change to `src/compat/scene_platform/dispatch.rs` +- Any change to resolver implementation +- Any change to callback-host, browser runtime, or helper-page lifecycle +- Any change to `bootstrap.target_url` +- Any change to official board or final materialization status +- Any broader `G2` family normalization outside `sweep-030-scene` + +## Design + +Normalize `sweep-030-scene` deterministic aliases so the scene can be matched by the natural phrases already used in inner-network testing. + +The deterministic alias set should cover at least: + +- `台区线损大数据-月_周累计线损率统计分析` +- `台区线损大数据 月累计线损率统计分析` +- `台区线损大数据 周累计线损率统计分析` +- `台区线损大数据 月累计` +- `台区线损大数据 周累计` +- `台区线损率统计分析` + +The alias set should remain specific enough not to collide with unrelated `G2` scenes. + +## Expected Result + +This fix should let the following type of input clear deterministic dispatch: + +- `兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。` + +This design does not claim to fix helper bootstrap or callback-host startup. It only ensures that `sweep-030-scene` is selected first, so the next layer can be tested correctly. + +## Allowed Files + +- `examples/scene_skill_102_final_materialization_2026-04-19/skills/sweep-030-scene/scene.toml` +- `tests/fixtures/generated_scene/sweep_030_deterministic_keyword_alias_normalization_2026-04-20.json` +- `docs/superpowers/reports/2026-04-20-sweep-030-deterministic-keyword-alias-normalization-report.md` + +## Forbidden Files + +- `src/compat/scene_platform/dispatch.rs` +- `src/browser/callback_host.rs` +- `src/service/server.rs` +- `src/generated_scene/*` +- `resources/rules.json` + +## Stop Rule + +Stop after: + +1. `sweep-030-scene` deterministic aliases are normalized +2. 
A route-local verification record is written +3. A report is published + +Do not proceed into helper-page / requesturl debugging within this plan. diff --git a/docs/superpowers/specs/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-design.md b/docs/superpowers/specs/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-design.md new file mode 100644 index 0000000..633cae0 --- /dev/null +++ b/docs/superpowers/specs/2026-04-21-generated-scene-local-doc-pipeline-residual-closure-design.md @@ -0,0 +1,56 @@ +# Generated Scene Local-Doc Pipeline Residual Closure Design + +Date: 2026-04-21 + +Parent status source: + +- `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` + +## Problem Statement + +After runtime-semantics hardening, rematerialization rerun, and validation refresh rerun, the remaining unresolved generated-scene residuals are narrowed to `6` `local_doc_pipeline` scenes: + +1. `sweep-025-scene` +2. `sweep-047-scene` +3. `sweep-050-scene` +4. `sweep-052-scene` +5. `sweep-062-scene` +6. `sweep-087-scene` + +All six currently fail-closed during generation because workflow evidence is still considered incomplete for archetype `local_doc_pipeline`. + +## Goal + +Create a bounded route that inspects only these six residual scenes, identifies the missing reusable workflow-evidence patterns, and closes them at generator/analyzer rule level so that the next rematerialization rerun can reduce or eliminate the remaining `local_doc_pipeline` failures. + +## Source-First Principle + +This route remains source-first: + +1. inspect the six original source scene directories +2. identify reusable evidence shapes that should count as valid `local_doc_pipeline` workflow evidence +3. 
encode only reusable evidence recovery, not one-off generated-output patching + +## Closure Target + +The route succeeds if it publishes a reusable first slice that makes the missing workflow evidence recognizable for the bounded six-scene bucket. + +The route does not itself claim final closure of all six scenes; final closure is proven only after downstream rematerialization rerun and validation refresh rerun. + +## Expected Reusable Focus + +The likely reusable closure surface is one or more of: + +1. doc-export evidence variants not currently recognized +2. local report-log / staging-file workflow shapes not currently recognized +3. query-leg + doc-export combinations that should count as `local_doc_pipeline` +4. evidence recovery from source-side helper scripts or embedded config files + +## Outputs + +This design leads to a bounded implementation plan that: + +1. fixes only generator/analyzer rule recovery for the six-scene bucket +2. publishes route-local followup JSON +3. publishes route-local report +4. 
does not rerun rematerialization or validation refresh inside the route diff --git a/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-design.md b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-design.md new file mode 100644 index 0000000..1da4477 --- /dev/null +++ b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-offline-validation-bundle-refresh-design.md @@ -0,0 +1,127 @@ +# Generated Scene Runtime Semantics Offline Validation Bundle Refresh Design + +Date: 2026-04-21 + +## Context + +The runtime-semantics hardening rerun has produced a refreshed 102-scene bundle: + +- `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` + +The validation refresh confirms: + +- `materialized = 102 / 102` +- `deterministicReady = 102 / 102` +- `staticPass = 102 / 102` +- `directMockPass = 102 / 102` +- `pseudoProdSelected = 7` + +The previous offline validation bundle under `dist/sgclaw_102_pseudoprod_validation_bundle_2026-04-20` is now stale because it was based on the pre-runtime-semantics materialization. + +## Problem + +Pseudo-production validation must not continue from the stale 2026-04-20 package. The inner-network operator needs a refreshed offline bundle that carries the new canonical 2026-04-21 runtime-semantics skills and the refreshed pseudo-production handoff assets. + +## Goal + +Create a bounded plan for refreshing a portable offline validation bundle for the 2026-04-21 runtime-semantics skill set. + +The bundle must be suitable for copying to an inner-network machine that does not have `cargo`, Rust sources, or repository test infrastructure. 
+ +## Target Bundle + +Target directory: + +- `dist/sgclaw_102_runtime_semantics_validation_bundle_2026-04-21` + +Required shape: + +```text +dist/sgclaw_102_runtime_semantics_validation_bundle_2026-04-21/ + sg_claw.exe + skills/ + README.md + BATCH_001.md + BUNDLE_MANIFEST.json + docs/ + SCENE_INDEX.md + scene_skill_102_index.json + handoff/ + scene_skill_102_runtime_semantics_pseudoprod_execution_handoff_2026-04-21.json + scene_skill_102_runtime_semantics_pseudoprod_evidence_checklist_2026-04-21.json + scene_skill_102_runtime_semantics_pseudoprod_execution_record_template_2026-04-21.json + scene_skill_102_runtime_semantics_deterministic_invocation_readiness_2026-04-21.json + scene_skill_102_runtime_semantics_natural_language_parameter_readiness_2026-04-21.json + scene_skill_102_runtime_semantics_natural_language_invocation_samples_2026-04-21.json + scene_skill_102_runtime_semantics_full_direct_mock_execution_2026-04-21.json + resources/ + rules-102-business-targets-candidate.json + rules-102-business-targets-merged.json + rules-102-business-targets.patch + results/ + evidence/ +``` + +## Fixed Inputs + +1. `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` +2. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +3. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_failures_2026-04-21.json` +4. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_deterministic_invocation_readiness_2026-04-21.json` +5. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_natural_language_parameter_readiness_2026-04-21.json` +6. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_natural_language_invocation_samples_2026-04-21.json` +7. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_execution_handoff_2026-04-21.json` +8. 
`tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_evidence_checklist_2026-04-21.json` +9. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_pseudoprod_execution_record_template_2026-04-21.json` +10. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_full_direct_mock_execution_2026-04-21.json` +11. Current locally built `sg_claw.exe` +12. Optional rule assets under `resources/` + +## Required Bundle Semantics + +1. The `skills/` directory must be copied from the 2026-04-21 runtime-semantics rematerialization output. +2. The bundle must not reuse skills from `scene_skill_102_final_materialization_2026-04-19`. +3. The bundle must not reuse the old 2026-04-20 offline package as the source of truth. +4. The first batch must be derived from the refreshed 2026-04-21 pseudo-production handoff. +5. `BATCH_001.md` must show the exact natural-language input and page URL fields expected by `sg_claw_service_console.html`. +6. The README must explain that `skillsDir` must be configured with the exact JSON field name `skillsDir`. +7. The README must state that credentials, cookies, tokens, VPN secrets, and private keys must not be stored in the bundle. + +## Scope + +Allowed: + +1. Create a new bundle directory under `dist/`. +2. Copy the refreshed 102 skill packages into the bundle. +3. Copy refreshed handoff/readiness/mock assets into the bundle. +4. Copy rules candidate assets into the bundle when present. +5. Write bundle README, batch instructions, manifest, and empty evidence/results directories. +6. Write a bundle refresh report. + +Forbidden: + +1. No `src/` changes. +2. No generated skill edits. +3. No rematerialization rerun. +4. No validation refresh rerun. +5. No pseudo-production execution. +6. No production browser, production network, or credentials. +7. No official board updates. +8. No deletion of the old 2026-04-20 bundle. + +## Validation + +The bundle refresh execution must verify: + +1. 
`skills/` contains exactly 102 skill directories. +2. Every skill directory contains required package files. +3. Copied `scene.toml` files are parseable enough for structural presence checks. +4. `BUNDLE_MANIFEST.json` is valid JSON. +5. Copied handoff JSON assets are valid JSON. +6. `BATCH_001.md` contains exactly the selected pseudo-production batch entries from the refreshed handoff. + +## Stop Statement + +Stop after the refreshed offline bundle and bundle refresh report are published. + +Do not execute inner-network validation inside this route. diff --git a/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-design.md b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-design.md new file mode 100644 index 0000000..6f685e6 --- /dev/null +++ b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-post-refresh-residual-closure-design.md @@ -0,0 +1,63 @@ +# Generated Scene Runtime Semantics Post-Refresh Residual Closure Design + +Date: 2026-04-21 + +Parent execution: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-plan.md` + +## Intent + +Close the post-refresh residuals exposed by the 2026-04-21 validation refresh before any pseudo-production reuse is attempted. + +## Residual Scope + +This residual closure is strictly limited to the two regressions exposed by validation refresh: + +1. rematerialized `scene.toml` deterministic suffix regression +2. `sweep-078-scene` TOML generation corruption + +## Why A Separate Residual Stage Is Required + +The validation refresh proved that the hardened bundle is not yet a stable canonical bundle for downstream execution: + +1. `95` rematerialized scenes regressed from `suffix = "。。。"` to scene-name suffixes +2. `sweep-078-scene` emitted invalid TOML + +These are generator / serialization residuals, not pseudo-production or runtime-environment residuals. 
+ +## Fixed Inputs + +1. `tests/fixtures/generated_scene/generated_scene_runtime_semantics_rematerialization_manifest_2026-04-21.json` +2. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_deterministic_invocation_readiness_2026-04-21.json` +3. `tests/fixtures/generated_scene/scene_skill_102_runtime_semantics_static_validation_2026-04-21.json` +4. `docs/superpowers/reports/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-report.md` + +## Required Closure Targets + +### Target A + +Restore deterministic suffix generation semantics for rematerialized `scene.toml` outputs so deterministic validation no longer collapses to `0 / 102`. + +### Target B + +Identify and close the serialization path that emitted invalid TOML in `sweep-078-scene`. + +## Non-Goals + +This residual closure does not include: + +1. pseudo-production execution +2. rematerialization of `local_doc_pipeline` residual scenes +3. runtime callback-host / helper debugging +4. service-console changes +5. new hardening routes outside the two residuals above + +## Expected Downstream + +After this residual closure, the next step should be: + +1. rerun rematerialization execution +2. rerun validation refresh + +Only then should pseudo-production selection be reconsidered. 
diff --git a/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-design.md b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-design.md new file mode 100644 index 0000000..e755cfd --- /dev/null +++ b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-design.md @@ -0,0 +1,55 @@ +# Generated Scene Runtime Semantics Rematerialization Execution Design + +Date: 2026-04-21 + +Parent dependency plan: + +- `docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-rematerialization-refresh-plan.md` + +## Purpose + +Execute the rematerialization refresh required after the source-first runtime semantics hardening routes. + +This design exists because the parent dependency plan explicitly forbids executing rematerialization inside itself. + +## Fixed Inputs + +1. Hardened generator implementation after these completed routes: + - `resolver_request_mapping_hardening` + - `runtime_url_classification_hardening` + - `embedded_dictionary_extraction_hardening` + - `parameter_default_semantics_recovery_hardening` + - `alias_generation_hardening` +2. Current source mapping from `generated_scene_source_evidence_cross_scan_2026-04-20.json`. +3. Current final materialization directory: + - `examples/scene_skill_102_final_materialization_2026-04-19` + +## Execution Target + +Create a refreshed final materialization directory using the hardened generator rules. + +Recommended output root: + +- `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` + +## Required Outputs + +1. Refreshed 102-skill materialization directory. +2. Refreshed materialization manifest. +3. Refreshed failures asset. +4. Refreshed human-readable scene index. +5. Rematerialization execution report. + +## Guardrails + +1. Do not manually edit generated skill files after generation. +2. Do not update official execution board. +3. 
Do not run production, browser, or intranet execution. +4. Do not run validation refresh in this plan. +5. Preserve old final materialization output as an audit artifact. + +## Stop Statement + +Stop after refreshed materialization assets and report are published. + +Do not execute validation refresh inside this plan. diff --git a/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-design.md b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-design.md new file mode 100644 index 0000000..e9d8de8 --- /dev/null +++ b/docs/superpowers/specs/2026-04-21-generated-scene-runtime-semantics-validation-refresh-execution-design.md @@ -0,0 +1,72 @@ +# Generated Scene Runtime Semantics Validation Refresh Execution Design + +Date: 2026-04-21 + +Parent dependency plan: + +- `docs/superpowers/plans/2026-04-20-generated-scene-runtime-semantics-validation-refresh-plan.md` + +Parent rematerialization execution: + +- `docs/superpowers/plans/2026-04-21-generated-scene-runtime-semantics-rematerialization-execution-plan.md` + +## Intent + +Execute the full validation refresh required after the hardened runtime-semantics rematerialization. + +## Why A Child Execution Plan Is Required + +The parent validation refresh plan is dependency-only and explicitly forbids executing validation refresh inside that plan. A separate execution plan is required to: + +1. consume the refreshed canonical 102-skill bundle +2. regenerate validation-layer assets against the refreshed bundle +3. keep validation evidence separate from the old pre-hardening assets + +## Fixed Validation Layers + +The execution must refresh these layers in order: + +1. deterministic invocation readiness +2. natural-language parameter readiness +3. static validation +4. direct mock execution +5. 
pseudo-production handoff assets + +## Canonical Input Bundle + +All validation in this execution must consume: + +- `examples/scene_skill_102_runtime_semantics_rematerialization_2026-04-21` + +Old validation assets from the pre-hardening bundle may be read for comparison only, but may not be reused as proof. + +## Required Outputs + +1. refreshed deterministic invocation readiness asset +2. refreshed natural-language parameter readiness asset +3. refreshed static validation asset +4. refreshed direct mock execution asset +5. refreshed pseudo-production handoff assets +6. one aggregated validation refresh report + +## Boundary + +This execution may: + +1. read the refreshed runtime-semantics bundle +2. regenerate validation JSON / markdown assets under `tests/fixtures/generated_scene/` and `docs/superpowers/reports/` +3. regenerate validation helper directories under `examples/` when the existing validation flow requires route-local outputs + +This execution may not: + +1. modify `src/` +2. modify the refreshed generated skills +3. rerun rematerialization +4. update the official board +5. start real browser / pseudo-production execution + +## Success Criteria + +1. validation refresh assets are published for every required layer +2. all refreshed assets explicitly reference the 2026-04-21 rematerialized bundle +3. 
residual validation blockers are explicitly recorded rather than stale evidence being silently reused
titleListBlock = content.match(/const\s+titleList\s*=\s*\[([\s\S]*?)\]\s*[;\n]/i); + if (titleListBlock) { + for (const triple of titleListBlock[1].matchAll(/\[\s*["']([A-Za-z_][A-Za-z0-9_]*)["']\s*,\s*["']([^"'`]+)["']\s*,\s*["']([^"'`]*)["']\s*\]/g)) { + const field = triple[1]; + const top = String(triple[2] || "").trim(); + const leaf = String(triple[3] || "").trim(); + if (!field || g1eOutputColumns.some((item) => item[0] === field)) continue; + g1eOutputColumns.push([field, leaf ? `${top}-${leaf}` : top]); + } + } + for (const match of content.matchAll(/\b(type|method)\s*:\s*['"`](GET|POST|PUT|DELETE|PATCH)['"`]/gi)) { methods.set(match[2].toUpperCase(), true); } @@ -557,8 +579,10 @@ function collectDeterministicSignals(files, indexHtml) { } const bootstrapCandidates = collectBootstrapCandidates(files, indexHtml, Array.from(urls.values())); + const allEndpoints = Array.from(urls.values()); + const g1eRequestRoles = deriveG1eRequestRoles(allEndpoints); return { - endpoints: Array.from(urls.values()), + endpoints: allEndpoints, methods: Array.from(methods.keys()), responsePaths: Array.from(responsePaths), branchFields: Array.from(branchFields), @@ -571,9 +595,39 @@ function collectDeterministicSignals(files, indexHtml) { pageTitleKeywords: Array.from(pageTitleKeywords).slice(0, 10), staticParams, bootstrapCandidates, + g1eMainRequest: g1eRequestRoles.mainRequest, + g1eEnrichmentRequests: g1eRequestRoles.enrichmentRequests, + g1eJoinKeys: Array.from(g1eJoinKeys), + g1eOutputColumns: g1eOutputColumns.slice(0, 24), + g1eAggregateRules: Array.from(g1eAggregateRules).slice(0, 12), }; } +function deriveG1eRequestRoles(endpoints) { + const mainRequest = endpoints.find((endpoint) => + /getwkorderall|all|list/i.test(endpoint?.name || "") && !containsRowBinding(endpoint?.requestTemplate) + ) || null; + const enrichmentRequests = (endpoints || []) + .filter((endpoint) => { + if (mainRequest && endpoint.url === mainRequest.url) return false; + return ( + 
containsRowBinding(endpoint?.requestTemplate) || + /query|info|acpt/i.test(endpoint?.name || "") || + /query|info|acpt/i.test(endpoint?.url || "") + ); + }) + .slice(0, 6); + return { mainRequest, enrichmentRequests }; +} + +function containsRowBinding(value) { + if (!value) return false; + if (typeof value === "string") return value.includes("${row."); + if (Array.isArray(value)) return value.some(containsRowBinding); + if (typeof value === "object") return Object.values(value).some(containsRowBinding); + return false; +} + function extractEndpoints(content) { const endpoints = []; const seen = new Set(); @@ -753,6 +807,9 @@ function buildDeterministicSceneIr(context, sourceDir) { const normalizeRules = buildNormalizeRules(signals); const params = buildParams(signals, workflowArchetype); const confidence = scoreConfidence(signals, workflowArchetype); + const mainRequest = buildG1eMainRequest(signals); + const enrichmentRequests = buildG1eEnrichmentRequests(signals); + const mergePlan = buildG1eMergePlan(signals); const readiness = buildReadiness({ sceneIdDiagnostics, workflowArchetype, @@ -760,6 +817,9 @@ function buildDeterministicSceneIr(context, sourceDir) { apiEndpoints: signals.endpoints || [], params, workflowSteps, + mainRequest, + enrichmentRequests, + mergePlan, confidence, }); @@ -776,8 +836,11 @@ function buildDeterministicSceneIr(context, sourceDir) { modeSwitchField: signals.branchFields?.find((field) => /mode|period/i.test(field)) || null, workflowSteps, workflowEvidence, - requestTemplate: {}, - responsePath, + mainRequest, + enrichmentRequests, + mergePlan, + requestTemplate: mainRequest?.requestTemplate || {}, + responsePath: mainRequest?.responsePath || responsePath, normalizeRules, artifactContract: { type: "report-artifact", @@ -796,13 +859,54 @@ function buildDeterministicSceneIr(context, sourceDir) { readiness, apiEndpoints: signals.endpoints || [], staticParams: signals.staticParams || {}, - columnDefs: [], + columnDefs: 
mergePlan?.outputColumns || [], confidence, uncertainties: buildUncertainties(signals, workflowArchetype), deterministicSignals: signals, }; } +function buildG1eMainRequest(signals) { + const endpoint = signals.g1eMainRequest || null; + if (!endpoint) return null; + return { + apiEndpoint: endpoint, + requestTemplate: endpoint.requestTemplate || {}, + responsePath: signals.responsePaths?.[0] || "", + columnDefs: signals.g1eOutputColumns || [], + }; +} + +function buildG1eEnrichmentRequests(signals) { + return (signals.g1eEnrichmentRequests || []).map((endpoint) => ({ + name: endpoint.name, + apiEndpoint: endpoint, + paramBindings: endpoint.requestTemplate || {}, + responsePath: signals.responsePaths?.[0] || "", + consumedFields: (signals.g1eAggregateRules || []) + .flatMap((rule) => String(rule).split(":")[1]?.split(",") || []) + .map((item) => item.trim()) + .filter(Boolean), + })); +} + +function buildG1eMergePlan(signals) { + if (!(signals.g1eJoinKeys || []).length && !(signals.g1eOutputColumns || []).length && !(signals.g1eAggregateRules || []).length) { + return null; + } + return { + joinKeys: signals.g1eJoinKeys || [], + fieldMappings: (signals.g1eOutputColumns || []).map(([field]) => ({ + outputField: field, + sourceType: (signals.g1eJoinKeys || []).includes(field) ? 
"main" : "aggregate", + sourceField: field, + requestName: null, + })), + aggregateRules: signals.g1eAggregateRules || [], + outputColumns: signals.g1eOutputColumns || [], + }; +} + function deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }) { const baseName = path.basename(sourceDir || ""); const candidates = []; @@ -951,6 +1055,12 @@ function classifyWorkflowArchetype(signals) { (signals.secondaryRequestMethods || []).length > 0 || businessEndpoints.length >= 2; const hasPostProcess = (signals.filterExpressions || []).length > 0 || (signals.exportMethods || []).length > 0; + if (signals.g1eMainRequest && (signals.g1eEnrichmentRequests || []).length > 0) { + const hasG1eMergeSignal = (signals.g1eJoinKeys || []).length > 0 || (signals.g1eOutputColumns || []).length > 0; + if (hasG1eMergeSignal) { + return "single_request_enrichment"; + } + } if (hasPagination && hasSecondaryRequest && hasPostProcess) { return "paginated_enrichment"; } @@ -970,61 +1080,226 @@ function classifyWorkflowArchetype(signals) { return "single_request_table"; } +function createEvidenceItem({ + kind = "deterministic", + evidenceType = "signal", + layer = "business", + subject = "", + summary = "", + source = "runner", + confidence = 0.7, + payload = null, +}) { + return { + kind, + evidenceType, + layer, + subject: subject || evidenceType, + summary, + source, + confidence, + payload: payload && typeof payload === "object" ? 
payload : null, + }; +} + function buildEvidence(signals, workflowArchetype) { const evidence = []; const businessEndpoints = getBusinessEndpoints(signals); + const localDependencies = (signals.endpoints || []).filter((endpoint) => endpoint.role === "local_helper"); + const exportServices = (signals.endpoints || []).filter((endpoint) => endpoint.role === "export_service"); + const bootstrapCandidate = (signals.bootstrapCandidates || []).find((candidate) => candidate.validForBootstrap); - if (businessEndpoints.length > 0) { - evidence.push({ - kind: "deterministic", - summary: `Detected ${businessEndpoints.length} business API endpoint(s).`, - source: "runner", - confidence: 0.9, - }); + if (bootstrapCandidate) { + evidence.push( + createEvidenceItem({ + evidenceType: "bootstrap_candidate", + layer: "business", + subject: bootstrapCandidate.expectedDomain || bootstrapCandidate.targetUrl || "bootstrap", + summary: `Bootstrap candidate resolved to ${bootstrapCandidate.expectedDomain || bootstrapCandidate.targetUrl}.`, + confidence: 0.92, + payload: bootstrapCandidate, + }) + ); } - if ((signals.branchFields || []).length > 0) { - evidence.push({ - kind: "deterministic", - summary: `Branch fields: ${signals.branchFields.join(", ")}`, - source: "runner", - confidence: 0.86, - }); + for (const endpoint of businessEndpoints) { + evidence.push( + createEvidenceItem({ + evidenceType: "endpoint_candidate", + layer: "business", + subject: endpoint.name || endpoint.url, + summary: `Business endpoint ${endpoint.name || endpoint.url} detected.`, + confidence: 0.9, + payload: endpoint, + }) + ); } - if ((signals.paginationVars || []).length > 0) { - evidence.push({ - kind: "deterministic", - summary: `Pagination vars: ${signals.paginationVars.join(", ")}`, - source: "runner", - confidence: 0.84, - }); + if ((signals.branchFields || []).length > 0 || (signals.modeValues || []).length > 0) { + evidence.push( + createEvidenceItem({ + evidenceType: "mode_candidate", + layer: 
"business", + subject: workflowArchetype, + summary: `Mode signals: fields=${(signals.branchFields || []).join(", ") || "none"} values=${(signals.modeValues || []).join(", ") || "none"}`, + confidence: 0.86, + payload: { + branchFields: signals.branchFields || [], + modeValues: signals.modeValues || [], + }, + }) + ); } - if ((signals.secondaryRequestMethods || []).length > 0) { - evidence.push({ - kind: "deterministic", - summary: `Secondary request methods: ${signals.secondaryRequestMethods.join(", ")}`, - source: "runner", - confidence: 0.82, - }); + const requestSignals = uniqueStringValues([ + ...(signals.entryMethods || []), + ...Object.keys(signals.staticParams || {}), + ]); + if (requestSignals.length > 0) { + evidence.push( + createEvidenceItem({ + evidenceType: "request_template_candidate", + layer: "business", + subject: requestSignals[0], + summary: `Request-side signals detected: ${requestSignals.join(", ")}`, + confidence: 0.8, + payload: { + entryMethods: signals.entryMethods || [], + staticParams: signals.staticParams || {}, + }, + }) + ); } - if ((signals.exportMethods || []).length > 0) { - evidence.push({ - kind: "deterministic", - summary: `Export methods: ${signals.exportMethods.join(", ")}`, - source: "runner", - confidence: 0.78, - }); + if (signals.g1eMainRequest) { + evidence.push( + createEvidenceItem({ + evidenceType: "main_request_candidate", + layer: "business", + subject: signals.g1eMainRequest.name, + summary: `G1-E main request candidate resolved to ${signals.g1eMainRequest.name}.`, + confidence: 0.86, + payload: signals.g1eMainRequest, + }) + ); } - evidence.push({ - kind: "classification", - summary: `Workflow archetype classified as ${workflowArchetype}.`, - source: "runner", - confidence: 0.72, - }); + for (const endpoint of signals.g1eEnrichmentRequests || []) { + evidence.push( + createEvidenceItem({ + evidenceType: "enrichment_request_candidate", + layer: "business", + subject: endpoint.name, + summary: `G1-E enrichment 
request candidate resolved to ${endpoint.name}.`, + confidence: 0.84, + payload: endpoint, + }) + ); + } + + if ((signals.g1eJoinKeys || []).length || (signals.g1eOutputColumns || []).length || (signals.g1eAggregateRules || []).length) { + evidence.push( + createEvidenceItem({ + evidenceType: "merge_plan_candidate", + layer: "workflow", + subject: "g1e_merge_plan", + summary: "G1-E merge plan candidate detected.", + confidence: 0.83, + payload: { + joinKeys: signals.g1eJoinKeys || [], + outputColumns: signals.g1eOutputColumns || [], + aggregateRules: signals.g1eAggregateRules || [], + }, + }) + ); + } + + if ((signals.responsePaths || []).length > 0) { + evidence.push( + createEvidenceItem({ + evidenceType: "response_path_candidate", + layer: "business", + subject: signals.responsePaths[0], + summary: `Response paths detected: ${signals.responsePaths.join(", ")}`, + confidence: 0.84, + payload: { responsePaths: signals.responsePaths || [] }, + }) + ); + } + + if ((signals.filterExpressions || []).length > 0 || (signals.exportMethods || []).length > 0) { + evidence.push( + createEvidenceItem({ + evidenceType: "normalize_rules_candidate", + layer: "business", + subject: "normalize_rules", + summary: `Post-process signals detected: filters=${(signals.filterExpressions || []).length}, exports=${(signals.exportMethods || []).length}`, + confidence: 0.76, + payload: { + filterExpressions: signals.filterExpressions || [], + exportMethods: signals.exportMethods || [], + }, + }) + ); + } + + if ((signals.paginationVars || []).length > 0 || (signals.secondaryRequestMethods || []).length > 0) { + evidence.push( + createEvidenceItem({ + evidenceType: "workflow_candidate", + layer: "workflow", + subject: workflowArchetype, + summary: `Workflow signals: pagination=${(signals.paginationVars || []).join(", ") || "none"} secondary=${(signals.secondaryRequestMethods || []).join(", ") || "none"}`, + confidence: 0.82, + payload: { + paginationVars: signals.paginationVars || [], + 
secondaryRequestMethods: signals.secondaryRequestMethods || [], + exportMethods: signals.exportMethods || [], + }, + }) + ); + } + + for (const endpoint of [...localDependencies, ...exportServices]) { + evidence.push( + createEvidenceItem({ + evidenceType: "localhost_dependency_candidate", + layer: "host_runtime", + subject: endpoint.name || endpoint.url, + summary: `Host runtime dependency detected: ${endpoint.url}`, + confidence: 0.88, + payload: endpoint, + }) + ); + } + + if ((signals.exportMethods || []).length > 0 || exportServices.length > 0) { + evidence.push( + createEvidenceItem({ + evidenceType: "export_candidate", + layer: "output", + subject: signals.exportMethods?.[0] || exportServices[0]?.name || "export", + summary: `Export signals detected: ${(signals.exportMethods || []).join(", ") || exportServices.map((item) => item.url).join(", ")}`, + confidence: 0.78, + payload: { + exportMethods: signals.exportMethods || [], + exportEndpoints: exportServices, + }, + }) + ); + } + + evidence.push( + createEvidenceItem({ + kind: "classification", + evidenceType: "workflow_candidate", + layer: "classification", + subject: workflowArchetype, + summary: `Workflow archetype classified as ${workflowArchetype}.`, + confidence: 0.72, + payload: { workflowArchetype }, + }) + ); return evidence; } @@ -1086,6 +1361,27 @@ function buildWorkflowSteps(signals, workflowArchetype) { const primaryEndpoint = businessEndpoints[0]?.name || null; const secondaryEndpoint = businessEndpoints[1]?.name || null; + if (workflowArchetype === "single_request_enrichment") { + steps.push({ + type: "request", + entry: signals.entryMethods?.[0] || null, + endpoint: signals.g1eMainRequest?.name || primaryEndpoint, + description: "Query the main list for G1-E workflow.", + }); + for (const endpoint of signals.g1eEnrichmentRequests || []) { + steps.push({ + type: "enrichment_request", + endpoint: endpoint.name, + description: "Fetch lightweight enrichment payload.", + }); + } + steps.push({ + 
type: "transform", + description: "Merge enrichment payloads into aggregate output.", + }); + return steps; + } + if (workflowArchetype === "multi_mode_request") { steps.push({ type: "request", @@ -1238,6 +1534,9 @@ function buildUncertainties(signals, workflowArchetype) { if (workflowArchetype === "paginated_enrichment" && !(signals.filterExpressions || []).length && !(signals.exportMethods || []).length) { issues.push("Paginated enrichment is missing post-process evidence."); } + if (workflowArchetype === "single_request_enrichment" && !(signals.g1eOutputColumns || []).length) { + issues.push("G1-E output columns are still weakly inferred."); + } return issues; } @@ -1255,10 +1554,17 @@ function scoreConfidence(signals, workflowArchetype) { ) { score += 0.14; } + if ( + workflowArchetype === "single_request_enrichment" && + signals.g1eMainRequest && + (signals.g1eEnrichmentRequests || []).length > 0 + ) { + score += 0.14; + } return Math.min(0.95, Number(score.toFixed(2))); } -function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiEndpoints, params, workflowSteps, confidence }) { +function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiEndpoints, params, workflowSteps, mainRequest, enrichmentRequests, mergePlan, confidence }) { const risks = []; const missingPieces = []; const notes = []; @@ -1288,6 +1594,37 @@ function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiE risks.push("Workflow steps are incomplete."); } + const requestContract = evaluateRequestContract({ + workflowArchetype, + apiEndpoints, + params, + workflowSteps, + }); + if (!requestContract.passed) { + missingPieces.push(requestContract.reason || "request_contract"); + risks.push(requestContract.message); + } + + const responseContract = evaluateResponseContract({ + workflowArchetype, + workflowSteps, + apiEndpoints, + }); + if (!responseContract.passed) { + missingPieces.push(responseContract.reason || "response_contract"); + 
risks.push(responseContract.message); + } + + const workflowContract = evaluateWorkflowContract({ + workflowArchetype, + workflowSteps, + businessApiEndpoints, + }); + if (!workflowContract.passed) { + missingPieces.push(workflowContract.reason || "workflow_contract"); + risks.push(workflowContract.message); + } + if (workflowArchetype === "paginated_enrichment") { const hasPaginate = workflowSteps.some((step) => step.type === "paginate"); const hasSecondary = workflowSteps.some((step) => step.type === "secondary_request"); @@ -1306,6 +1643,21 @@ function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiE } } + if (workflowArchetype === "single_request_enrichment") { + if (!mainRequest) { + missingPieces.push("main_request"); + risks.push("G1-E workflow is missing a resolved main request."); + } + if (!(enrichmentRequests || []).length) { + missingPieces.push("enrichment_requests"); + risks.push("G1-E workflow is missing enrichment request contracts."); + } + if (!mergePlan) { + missingPieces.push("merge_plan"); + risks.push("G1-E workflow is missing merge plan evidence."); + } + } + if (workflowArchetype === "multi_mode_request" && !params.some((param) => param.name === "period")) { risks.push("Mode-aware workflow is missing a resolved period parameter."); } @@ -1340,15 +1692,25 @@ function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiE passed: Boolean(bootstrap.targetUrl || bootstrap.expectedDomain), reason: bootstrap.targetUrl || bootstrap.expectedDomain ? null : "bootstrap_target", }, + { + name: "request_contract_complete", + passed: requestContract.passed, + reason: requestContract.passed ? null : requestContract.reason, + }, + { + name: "response_contract_complete", + passed: responseContract.passed, + reason: responseContract.passed ? null : responseContract.reason, + }, + { + name: "workflow_contract_complete", + passed: workflowContract.passed, + reason: workflowContract.passed ? 
null : workflowContract.reason, + }, { name: "workflow_complete_for_archetype", - passed: !missingPieces.some((item) => - ["workflow_steps", "paginate_step", "secondary_request", "post_process"].includes(item) - ), - reason: - missingPieces.find((item) => - ["workflow_steps", "paginate_step", "secondary_request", "post_process"].includes(item) - ) || null, + passed: workflowContract.passed, + reason: workflowContract.passed ? null : workflowContract.reason, }, { name: "runtime_contract_compatible", @@ -1361,6 +1723,26 @@ function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiE ? null : "runtime_contract_incompatible", }, + { + name: "main_request_resolved", + passed: workflowArchetype !== "single_request_enrichment" || Boolean(mainRequest), + reason: workflowArchetype !== "single_request_enrichment" || mainRequest ? null : "main_request", + }, + { + name: "enrichment_requests_resolved", + passed: workflowArchetype !== "single_request_enrichment" || Boolean((enrichmentRequests || []).length), + reason: workflowArchetype !== "single_request_enrichment" || (enrichmentRequests || []).length ? null : "enrichment_requests", + }, + { + name: "merge_plan_resolved", + passed: workflowArchetype !== "single_request_enrichment" || Boolean(mergePlan), + reason: workflowArchetype !== "single_request_enrichment" || mergePlan ? null : "merge_plan", + }, + { + name: "g1e_scope_compatible", + passed: workflowArchetype !== "single_request_enrichment" || Boolean(mainRequest && (enrichmentRequests || []).length && mergePlan), + reason: workflowArchetype !== "single_request_enrichment" || (mainRequest && (enrichmentRequests || []).length && mergePlan) ? 
null : "g1e_scope", + }, ]; return { @@ -1373,6 +1755,175 @@ function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiE }; } +function evaluateRequestContract({ workflowArchetype, apiEndpoints, params, workflowSteps }) { + const endpointCount = (apiEndpoints || []).filter((endpoint) => + ["business_api", "gateway_api", "business_entry"].includes(endpoint.role) + ).length; + const hasRequestStep = (workflowSteps || []).some((step) => ["request", "paginate", "secondary_request"].includes(step.type)); + const hasRuntimeInputs = (params || []).length > 0; + + if (workflowArchetype === "multi_mode_request") { + const periodParamReady = (params || []).some((param) => param.name === "period"); + return periodParamReady && endpointCount > 0 + ? { passed: true } + : { + passed: false, + reason: endpointCount > 0 ? "request_mode_param" : "request_endpoint", + message: endpointCount > 0 + ? "Multi-mode request is missing a resolved mode/period contract." + : "Request contract is missing a business endpoint.", + }; + } + + if (workflowArchetype === "single_request_enrichment") { + return endpointCount >= 2 && hasRequestStep + ? { passed: true } + : { + passed: false, + reason: endpointCount >= 2 ? "main_request" : "request_endpoint", + message: endpointCount >= 2 + ? "G1-E workflow is missing a resolved main request." + : "G1-E workflow requires both main and enrichment business endpoints.", + }; + } + + if (workflowArchetype === "paginated_enrichment") { + return endpointCount >= 2 && hasRequestStep + ? { passed: true } + : { + passed: false, + reason: endpointCount >= 2 ? "request_workflow" : "request_endpoint", + message: endpointCount >= 2 + ? "Paginated enrichment is missing request-side workflow signals." + : "Paginated enrichment requires both primary and secondary request endpoints.", + }; + } + + if (workflowArchetype === "page_state_eval") { + return hasRequestStep || endpointCount > 0 + ? 
{ passed: true } + : { + passed: false, + reason: "request_workflow", + message: "Page-state workflow is missing request or state-evaluation entry signals.", + }; + } + + return endpointCount > 0 || hasRuntimeInputs + ? { passed: true } + : { + passed: false, + reason: "request_endpoint", + message: "Request contract is missing a business endpoint or runtime input.", + }; +} + +function evaluateResponseContract({ workflowArchetype, workflowSteps, apiEndpoints }) { + const endpointCount = (apiEndpoints || []).filter((endpoint) => + ["business_api", "gateway_api", "business_entry"].includes(endpoint.role) + ).length; + const hasTransform = (workflowSteps || []).some((step) => ["transform", "filter", "export"].includes(step.type)); + + if (workflowArchetype === "single_request_enrichment") { + return endpointCount >= 2 && hasTransform + ? { passed: true } + : { + passed: false, + reason: endpointCount >= 2 ? "merge_plan" : "response_path", + message: endpointCount >= 2 + ? "G1-E workflow is missing merge/transform evidence." + : "G1-E workflow lacks enough response-side endpoint evidence.", + }; + } + + if (workflowArchetype === "paginated_enrichment") { + return endpointCount >= 2 + ? { passed: true } + : { + passed: false, + reason: "response_path", + message: "Paginated enrichment lacks enough response-side endpoints to confirm extraction.", + }; + } + + if (workflowArchetype === "page_state_eval") { + return { passed: true }; + } + + return endpointCount > 0 || hasTransform + ? 
{ passed: true } + : { + passed: false, + reason: "response_path", + message: "Response contract is missing extraction or transform evidence.", + }; +} + +function evaluateWorkflowContract({ workflowArchetype, workflowSteps, businessApiEndpoints }) { + const hasAnyWorkflow = (workflowSteps || []).length > 0; + if (!hasAnyWorkflow) { + return { + passed: false, + reason: "workflow_steps", + message: "Workflow contract is missing executable steps.", + }; + } + + if (workflowArchetype === "paginated_enrichment") { + const hasPaginate = workflowSteps.some((step) => step.type === "paginate"); + const hasSecondary = workflowSteps.some((step) => step.type === "secondary_request"); + const hasPostProcess = workflowSteps.some((step) => ["filter", "transform", "export"].includes(step.type)); + if (!hasPaginate) { + return { + passed: false, + reason: "paginate_step", + message: "Paginated enrichment lacks pagination evidence.", + }; + } + if (!hasSecondary || (businessApiEndpoints || []).length < 2) { + return { + passed: false, + reason: "secondary_request", + message: "Paginated enrichment lacks a strong secondary request signal.", + }; + } + if (!hasPostProcess) { + return { + passed: false, + reason: "post_process", + message: "Paginated enrichment lacks filter/transform/export evidence.", + }; + } + } + + if (workflowArchetype === "single_request_enrichment") { + const hasRequest = workflowSteps.some((step) => step.type === "request"); + const hasEnrichment = workflowSteps.some((step) => step.type === "enrichment_request"); + const hasTransform = workflowSteps.some((step) => step.type === "transform"); + if (!hasRequest || !hasEnrichment || !hasTransform) { + return { + passed: false, + reason: !hasRequest ? "workflow_request" : !hasEnrichment ? 
"enrichment_requests" : "workflow_transform", + message: "G1-E workflow lacks a complete main/enrichment/transform chain.", + }; + } + } + + if (workflowArchetype === "multi_mode_request") { + const hasRequest = workflowSteps.some((step) => step.type === "request"); + const hasTransform = workflowSteps.some((step) => step.type === "transform"); + if (!hasRequest || !hasTransform) { + return { + passed: false, + reason: !hasRequest ? "workflow_request" : "workflow_transform", + message: "Multi-mode request lacks a complete request/transform workflow.", + }; + } + } + + return { passed: true }; +} + function slugifyAscii(value) { return String(value || "") .replace(/([a-z0-9])([A-Z])/g, "$1-$2") diff --git a/frontend/scene-generator/llm-client.js b/frontend/scene-generator/llm-client.js index 54fda63..3da3e84 100644 --- a/frontend/scene-generator/llm-client.js +++ b/frontend/scene-generator/llm-client.js @@ -26,7 +26,7 @@ Schema: "sceneId": "string", "sceneName": "string", "sceneKind": "report_collection|monitoring", - "workflowArchetype": "single_request_table|multi_mode_request|paginated_enrichment|page_state_eval", + "workflowArchetype": "single_request_table|single_request_enrichment|multi_mode_request|paginated_enrichment|page_state_eval", "bootstrap": { "expectedDomain": "string", "targetUrl": "string", @@ -67,6 +67,27 @@ Schema: "secondaryRequestEntries": ["string"], "postProcessSteps": ["string"] }, + "mainRequest": { + "apiEndpoint": { "name": "string", "url": "string", "method": "POST", "contentType": "string", "description": "string" }, + "requestTemplate": {}, + "responsePath": "string", + "columnDefs": [["field", "label"]] + }, + "enrichmentRequests": [ + { + "name": "string", + "apiEndpoint": { "name": "string", "url": "string", "method": "POST", "contentType": "string", "description": "string" }, + "paramBindings": {}, + "responsePath": "string", + "consumedFields": ["string"] + } + ], + "mergePlan": { + "joinKeys": ["string"], + "fieldMappings": [{ 
"outputField": "string", "sourceType": "main|aggregate|enrichment", "sourceField": "string", "requestName": "string|null" }], + "aggregateRules": ["string"], + "outputColumns": [["field", "label"]] + }, "requestTemplate": {}, "responsePath": "string", "normalizeRules": { "type": "validate_required", "requiredFields": ["string"], "filterNull": true }, diff --git a/frontend/scene-generator/server.js b/frontend/scene-generator/server.js index 15940a4..fac4960 100644 --- a/frontend/scene-generator/server.js +++ b/frontend/scene-generator/server.js @@ -7,17 +7,19 @@ const path = require("path"); const { spawn } = require("child_process"); const { loadConfig, getDefaults } = require("./config-loader"); const { analyzeScene, analyzeSceneDeep } = require("./llm-client"); -const { runGenerator, readDirectory } = require("./generator-runner"); +const { runGenerator, readDirectory, validateSceneIdCandidate } = require("./generator-runner"); let config; let defaults; try { config = loadConfig(); defaults = getDefaults(); - console.log(`[config] Loaded from: ${config.configPath}`); - console.log(`[config] Project root: ${config.projectRoot}`); -} catch (err) { - console.error(`[error] Failed to load config: ${err.message}`); + if (require.main === module) { + console.log(`[config] Loaded from: ${config.configPath}`); + console.log(`[config] Project root: ${config.projectRoot}`); + } +} catch (error) { + console.error(`[error] Failed to load config: ${error.message}`); process.exit(1); } @@ -34,11 +36,9 @@ const MIME_TYPES = { function serveStatic(res, filePath) { const ext = path.extname(filePath); const contentType = MIME_TYPES[ext] || "application/octet-stream"; - - fs.readFile(filePath, (err, data) => { - if (err) { - res.writeHead(404); - res.end("Not found"); + fs.readFile(filePath, (error, data) => { + if (error) { + writeJson(res, 404, { error: "Not found" }); return; } res.writeHead(200, { "Content-Type": contentType }); @@ -61,14 +61,24 @@ function writeSSE(res, event, 
data) { res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`); } +function writeJson(res, statusCode, payload) { + res.writeHead(statusCode, { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }); + res.end(JSON.stringify(payload)); +} + function parseBody(req) { return new Promise((resolve, reject) => { let body = ""; - req.on("data", (chunk) => (body += chunk)); + req.on("data", (chunk) => { + body += chunk; + }); req.on("end", () => { try { - resolve(JSON.parse(body)); - } catch (err) { + resolve(body ? JSON.parse(body) : {}); + } catch (_) { reject(new Error("Invalid JSON")); } }); @@ -80,40 +90,33 @@ async function handleAnalyze(req, res) { let body; try { body = await parseBody(req); - } catch { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Invalid JSON body" })); + } catch (_) { + writeJson(res, 400, { error: "Invalid JSON body" }); return; } - const sourceDir = (body.sourceDir || "").replace(/\\/g, "/"); + const sourceDir = normalizeInputPath(body.sourceDir); if (!sourceDir) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "sourceDir is required" })); + writeJson(res, 400, { error: "sourceDir is required" }); return; } let dirContents; try { dirContents = readDirectory(sourceDir); - } catch (err) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: err.message })); + } catch (error) { + writeJson(res, 400, { error: error.message }); return; } try { const result = await analyzeScene(sourceDir, dirContents, config); - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify(result)); - } catch (err) { - res.writeHead(502, { "Content-Type": "application/json" }); - res.end( - JSON.stringify({ - error: `LLM analysis failed: ${err.message}`, - hint: "You can still enter scene-id and scene-name manually", - }) - ); + writeJson(res, 200, result); + } catch 
(error) { + writeJson(res, 502, { + error: `LLM analysis failed: ${error.message}`, + hint: "You can still enter scene-id and scene-name manually.", + }); } } @@ -121,112 +124,157 @@ async function handleAnalyzeDeep(req, res) { let body; try { body = await parseBody(req); - } catch { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Invalid JSON body" })); + } catch (_) { + writeJson(res, 400, { error: "Invalid JSON body" }); return; } - const sourceDir = (body.sourceDir || "").replace(/\\/g, "/"); + const sourceDir = normalizeInputPath(body.sourceDir); if (!sourceDir) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "sourceDir is required" })); + writeJson(res, 400, { error: "sourceDir is required" }); return; } let dirContents; try { dirContents = readDirectory(sourceDir); - } catch (err) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: err.message })); + } catch (error) { + writeJson(res, 400, { error: error.message }); return; } - try { - const indexHtmlContent = dirContents.indexHtml || null; - const result = await analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, config); + const deterministic = sanitizeSceneIr(dirContents.deterministic || {}); + const warnings = []; + const sources = { + deterministic: true, + llm: false, + }; + let llmSceneIr = null; + let llmError = null; - // Log extraction results for debugging - console.log(`[analyze-deep] Extracted scene: ${result.sceneId} / ${result.sceneName}`); - console.log(`[analyze-deep] API endpoints: ${result.apiEndpoints?.length || 0}`); - console.log(`[analyze-deep] Column defs: ${result.columnDefs?.length || 0}`); - - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify(result)); - } catch (err) { - console.error(`[analyze-deep] Error: ${err.message}`); - res.writeHead(502, { "Content-Type": "application/json" }); - res.end( 
- JSON.stringify({ - error: `Deep analysis failed: ${err.message}`, - hint: "You can still use basic analysis or enter data manually", - }) - ); + if (hasUsableLlmConfig(config)) { + try { + llmSceneIr = sanitizeSceneIr(await analyzeSceneDeep(sourceDir, dirContents, config)); + sources.llm = true; + } catch (error) { + llmError = error.message; + warnings.push(`LLM semantic completion failed: ${error.message}`); + } + } else { + warnings.push("LLM semantic completion skipped because API config is incomplete."); } + + const merged = mergeSceneIr(deterministic, llmSceneIr, warnings); + merged.analysisMeta = { + sourceDir, + sources, + llmError, + warnings, + deterministicSignals: dirContents.analysisContext?.deterministicSignals || {}, + }; + + console.log( + `[analyze-deep] ${merged.sceneId} / ${merged.sceneName} archetype=${merged.workflowArchetype || "unknown"} readiness=${merged.readiness?.level || "?"}` + ); + + writeJson(res, 200, merged); } async function handleGenerate(req, res) { let body; try { body = await parseBody(req); - } catch { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Invalid JSON body" })); + } catch (_) { + writeJson(res, 400, { error: "Invalid JSON body" }); return; } - const { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson } = body; + const sourceDir = normalizeInputPath(body.sourceDir); + const sceneId = stringValue(body.sceneId); + const sceneName = stringValue(body.sceneName); + const sceneKind = stringValue(body.sceneKind); + const targetUrl = stringValue(body.targetUrl); + const outputRoot = normalizeInputPath(body.outputRoot); + const lessons = normalizeInputPath(body.lessons); + const sceneInfoJson = normalizeJsonInput(body.sceneInfoJson); + const sceneIrJson = normalizeJsonInput(body.sceneIrJson); + if (!sourceDir || !sceneId || !sceneName || !outputRoot) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end( - 
JSON.stringify({ - error: - "All fields required: sourceDir, sceneId, sceneName, outputRoot", - }) - ); + writeJson(res, 400, { + error: "All fields required: sourceDir, sceneId, sceneName, outputRoot", + }); return; } const sseWriter = initSSE(res); - try { + const sceneIr = sceneIrJson ? sanitizeSceneIr(JSON.parse(sceneIrJson)) : null; + const blockers = getGenerationBlockers({ + sceneIr, + sceneId, + sceneName, + sourceDir, + }); + if (blockers.length) { + writeSSE(sseWriter, "error", { + message: `Generation blocked: ${blockers.join(", ")}`, + }); + writeSSE(sseWriter, "complete", { + success: false, + blocked: true, + blockers, + readiness: sceneIr?.readiness || null, + workflowArchetype: sceneIr?.workflowArchetype || null, + confidence: sceneIr?.confidence || 0, + }); + return; + } await runGenerator( - { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson }, + { + sourceDir, + sceneId, + sceneName, + sceneKind: sceneKind || null, + targetUrl: targetUrl || null, + outputRoot, + lessons: lessons || null, + sceneInfoJson, + sceneIrJson, + completionMeta: sceneIr + ? 
{ + readiness: sceneIr.readiness || null, + workflowArchetype: sceneIr.workflowArchetype || null, + confidence: sceneIr.confidence || 0, + } + : null, + }, sseWriter, config.projectRoot ); - } catch (err) { - writeSSE(sseWriter, "error", { message: `Server error: ${err.message}` }); + } catch (error) { + writeSSE(sseWriter, "error", { message: `Server error: ${error.message}` }); } - sseWriter.end(); } function handleHealth(req, res) { - res.writeHead(200, { "Content-Type": "application/json" }); - res.end( - JSON.stringify({ - status: "ok", - pid: process.pid, - configLoaded: true, - configPath: config.configPath, - projectRoot: config.projectRoot, - }) - ); + writeJson(res, 200, { + status: "ok", + pid: process.pid, + configLoaded: true, + configPath: config.configPath, + projectRoot: config.projectRoot, + defaults, + }); } -/** - * Open a native Windows folder selection dialog using PowerShell. - * Returns the selected folder path or null if cancelled. - */ function openFolderDialog(defaultPath) { return new Promise((resolve) => { const psScript = ` [Console]::OutputEncoding = [System.Text.Encoding]::UTF8 Add-Type -AssemblyName System.Windows.Forms $dialog = New-Object System.Windows.Forms.FolderBrowserDialog -$dialog.Description = "选择文件夹" +$dialog.Description = "Select a folder" $dialog.ShowNewFolderButton = true ${defaultPath ? 
`$dialog.SelectedPath = '${defaultPath.replace(/'/g, "''")}'` : ""} if ($dialog.ShowDialog() -eq 'OK') { @@ -234,42 +282,28 @@ if ($dialog.ShowDialog() -eq 'OK') { } `.trim(); - const ps = spawn("powershell.exe", [ - "-NoProfile", - "-NonInteractive", - "-Command", - psScript, - ], { - windowsHide: true, - }); + const ps = spawn( + "powershell.exe", + ["-NoProfile", "-NonInteractive", "-Command", psScript], + { windowsHide: true } + ); let output = ""; - let error = ""; - ps.stdout.on("data", (data) => { output += data.toString("utf8"); }); - - ps.stderr.on("data", (data) => { - error += data.toString("utf8"); - }); - ps.on("close", (code) => { if (code === 0 && output.trim()) { - // 移除可能的 BOM 标记 - let path = output.trim(); - if (path.charCodeAt(0) === 0xFEFF) { - path = path.slice(1); + let selected = output.trim(); + if (selected.charCodeAt(0) === 0xfeff) { + selected = selected.slice(1); } - resolve(path); - } else { - resolve(null); + resolve(selected); + return; } - }); - - ps.on("error", () => { resolve(null); }); + ps.on("error", () => resolve(null)); }); } @@ -277,30 +311,25 @@ async function handleSelectFolder(req, res) { let body = {}; try { body = await parseBody(req); - } catch { - // ignore parse error, use empty body - } + } catch (_) {} const selectedPath = await openFolderDialog(body.defaultPath || ""); - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ path: selectedPath })); + writeJson(res, 200, { path: selectedPath }); } async function handleSelectFile(req, res) { let body = {}; try { body = await parseBody(req); - } catch { - // ignore parse error - } + } catch (_) {} - const filter = body.filter || "所有文件 (*.*)|*.*"; + const filter = body.filter || "All files (*.*)|*.*"; const psScript = ` [Console]::OutputEncoding = [System.Text.Encoding]::UTF8 Add-Type -AssemblyName System.Windows.Forms $dialog = New-Object System.Windows.Forms.OpenFileDialog -$dialog.Filter = '${filter}' -$dialog.Title = "选择文件" 
+$dialog.Filter = '${filter.replace(/'/g, "''")}' +$dialog.Title = "Select a file" ${body.defaultPath ? `$dialog.InitialDirectory = '${body.defaultPath.replace(/'/g, "''")}'` : ""} if ($dialog.ShowDialog() -eq 'OK') { Write-Output $dialog.FileName @@ -308,110 +337,1039 @@ if ($dialog.ShowDialog() -eq 'OK') { `.trim(); return new Promise((resolve) => { - const ps = spawn("powershell.exe", [ - "-NoProfile", - "-NonInteractive", - "-Command", - psScript, - ], { - windowsHide: true, - }); + const ps = spawn( + "powershell.exe", + ["-NoProfile", "-NonInteractive", "-Command", psScript], + { windowsHide: true } + ); let output = ""; - ps.stdout.on("data", (data) => { output += data.toString("utf8"); }); ps.on("close", (code) => { - let path = output.trim(); - if (path.charCodeAt(0) === 0xFEFF) { - path = path.slice(1); + let selected = output.trim(); + if (selected.charCodeAt(0) === 0xfeff) { + selected = selected.slice(1); } - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ path: code === 0 && path ? path : null })); + writeJson(res, 200, { path: code === 0 && selected ? 
selected : null }); resolve(); }); ps.on("error", () => { - res.writeHead(200, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ path: null })); + writeJson(res, 200, { path: null }); resolve(); }); }); } -const server = http.createServer(async (req, res) => { - const url = new URL(req.url, `http://${req.headers.host}`); - const pathname = url.pathname; +function hasUsableLlmConfig(currentConfig) { + return Boolean( + currentConfig && + stringValue(currentConfig.apiKey) && + stringValue(currentConfig.baseUrl) && + stringValue(currentConfig.model) + ); +} - if (req.method === "OPTIONS") { - res.writeHead(204, { - "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "GET, POST, OPTIONS", - "Access-Control-Allow-Headers": "Content-Type", - }); - res.end(); - return; +function sanitizeSceneIr(sceneIr) { + const value = sceneIr && typeof sceneIr === "object" ? JSON.parse(JSON.stringify(sceneIr)) : {}; + value.sceneId = stringValue(value.sceneId); + value.sceneIdDiagnostics = sanitizeSceneIdDiagnostics(value.sceneIdDiagnostics); + value.sceneName = stringValue(value.sceneName) || "Generated Scene"; + value.sceneKind = stringValue(value.sceneKind) || "report_collection"; + value.workflowArchetype = stringValue(value.workflowArchetype) || "single_request_table"; + value.bootstrap = sanitizeBootstrap(value.bootstrap); + value.params = sanitizeParams(value.params); + value.modes = sanitizeModes(value.modes); + value.defaultMode = stringValue(value.defaultMode) || null; + value.modeSwitchField = stringValue(value.modeSwitchField) || null; + value.workflowSteps = sanitizeWorkflowSteps(value.workflowSteps); + value.workflowEvidence = sanitizeWorkflowEvidence(value.workflowEvidence); + value.mainRequest = sanitizeMainRequest(value.mainRequest); + value.enrichmentRequests = sanitizeEnrichmentRequests(value.enrichmentRequests); + value.mergePlan = sanitizeMergePlan(value.mergePlan); + value.requestTemplate = ensureObject(value.requestTemplate); + 
value.responsePath = stringValue(value.responsePath); + value.normalizeRules = sanitizeNormalizeRules(value.normalizeRules); + value.artifactContract = sanitizeArtifactContract(value.artifactContract); + value.validationHints = sanitizeValidationHints(value.validationHints); + value.evidence = sanitizeEvidence(value.evidence); + value.readiness = sanitizeReadiness(value.readiness); + value.apiEndpoints = sanitizeApiEndpoints(value.apiEndpoints); + value.staticParams = ensureObject(value.staticParams); + value.columnDefs = Array.isArray(value.columnDefs) ? value.columnDefs : []; + value.confidence = clampConfidence(value.confidence); + value.uncertainties = sanitizeStringList(value.uncertainties); + return value; +} + +function sanitizeMainRequest(mainRequest) { + const value = ensureObject(mainRequest); + if (!Object.keys(value).length) return null; + return { + apiEndpoint: sanitizeApiEndpoint(value.apiEndpoint), + requestTemplate: ensureObject(value.requestTemplate), + responsePath: stringValue(value.responsePath), + columnDefs: Array.isArray(value.columnDefs) ? value.columnDefs : [], + }; +} + +function sanitizeEnrichmentRequests(enrichmentRequests) { + if (!Array.isArray(enrichmentRequests)) return []; + return enrichmentRequests + .map((item) => ({ + name: stringValue(item?.name), + apiEndpoint: sanitizeApiEndpoint(item?.apiEndpoint), + paramBindings: ensureObject(item?.paramBindings), + responsePath: stringValue(item?.responsePath), + consumedFields: sanitizeStringList(item?.consumedFields), + })) + .filter((item) => item.name); +} + +function sanitizeMergePlan(mergePlan) { + const value = ensureObject(mergePlan); + if (!Object.keys(value).length) return null; + return { + joinKeys: sanitizeStringList(value.joinKeys), + fieldMappings: Array.isArray(value.fieldMappings) + ? 
value.fieldMappings.map((item) => ({ + outputField: stringValue(item?.outputField), + sourceType: stringValue(item?.sourceType), + sourceField: stringValue(item?.sourceField), + requestName: stringValue(item?.requestName) || null, + })).filter((item) => item.outputField) + : [], + aggregateRules: sanitizeStringList(value.aggregateRules), + outputColumns: Array.isArray(value.outputColumns) ? value.outputColumns : [], + }; +} + +function sanitizeSceneIdDiagnostics(sceneIdDiagnostics) { + const value = ensureObject(sceneIdDiagnostics); + return { + candidateSource: stringValue(value.candidateSource), + valid: value.valid !== false, + invalidReason: stringValue(value.invalidReason) || null, + candidates: Array.isArray(value.candidates) + ? value.candidates + .map((candidate) => ({ + value: stringValue(candidate?.value), + source: stringValue(candidate?.source), + valid: candidate?.valid !== false, + reason: stringValue(candidate?.reason) || null, + })) + .filter((candidate) => candidate.value) + : [], + }; +} + +function sanitizeBootstrap(bootstrap) { + const value = ensureObject(bootstrap); + return { + expectedDomain: stringValue(value.expectedDomain), + targetUrl: stringValue(value.targetUrl), + requiresTargetPage: value.requiresTargetPage !== false, + pageTitleKeywords: sanitizeStringList(value.pageTitleKeywords), + source: stringValue(value.source), + }; +} + +function sanitizeParams(params) { + if (!Array.isArray(params)) return []; + return params + .map((param) => ({ + name: stringValue(param?.name), + resolver: stringValue(param?.resolver), + required: Boolean(param?.required), + promptMissing: stringValue(param?.promptMissing), + promptAmbiguous: stringValue(param?.promptAmbiguous), + resolverConfig: ensureObject(param?.resolverConfig), + })) + .filter((param) => param.name); +} + +function sanitizeModes(modes) { + if (!Array.isArray(modes)) return []; + return modes + .map((mode) => ({ + name: stringValue(mode?.name), + label: stringValue(mode?.label) || 
null, + condition: mode?.condition && typeof mode.condition === "object" + ? { + field: stringValue(mode.condition.field), + operator: stringValue(mode.condition.operator) || "equals", + value: mode.condition.value, + } + : null, + apiEndpoint: sanitizeApiEndpoint(mode?.apiEndpoint), + columnDefs: Array.isArray(mode?.columnDefs) ? mode.columnDefs : [], + requestTemplate: ensureObject(mode?.requestTemplate), + normalizeRules: sanitizeNormalizeRules(mode?.normalizeRules), + responsePath: stringValue(mode?.responsePath), + })) + .filter((mode) => mode.name); +} + +function sanitizeWorkflowSteps(steps) { + if (!Array.isArray(steps)) return []; + return steps + .map((step) => ({ + type: stringValue(step?.type), + entry: stringValue(step?.entry) || null, + source: stringValue(step?.source) || null, + expr: stringValue(step?.expr) || null, + description: stringValue(step?.description) || null, + endpoint: stringValue(step?.endpoint) || null, + })) + .filter((step) => step.type); +} + +function sanitizeWorkflowEvidence(workflowEvidence) { + const value = ensureObject(workflowEvidence); + return { + requestEntries: sanitizeStringList(value.requestEntries), + paginationFields: sanitizeStringList(value.paginationFields), + secondaryRequestEntries: sanitizeStringList(value.secondaryRequestEntries), + postProcessSteps: sanitizeStringList(value.postProcessSteps), + }; +} + +function sanitizeNormalizeRules(rules) { + const value = ensureObject(rules); + if (!Object.keys(value).length) return null; + return { + type: stringValue(value.type), + requiredFields: sanitizeStringList(value.requiredFields), + filterNull: value.filterNull !== false, + }; +} + +function sanitizeArtifactContract(contract) { + const value = ensureObject(contract); + return { + type: stringValue(value.type) || "report-artifact", + successStatus: sanitizeStringList(value.successStatus).length + ? 
sanitizeStringList(value.successStatus) + : ["ok", "partial", "empty"], + failureStatus: sanitizeStringList(value.failureStatus).length + ? sanitizeStringList(value.failureStatus) + : ["blocked", "error"], + }; +} + +function sanitizeValidationHints(hints) { + const value = ensureObject(hints); + return { + requiresTargetPage: value.requiresTargetPage !== false, + runtimeCompatible: value.runtimeCompatible !== false, + manualCompletionRequired: Boolean(value.manualCompletionRequired), + missingPieces: sanitizeStringList(value.missingPieces), + }; +} + +function sanitizeEvidence(evidence) { + if (!Array.isArray(evidence)) return []; + return evidence + .map((item) => ({ + kind: stringValue(item?.kind), + evidenceType: stringValue(item?.evidenceType) || "signal", + layer: stringValue(item?.layer) || "business", + subject: stringValue(item?.subject) || null, + summary: stringValue(item?.summary), + source: stringValue(item?.source) || null, + confidence: clampConfidence(item?.confidence), + payload: item?.payload && typeof item.payload === "object" && !Array.isArray(item.payload) + ? item.payload + : null, + })) + .filter((item) => item.summary); +} + +function sanitizeReadiness(readiness) { + const value = ensureObject(readiness); + return { + level: stringValue(value.level), + confidence: clampConfidence(value.confidence), + gates: Array.isArray(value.gates) + ? 
value.gates + .map((gate) => ({ + name: stringValue(gate?.name), + passed: Boolean(gate?.passed), + reason: stringValue(gate?.reason) || null, + })) + .filter((gate) => gate.name) + : [], + risks: sanitizeStringList(value.risks), + missingPieces: sanitizeStringList(value.missingPieces), + notes: sanitizeStringList(value.notes), + }; +} + +function sanitizeApiEndpoints(endpoints) { + if (!Array.isArray(endpoints)) return []; + return endpoints.map(sanitizeApiEndpoint).filter(Boolean); +} + +function sanitizeApiEndpoint(endpoint) { + if (!endpoint || typeof endpoint !== "object") return null; + const url = stringValue(endpoint.url); + if (!url) return null; + return { + name: stringValue(endpoint.name) || inferEndpointName(url), + url, + method: stringValue(endpoint.method).toUpperCase() || "GET", + contentType: stringValue(endpoint.contentType) || null, + description: stringValue(endpoint.description) || null, + }; +} + +function inferEndpointName(url) { + const parts = url.split(/[/?#]/).filter(Boolean); + return parts[parts.length - 1] || "endpoint"; +} + +function mergeSceneIr(deterministic, llmSceneIr, warnings) { + const merged = JSON.parse(JSON.stringify(deterministic)); + const llm = llmSceneIr ? 
sanitizeSceneIr(llmSceneIr) : null; + + if (!llm) { + merged.sceneIdDiagnostics = sanitizeSceneIdDiagnostics(merged.sceneIdDiagnostics); + merged.validationHints.manualCompletionRequired = merged.readiness.level !== "A"; + return sanitizeSceneIr(merged); } - try { - if (pathname === "/health" && req.method === "GET") { - handleHealth(req, res); - } else if (pathname === "/analyze" && req.method === "POST") { - await handleAnalyze(req, res); - } else if (pathname === "/analyze-deep" && req.method === "POST") { - await handleAnalyzeDeep(req, res); - } else if (pathname === "/generate" && req.method === "POST") { - await handleGenerate(req, res); - } else if (pathname === "/select-folder" && req.method === "POST") { - await handleSelectFolder(req, res); - } else if (pathname === "/select-file" && req.method === "POST") { - await handleSelectFile(req, res); - } else if (pathname === "/" || pathname === "/index.html") { - serveStatic(res, path.join(__dirname, "sg_scene_generator.html")); - } else { - const filePath = path.resolve(__dirname, pathname); - const resolvedDir = path.resolve(__dirname); - if (!filePath.startsWith(resolvedDir + path.sep) && filePath !== resolvedDir) { - res.writeHead(403, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Forbidden" })); + const sceneIdentity = chooseSceneIdentity(deterministic, llm, warnings); + merged.sceneId = sceneIdentity.sceneId; + merged.sceneIdDiagnostics = sceneIdentity.sceneIdDiagnostics; + merged.sceneName = chooseSoftValue(deterministic.sceneName, llm.sceneName); + merged.sceneKind = chooseSoftValue(deterministic.sceneKind, llm.sceneKind) || "report_collection"; + + merged.workflowArchetype = chooseArchetype(deterministic, llm, warnings); + merged.bootstrap = mergeBootstrap(deterministic.bootstrap, llm.bootstrap, warnings); + merged.params = mergeByName(deterministic.params, llm.params); + merged.modes = deterministic.modes.length ? 
deterministic.modes : llm.modes; + merged.defaultMode = chooseSoftValue(deterministic.defaultMode, llm.defaultMode); + merged.modeSwitchField = chooseSoftValue(deterministic.modeSwitchField, llm.modeSwitchField); + merged.workflowSteps = mergeWorkflowSteps(deterministic.workflowSteps, llm.workflowSteps); + merged.workflowEvidence = mergeWorkflowEvidence(deterministic.workflowEvidence, llm.workflowEvidence); + merged.mainRequest = deterministic.mainRequest || llm.mainRequest || null; + merged.enrichmentRequests = deterministic.enrichmentRequests?.length + ? deterministic.enrichmentRequests + : llm.enrichmentRequests; + merged.mergePlan = deterministic.mergePlan || llm.mergePlan || null; + merged.requestTemplate = mergeObjects(deterministic.requestTemplate, llm.requestTemplate); + merged.responsePath = chooseHardValue(deterministic.responsePath, llm.responsePath); + merged.normalizeRules = deterministic.normalizeRules || llm.normalizeRules || null; + merged.artifactContract = deterministic.artifactContract || llm.artifactContract; + merged.apiEndpoints = mergeEndpoints(deterministic.apiEndpoints, llm.apiEndpoints); + merged.staticParams = mergeObjects(deterministic.staticParams, llm.staticParams); + merged.columnDefs = deterministic.columnDefs.length ? deterministic.columnDefs : llm.columnDefs; + merged.evidence = mergeEvidence(deterministic.evidence, llm.evidence); + merged.uncertainties = uniqueStrings([...(deterministic.uncertainties || []), ...(llm.uncertainties || [])]); + + merged.confidence = Number( + ( + clampConfidence(deterministic.confidence) * 0.7 + + clampConfidence(llm.confidence) * 0.3 + + (llm.evidence.length ? 
0.02 : 0) + ).toFixed(2) + ); + + merged.readiness = computeReadinessPreview(merged, warnings, llm.readiness); + merged.validationHints = { + requiresTargetPage: merged.bootstrap.requiresTargetPage !== false, + runtimeCompatible: merged.params.every((param) => + ["dictionary_entity", "month_week_period", "fixed_enum", "literal_passthrough"].includes(param.resolver) + ), + manualCompletionRequired: merged.readiness.level !== "A", + missingPieces: merged.readiness.missingPieces.slice(), + }; + + return sanitizeSceneIr(merged); +} + +function chooseSoftValue(primary, secondary) { + return stringValue(secondary) || stringValue(primary) || ""; +} + +function chooseHardValue(primary, secondary) { + const primaryValue = stringValue(primary); + if (primaryValue) return primaryValue; + return stringValue(secondary); +} + +function chooseArchetype(deterministic, llm, warnings) { + const deterministicValue = stringValue(deterministic.workflowArchetype); + const llmValue = stringValue(llm.workflowArchetype); + if (deterministicValue && llmValue && deterministicValue !== llmValue) { + warnings.push(`Workflow archetype conflict: deterministic=${deterministicValue}, llm=${llmValue}`); + } + return deterministicValue || llmValue || "single_request_table"; +} + +function chooseSceneIdentity(deterministic, llm, warnings) { + const deterministicDiagnostics = sanitizeSceneIdDiagnostics(deterministic.sceneIdDiagnostics); + const llmDiagnostics = sanitizeSceneIdDiagnostics(llm.sceneIdDiagnostics); + const candidates = [ + { + sceneId: stringValue(llm.sceneId), + diagnostics: { + ...llmDiagnostics, + candidateSource: llmDiagnostics.candidateSource || "llm_semantic", + }, + }, + { + sceneId: stringValue(deterministic.sceneId), + diagnostics: { + ...deterministicDiagnostics, + candidateSource: deterministicDiagnostics.candidateSource || "deterministic", + }, + }, + ].filter((item) => item.sceneId); + + const validCandidate = candidates.find((item) => item.diagnostics.valid); + if ( + 
stringValue(deterministic.sceneId) && + stringValue(llm.sceneId) && + stringValue(deterministic.sceneId) !== stringValue(llm.sceneId) + ) { + warnings.push(`SceneId conflict: deterministic=${deterministic.sceneId}, llm=${llm.sceneId}`); + } + + if (validCandidate) { + return { + sceneId: validCandidate.sceneId, + sceneIdDiagnostics: validCandidate.diagnostics, + }; + } + + const fallback = candidates[0] || { + sceneId: "", + diagnostics: { + candidateSource: "", + valid: false, + invalidReason: "empty_scene_id", + candidates: [], + }, + }; + return { + sceneId: fallback.sceneId, + sceneIdDiagnostics: { + ...fallback.diagnostics, + valid: false, + invalidReason: fallback.diagnostics.invalidReason || "invalid_scene_id", + }, + }; +} + +function mergeBootstrap(deterministic, llm, warnings) { + const deterministicTarget = isUnsafeBootstrapValue(deterministic.targetUrl) ? "" : stringValue(deterministic.targetUrl); + const deterministicDomain = isUnsafeBootstrapValue(deterministic.expectedDomain) ? "" : stringValue(deterministic.expectedDomain); + const llmTarget = isUnsafeBootstrapValue(llm.targetUrl) ? "" : stringValue(llm.targetUrl); + const llmDomain = isUnsafeBootstrapValue(llm.expectedDomain) ? 
"" : stringValue(llm.expectedDomain); + const merged = { + expectedDomain: chooseHardValue(deterministicDomain, llmDomain), + targetUrl: chooseHardValue(deterministicTarget, llmTarget), + requiresTargetPage: deterministic.requiresTargetPage !== false && llm.requiresTargetPage !== false, + pageTitleKeywords: uniqueStrings([...(deterministic.pageTitleKeywords || []), ...(llm.pageTitleKeywords || [])]), + source: stringValue(deterministic.source) || stringValue(llm.source) || "deterministic", + }; + + if (stringValue(llm.targetUrl) && !llmTarget) { + warnings.push(`Ignored unsafe llm bootstrap target: ${llm.targetUrl}`); + } + if (stringValue(llm.expectedDomain) && !llmDomain) { + warnings.push(`Ignored unsafe llm bootstrap domain: ${llm.expectedDomain}`); + } + + if ( + deterministic.targetUrl && + llm.targetUrl && + stringValue(deterministic.targetUrl) !== stringValue(llm.targetUrl) + ) { + warnings.push(`Bootstrap target conflict: deterministic=${deterministic.targetUrl}, llm=${llm.targetUrl}`); + } + if ( + deterministic.expectedDomain && + llm.expectedDomain && + stringValue(deterministic.expectedDomain) !== stringValue(llm.expectedDomain) + ) { + warnings.push( + `Bootstrap domain conflict: deterministic=${deterministic.expectedDomain}, llm=${llm.expectedDomain}` + ); + } + return merged; +} + +function isUnsafeBootstrapValue(value) { + const text = stringValue(value).toLowerCase(); + if (!text) return false; + return ( + text.includes("localhost") || + text.includes("127.0.0.1") || + text.includes("surfaceservices") || + text.includes("reportservices") + ); +} + +function mergeByName(primary, secondary) { + const map = new Map(); + for (const item of secondary || []) { + map.set(item.name, item); + } + for (const item of primary || []) { + map.set(item.name, item); + } + return Array.from(map.values()).filter((item) => item && item.name); +} + +function mergeWorkflowSteps(primary, secondary) { + const merged = []; + const seen = new Set(); + for (const step of 
[...(primary || []), ...(secondary || [])]) { + if (!step || !step.type) continue; + const key = [step.type, step.entry || "", step.endpoint || "", step.expr || ""].join("|"); + if (seen.has(key)) continue; + seen.add(key); + merged.push(step); + } + return merged; +} + +function mergeWorkflowEvidence(primary, secondary) { + return { + requestEntries: uniqueStrings([...(primary?.requestEntries || []), ...(secondary?.requestEntries || [])]), + paginationFields: uniqueStrings([...(primary?.paginationFields || []), ...(secondary?.paginationFields || [])]), + secondaryRequestEntries: uniqueStrings([ + ...(primary?.secondaryRequestEntries || []), + ...(secondary?.secondaryRequestEntries || []), + ]), + postProcessSteps: uniqueStrings([...(primary?.postProcessSteps || []), ...(secondary?.postProcessSteps || [])]), + }; +} + +function mergeObjects(primary, secondary) { + return { + ...ensureObject(secondary), + ...ensureObject(primary), + }; +} + +function mergeEndpoints(primary, secondary) { + const map = new Map(); + for (const endpoint of secondary || []) { + const key = `${endpoint.method}|${endpoint.url}`; + map.set(key, endpoint); + } + for (const endpoint of primary || []) { + const key = `${endpoint.method}|${endpoint.url}`; + map.set(key, endpoint); + } + return Array.from(map.values()); +} + +function mergeEvidence(primary, secondary) { + const seen = new Set(); + const merged = []; + for (const item of [...(primary || []), ...(secondary || [])]) { + if (!item || !item.summary) continue; + const key = `${item.kind}|${item.evidenceType || ""}|${item.summary}`; + if (seen.has(key)) continue; + seen.add(key); + merged.push(item); + } + return merged; +} + +function computeReadinessPreview(sceneIr, warnings, llmReadiness) { + const risks = []; + const missingPieces = []; + const notes = []; + const gates = []; + + if (!sceneIr.sceneIdDiagnostics?.valid) { + missingPieces.push("invalid_scene_id"); + risks.push( + `Scene id is 
invalid${sceneIr.sceneIdDiagnostics?.invalidReason ? `: ${sceneIr.sceneIdDiagnostics.invalidReason}` : "."}` + ); + } + + const hasUnsafeBootstrap = + isUnsafeBootstrapValue(sceneIr.bootstrap.targetUrl) || isUnsafeBootstrapValue(sceneIr.bootstrap.expectedDomain); + if (hasUnsafeBootstrap) { + missingPieces.push("bootstrap_target"); + risks.push("Bootstrap resolves to localhost/helper/export instead of a business domain."); + } else if (!sceneIr.bootstrap.targetUrl && !sceneIr.bootstrap.expectedDomain) { + missingPieces.push("bootstrap_target"); + risks.push("Bootstrap target/domain is missing."); + } else if (!sceneIr.bootstrap.expectedDomain) { + risks.push("Expected domain is missing."); + } + + if (!sceneIr.apiEndpoints.length) { + missingPieces.push("api_endpoint"); + risks.push("No API endpoint is available."); + } + + if (!sceneIr.workflowSteps.length) { + missingPieces.push("workflow_steps"); + risks.push("Workflow steps are incomplete."); + } + + const businessEndpoints = (sceneIr.apiEndpoints || []).filter((endpoint) => !isUnsafeBootstrapValue(endpoint.url)); + const requestContract = previewRequestContract(sceneIr, businessEndpoints); + if (!requestContract.passed) { + missingPieces.push(requestContract.reason || "request_contract"); + risks.push(requestContract.message); + } + + const responseContract = previewResponseContract(sceneIr, businessEndpoints); + if (!responseContract.passed) { + missingPieces.push(responseContract.reason || "response_contract"); + risks.push(responseContract.message); + } + + const workflowContract = previewWorkflowContract(sceneIr, businessEndpoints); + if (!workflowContract.passed) { + missingPieces.push(workflowContract.reason || "workflow_contract"); + risks.push(workflowContract.message); + } + + if (sceneIr.workflowArchetype === "multi_mode_request" && !sceneIr.modes.length) { + missingPieces.push("modes"); + risks.push("Multi-mode workflow has no resolved modes."); + } + + if (sceneIr.workflowArchetype === 
"paginated_enrichment") { + const hasPaginate = + sceneIr.workflowSteps.some((step) => step.type === "paginate") || + (sceneIr.workflowEvidence?.paginationFields || []).length > 0; + const hasSecondary = + sceneIr.workflowSteps.some((step) => step.type === "secondary_request") || + (sceneIr.workflowEvidence?.secondaryRequestEntries || []).length > 0; + const hasPostProcess = + sceneIr.workflowSteps.some((step) => ["filter", "transform", "export"].includes(step.type)) || + (sceneIr.workflowEvidence?.postProcessSteps || []).length > 0; + if (!hasPaginate) { + missingPieces.push("paginate_step"); + risks.push("Paginated enrichment is missing a pagination step."); + } + if (!hasSecondary || sceneIr.apiEndpoints.length < 2) { + missingPieces.push("secondary_request"); + risks.push("Paginated enrichment is missing a strong secondary request path."); + } + if (!hasPostProcess) { + missingPieces.push("post_process"); + risks.push("Paginated enrichment is missing filter/transform/export evidence."); + } + } + + if (!sceneIr.validationHints.runtimeCompatible) { + risks.push("Some params require runtime support not confirmed by the frontend preview."); + } + + gates.push({ + name: "scene_id_valid", + passed: sceneIr.sceneIdDiagnostics?.valid !== false, + reason: sceneIr.sceneIdDiagnostics?.valid === false ? sceneIr.sceneIdDiagnostics.invalidReason || "invalid_scene_id" : null, + }); + gates.push({ + name: "bootstrap_resolved", + passed: !hasUnsafeBootstrap && Boolean(sceneIr.bootstrap.targetUrl || sceneIr.bootstrap.expectedDomain), + reason: !hasUnsafeBootstrap && Boolean(sceneIr.bootstrap.targetUrl || sceneIr.bootstrap.expectedDomain) + ? null + : "bootstrap_target", + }); + gates.push({ + name: "request_contract_complete", + passed: requestContract.passed, + reason: requestContract.passed ? null : requestContract.reason, + }); + gates.push({ + name: "response_contract_complete", + passed: responseContract.passed, + reason: responseContract.passed ? 
null : responseContract.reason, + }); + gates.push({ + name: "workflow_contract_complete", + passed: workflowContract.passed, + reason: workflowContract.passed ? null : workflowContract.reason, + }); + gates.push({ + name: "workflow_complete_for_archetype", + passed: workflowContract.passed, + reason: workflowContract.passed ? null : workflowContract.reason, + }); + gates.push({ + name: "runtime_contract_compatible", + passed: sceneIr.validationHints.runtimeCompatible !== false, + reason: sceneIr.validationHints.runtimeCompatible !== false ? null : "runtime_contract_incompatible", + }); + + for (const warning of warnings || []) { + risks.push(warning); + } + + if (llmReadiness && Array.isArray(llmReadiness.notes)) { + notes.push(...sanitizeStringList(llmReadiness.notes)); + } + + let level = "A"; + if (missingPieces.length > 0) { + level = missingPieces.length >= 2 ? "C" : "B"; + } else if (risks.length > 1 || clampConfidence(sceneIr.confidence) < 0.7) { + level = "B"; + } + + if (level === "A") { + notes.unshift("Ready for direct internal-network trial."); + } else if (level === "B") { + notes.unshift("Structurally plausible, but human review is recommended."); + } else { + notes.unshift("Draft only; manual completion is required before trial."); + } + + return { + level, + confidence: clampConfidence( + llmReadiness?.confidence ? 
(sceneIr.confidence * 0.7 + llmReadiness.confidence * 0.3) : sceneIr.confidence + ), + gates, + risks: uniqueStrings(risks), + missingPieces: uniqueStrings(missingPieces), + notes: uniqueStrings(notes), + }; +} + +function previewRequestContract(sceneIr, businessEndpoints) { + const endpointCount = (businessEndpoints || []).length; + const hasRequestStep = (sceneIr.workflowSteps || []).some((step) => ["request", "paginate", "secondary_request", "page_state"].includes(step.type)); + const hasRequestEvidence = (sceneIr.workflowEvidence?.requestEntries || []).length > 0; + const hasParams = (sceneIr.params || []).length > 0; + + if (sceneIr.workflowArchetype === "single_request_enrichment") { + return endpointCount >= 2 && Boolean(sceneIr.mainRequest) && (sceneIr.enrichmentRequests || []).length > 0 + ? { passed: true } + : { + passed: false, + reason: endpointCount >= 2 ? "main_request" : "request_endpoint", + message: endpointCount >= 2 + ? "G1-E workflow is missing main/enrichment request evidence." + : "G1-E workflow requires both main and enrichment business endpoints.", + }; + } + + if (sceneIr.workflowArchetype === "multi_mode_request") { + const hasModes = (sceneIr.modes || []).length > 0; + const hasModeSwitch = Boolean(sceneIr.modeSwitchField); + return endpointCount > 0 && hasModes && hasModeSwitch + ? { passed: true } + : { + passed: false, + reason: !hasModes || !hasModeSwitch ? "request_mode_param" : "request_endpoint", + message: !hasModes || !hasModeSwitch + ? "Multi-mode request is missing mode selection contract." + : "Request contract is missing business endpoint evidence.", + }; + } + + if (sceneIr.workflowArchetype === "paginated_enrichment") { + return endpointCount >= 2 && (hasRequestStep || hasRequestEvidence) + ? { passed: true } + : { + passed: false, + reason: endpointCount >= 2 ? "request_workflow" : "request_endpoint", + message: endpointCount >= 2 + ? "Paginated enrichment is missing request workflow evidence." 
+ : "Paginated enrichment requires both primary and secondary business endpoints.", + }; + } + + if (sceneIr.workflowArchetype === "page_state_eval") { + return hasRequestStep || endpointCount > 0 + ? { passed: true } + : { + passed: false, + reason: "request_workflow", + message: "Page-state evaluation is missing request/state workflow evidence.", + }; + } + + return endpointCount > 0 || hasRequestStep || hasRequestEvidence || hasParams + ? { passed: true } + : { + passed: false, + reason: "request_endpoint", + message: "Request contract is missing business endpoint or request entry evidence.", + }; +} + +function previewResponseContract(sceneIr, businessEndpoints) { + const endpointCount = (businessEndpoints || []).length; + const hasTransform = (sceneIr.workflowSteps || []).some((step) => ["transform", "filter", "export"].includes(step.type)); + const hasResponsePath = Boolean(sceneIr.responsePath) || (sceneIr.modes || []).some((mode) => Boolean(mode.responsePath)); + + if (sceneIr.workflowArchetype === "single_request_enrichment") { + const hasColumns = (sceneIr.mergePlan?.outputColumns || []).length > 0 || (sceneIr.columnDefs || []).length > 0; + return endpointCount >= 2 && Boolean(sceneIr.mergePlan) && hasColumns + ? { passed: true } + : { + passed: false, + reason: endpointCount >= 2 ? "merge_plan" : "response_path", + message: endpointCount >= 2 + ? "G1-E workflow is missing merge/output evidence." + : "G1-E workflow lacks enough response-side endpoint evidence.", + }; + } + + if (sceneIr.workflowArchetype === "page_state_eval") { + return { passed: true }; + } + + if (sceneIr.workflowArchetype === "paginated_enrichment") { + return endpointCount >= 2 && hasResponsePath + ? { passed: true } + : { + passed: false, + reason: !hasResponsePath ? "response_path" : "response_endpoint", + message: !hasResponsePath + ? "Paginated enrichment is missing response extraction path." 
+ : "Paginated enrichment lacks enough response-side endpoints.", + }; + } + + return hasResponsePath || hasTransform || endpointCount > 0 + ? { passed: true } + : { + passed: false, + reason: "response_path", + message: "Response contract is missing response extraction evidence.", + }; +} + +function previewWorkflowContract(sceneIr, businessEndpoints) { + const steps = sceneIr.workflowSteps || []; + if (!steps.length) { + return { + passed: false, + reason: "workflow_steps", + message: "Workflow contract is missing executable steps.", + }; + } + + if (sceneIr.workflowArchetype === "single_request_enrichment") { + const hasRequest = steps.some((step) => step.type === "request"); + const hasEnrichment = steps.some((step) => step.type === "enrichment_request"); + const hasTransform = steps.some((step) => step.type === "transform"); + return hasRequest && hasEnrichment && hasTransform && Boolean(sceneIr.mergePlan) + ? { passed: true } + : { + passed: false, + reason: !hasRequest ? "workflow_request" : !hasEnrichment ? "enrichment_requests" : !hasTransform ? "workflow_transform" : "merge_plan", + message: "G1-E workflow requires request, enrichment_request, transform, and merge_plan.", + }; + } + + if (sceneIr.workflowArchetype === "multi_mode_request") { + const hasRequest = steps.some((step) => step.type === "request"); + const hasTransform = steps.some((step) => step.type === "transform"); + return hasRequest && hasTransform + ? { passed: true } + : { + passed: false, + reason: !hasRequest ? 
"workflow_request" : "workflow_transform", + message: "Multi-mode request requires request and transform steps.", + }; + } + + if (sceneIr.workflowArchetype === "paginated_enrichment") { + const hasPaginate = + steps.some((step) => step.type === "paginate") || + (sceneIr.workflowEvidence?.paginationFields || []).length > 0; + const hasSecondary = + steps.some((step) => step.type === "secondary_request") || + (sceneIr.workflowEvidence?.secondaryRequestEntries || []).length > 0; + const hasPostProcess = + steps.some((step) => ["filter", "transform", "export"].includes(step.type)) || + (sceneIr.workflowEvidence?.postProcessSteps || []).length > 0; + if (!hasPaginate) { + return { + passed: false, + reason: "paginate_step", + message: "Paginated enrichment is missing a pagination step.", + }; + } + if (!hasSecondary || (businessEndpoints || []).length < 2) { + return { + passed: false, + reason: "secondary_request", + message: "Paginated enrichment is missing a strong secondary request path.", + }; + } + if (!hasPostProcess) { + return { + passed: false, + reason: "post_process", + message: "Paginated enrichment is missing filter/transform/export evidence.", + }; + } + } + + return { passed: true }; +} + +function getGenerationBlockers({ sceneIr, sceneId, sceneName, sourceDir }) { + const blockers = []; + const validation = validateSceneIdCandidate(sceneId, { sceneName, sourceDir }); + if (!validation.valid) { + blockers.push(`invalid_scene_id:${validation.reason}`); + } + if (sceneIr?.sceneIdDiagnostics && sceneIr.sceneIdDiagnostics.valid === false) { + blockers.push( + `analysis_invalid_scene_id:${sceneIr.sceneIdDiagnostics.invalidReason || "invalid_scene_id"}` + ); + } + for (const gate of sceneIr?.readiness?.gates || []) { + if (!gate.passed) { + blockers.push(`gate_failed:${gate.name}${gate.reason ? 
`:${gate.reason}` : ""}`); + } + } + return uniqueStrings(blockers); +} + +function ensureObject(value) { + return value && typeof value === "object" && !Array.isArray(value) ? value : {}; +} + +function sanitizeStringList(value) { + if (!Array.isArray(value)) return []; + return uniqueStrings(value.map((item) => stringValue(item)).filter(Boolean)); +} + +function uniqueStrings(list) { + return Array.from(new Set((list || []).map((item) => stringValue(item)).filter(Boolean))); +} + +function clampConfidence(value) { + const numeric = typeof value === "number" ? value : Number(value); + if (!Number.isFinite(numeric)) return 0; + return Math.max(0, Math.min(1, Number(numeric.toFixed(2)))); +} + +function normalizeInputPath(value) { + const normalized = stringValue(value); + return normalized ? normalized.replace(/\\/g, "/") : ""; +} + +function normalizeJsonInput(value) { + if (!value) return null; + if (typeof value === "string") return value; + if (typeof value === "object") return JSON.stringify(value); + return null; +} + +function stringValue(value) { + return typeof value === "string" ? 
value.trim() : ""; +} + +function createServer() { + return http.createServer(async (req, res) => { + const url = new URL(req.url, `http://${req.headers.host}`); + const pathname = url.pathname; + + if (req.method === "OPTIONS") { + res.writeHead(204, { + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Methods": "GET, POST, OPTIONS", + "Access-Control-Allow-Headers": "Content-Type", + }); + res.end(); + return; + } + + try { + if (pathname === "/health" && req.method === "GET") { + handleHealth(req, res); return; } + if (pathname === "/analyze" && req.method === "POST") { + await handleAnalyze(req, res); + return; + } + if (pathname === "/analyze-deep" && req.method === "POST") { + await handleAnalyzeDeep(req, res); + return; + } + if (pathname === "/generate" && req.method === "POST") { + await handleGenerate(req, res); + return; + } + if (pathname === "/select-folder" && req.method === "POST") { + await handleSelectFolder(req, res); + return; + } + if (pathname === "/select-file" && req.method === "POST") { + await handleSelectFile(req, res); + return; + } + if (pathname === "/" || pathname === "/index.html") { + serveStatic(res, path.join(__dirname, "sg_scene_generator.html")); + return; + } + + const filePath = path.resolve(__dirname, "." 
+ pathname); + const baseDir = path.resolve(__dirname); + if (!filePath.startsWith(baseDir + path.sep) && filePath !== baseDir) { + writeJson(res, 403, { error: "Forbidden" }); + return; + } + if (fs.existsSync(filePath) && fs.statSync(filePath).isFile()) { serveStatic(res, filePath); - } else { - res.writeHead(404, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Not found" })); + return; } - } - } catch (err) { - console.error(`[error] ${req.method} ${pathname}: ${err.message}`); - if (!res.headersSent) { - res.writeHead(500, { "Content-Type": "application/json" }); - } - res.end(JSON.stringify({ error: err.message })); - } -}); -server.listen(PORT, HOST, () => { - console.log(""); - console.log(" ╔══════════════════════════════════════════════════╗"); - console.log(" ║ sgClaw · Scene Skill Generator ║"); - console.log(" ╠══════════════════════════════════════════════════╣"); - console.log(" ║ ║"); - console.log(` ║ 访问地址: http://${HOST}:${PORT}/ ║`); - console.log(" ║ ║"); - console.log(" ║ 按 Ctrl+C 停止服务 ║"); - console.log(" ╚══════════════════════════════════════════════════╝"); - console.log(""); -}); + writeJson(res, 404, { error: "Not found" }); + } catch (error) { + console.error(`[error] ${req.method} ${pathname}: ${error.message}`); + if (!res.headersSent) { + writeJson(res, 500, { error: error.message }); + return; + } + res.end(JSON.stringify({ error: error.message })); + } + }); +} -process.on("SIGINT", () => { - if (server.closing) return; - server.closing = true; - console.log("\n[info] Shutting down..."); - server.close(() => process.exit(0)); - // 强制退出超时 - setTimeout(() => process.exit(0), 2000); -}); +if (require.main === module) { + const server = createServer(); + server.listen(PORT, HOST, () => { + console.log(""); + console.log(" ================================================"); + console.log(" sgClaw Scene Skill Generator"); + console.log(` http://${HOST}:${PORT}/`); + console.log(" Press Ctrl+C to stop"); + 
console.log(" ================================================"); + console.log(""); + }); + + process.on("SIGINT", () => { + if (server.closing) return; + server.closing = true; + console.log("\n[info] Shutting down..."); + server.close(() => process.exit(0)); + setTimeout(() => process.exit(0), 2000); + }); +} + +module.exports = { + computeReadinessPreview, + createServer, + getGenerationBlockers, + mergeSceneIr, + sanitizeSceneIr, + sanitizeSceneIdDiagnostics, +}; diff --git a/frontend/scene-generator/sg_scene_generator.html b/frontend/scene-generator/sg_scene_generator.html index 68f8496..5f9182c 100644 --- a/frontend/scene-generator/sg_scene_generator.html +++ b/frontend/scene-generator/sg_scene_generator.html @@ -3,620 +3,1097 @@ - 场景 Skill 生成器 + Scene Skill Generator +
-

场景 Skill 生成器

-

选择场景目录,配置参数,一键生成 skill 包。

+

Scene Skill Generator

+

Analyze a frontend scene directory, inspect merged Scene IR preview, override the workflow archetype when needed, then generate with `scene-ir-json` instead of the old flat scene info payload.

+
- -
-
-
- -

实时日志

-

显示生成过程的完整输出

+ + + +
+
-
-
选择场景目录并点击"生成 Skill"开始。
-
-
+
+
+
+
Detected Scene ID
+
-
+
+
+
Scene ID Source
+
-
+
+
+
Scene ID Validation
+
-
+
+
+
Detected Archetype
+
-
+
+
+
Overall Confidence
+
-
+
+
+
Bootstrap Domain
+
-
+
+
+
Bootstrap Target URL
+
-
+
+
+ +
+ +
+
+ +
+
+
+ +
+
+
+ +
+
+ +
+
+
+ +
+
+
+ +
+
+ +
+
+
+ +
+
+
+ +
+
+ +
{}
+
+
+ +
{}
+
+
+
+ + +
+
+

Generation Log

+

Streaming output from the generator. Completion also repeats readiness and archetype so the execution risk stays visible after generation.

+
+
+
+
Choose a scene directory, run deep analysis, then generate a skill package.
+
+
+
+
diff --git a/frontend/service-console/sg_claw_service_console.html b/frontend/service-console/sg_claw_service_console.html index a265867..ba8cf07 100644 --- a/frontend/service-console/sg_claw_service_console.html +++ b/frontend/service-console/sg_claw_service_console.html @@ -372,6 +372,14 @@
+
+ + +
+
+ + +
@@ -458,6 +466,8 @@ connectionState: document.getElementById("connectionState"), messageStream: document.getElementById("messageStream"), instructionInput: document.getElementById("instructionInput"), + pageUrlInput: document.getElementById("pageUrlInput"), + pageTitleInput: document.getElementById("pageTitleInput"), validationText: document.getElementById("validationText"), sendBtn: document.getElementById("sendBtn"), emptyState: document.getElementById("emptyState") @@ -688,14 +698,17 @@ return; } + const pageUrl = elements.pageUrlInput.value.trim(); + const pageTitle = elements.pageTitleInput.value.trim(); + setValidation(""); socket.send(JSON.stringify({ type: "submit_task", instruction, conversation_id: "", messages: [], - page_url: "", - page_title: "" + page_url: pageUrl, + page_title: pageTitle })); } diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..fb239ef --- /dev/null +++ b/package-lock.json @@ -0,0 +1,1038 @@ +{ + "name": "claw-new", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "claw-new", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "puppeteer": "^24.41.0" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.29.0", + "resolved": "https://registry.npmmirror.com/@babel/code-frame/-/code-frame-7.29.0.tgz", + "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", + "dependencies": { + "@babel/helper-validator-identifier": "^7.28.5", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "resolved": "https://registry.npmmirror.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", + "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", + "engines": { + "node": ">=6.9.0" + } + 
}, + "node_modules/@puppeteer/browsers": { + "version": "2.13.0", + "resolved": "https://registry.npmmirror.com/@puppeteer/browsers/-/browsers-2.13.0.tgz", + "integrity": "sha512-46BZJYJjc/WwmKjsvDFykHtXrtomsCIrwYQPOP7VfMJoZY2bsDF9oROBABR3paDjDcmkUye1Pb1BqdcdiipaWA==", + "dependencies": { + "debug": "^4.4.3", + "extract-zip": "^2.0.1", + "progress": "^2.0.3", + "proxy-agent": "^6.5.0", + "semver": "^7.7.4", + "tar-fs": "^3.1.1", + "yargs": "^17.7.2" + }, + "bin": { + "browsers": "lib/cjs/main-cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmmirror.com/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==" + }, + "node_modules/@types/node": { + "version": "25.6.0", + "resolved": "https://registry.npmmirror.com/@types/node/-/node-25.6.0.tgz", + "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", + "optional": true, + "dependencies": { + "undici-types": "~7.19.0" + } + }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmmirror.com/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmmirror.com/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmmirror.com/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": 
"sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmmirror.com/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmmirror.com/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" + }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmmirror.com/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/b4a": { + "version": "1.8.0", + "resolved": "https://registry.npmmirror.com/b4a/-/b4a-1.8.0.tgz", + "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, + "node_modules/bare-events": { + "version": "2.8.2", + "resolved": "https://registry.npmmirror.com/bare-events/-/bare-events-2.8.2.tgz", + "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + "node_modules/bare-fs": { + "version": "4.7.1", + "resolved": 
"https://registry.npmmirror.com/bare-fs/-/bare-fs-4.7.1.tgz", + "integrity": "sha512-WDRsyVN52eAx/lBamKD6uyw8H4228h/x0sGGGegOamM2cd7Pag88GfMQalobXI+HaEUxpCkbKQUDOQqt9wawRw==", + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + "bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.8.7", + "resolved": "https://registry.npmmirror.com/bare-os/-/bare-os-3.8.7.tgz", + "integrity": "sha512-G4Gr1UsGeEy2qtDTZwL7JFLo2wapUarz7iTMcYcMFdS89AIQuBoyjgXZz0Utv7uHs3xA9LckhVbeBi8lEQrC+w==", + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmmirror.com/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.13.0", + "resolved": "https://registry.npmmirror.com/bare-stream/-/bare-stream-2.13.0.tgz", + "integrity": "sha512-3zAJRZMDFGjdn+RVnNpF9kuELw+0Fl3lpndM4NcEOhb9zwtSo/deETfuIwMSE5BXanA0FrN1qVjffGwAg2Y7EA==", + "dependencies": { + "streamx": "^2.25.0", + "teex": "^1.0.1" + }, + "peerDependencies": { + "bare-abort-controller": "*", + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + }, + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.4.1", + "resolved": "https://registry.npmmirror.com/bare-url/-/bare-url-2.4.1.tgz", + "integrity": "sha512-fZapLWNB25gS+etK27NV9KgBNXgo2yeYHuj+OyPblQd6GYAE3JVy6aKxszMV5jhGGFwraXQKA5fldvf3lMyEqw==", + "dependencies": { + "bare-path": "^3.0.0" + } + }, + 
"node_modules/basic-ftp": { + "version": "5.3.0", + "resolved": "https://registry.npmmirror.com/basic-ftp/-/basic-ftp-5.3.0.tgz", + "integrity": "sha512-5K9eNNn7ywHPsYnFwjKgYH8Hf8B5emh7JKcPaVjjrMJFQQwGpwowEnZNEtHs7DfR7hCZsmaK3VA4HUK0YarT+w==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmmirror.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "engines": { + "node": "*" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmmirror.com/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "engines": { + "node": ">=6" + } + }, + "node_modules/chromium-bidi": { + "version": "14.0.0", + "resolved": "https://registry.npmmirror.com/chromium-bidi/-/chromium-bidi-14.0.0.tgz", + "integrity": "sha512-9gYlLtS6tStdRWzrtXaTMnqcM4dudNegMXJxkR0I/CXObHalYeYcAMPrL19eroNZHtJ8DQmu1E+ZNOYu/IXMXw==", + "dependencies": { + "mitt": "^3.0.1", + "zod": "^3.24.1" + }, + "peerDependencies": { + "devtools-protocol": "*" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmmirror.com/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmmirror.com/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + 
"node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmmirror.com/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/cosmiconfig": { + "version": "9.0.1", + "resolved": "https://registry.npmmirror.com/cosmiconfig/-/cosmiconfig-9.0.1.tgz", + "integrity": "sha512-hr4ihw+DBqcvrsEDioRO31Z17x71pUYoNe/4h6Z0wB72p7MU7/9gH8Q3s12NFhHPfYBBOV3qyfUxmr/Yn3shnQ==", + "dependencies": { + "env-paths": "^2.2.1", + "import-fresh": "^3.3.0", + "js-yaml": "^4.1.0", + "parse-json": "^5.2.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/d-fischer" + }, + "peerDependencies": { + "typescript": ">=4.9.5" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmmirror.com/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmmirror.com/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmmirror.com/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": ">= 14" + } + }, + 
"node_modules/devtools-protocol": { + "version": "0.0.1595872", + "resolved": "https://registry.npmmirror.com/devtools-protocol/-/devtools-protocol-0.0.1595872.tgz", + "integrity": "sha512-kRfgp8vWVjBu/fbYCiVFiOqsCk3CrMKEo3WbgGT2NXK2dG7vawWPBljixajVgGK9II8rDO9G0oD0zLt3I1daRg==" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmmirror.com/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmmirror.com/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/env-paths": { + "version": "2.2.1", + "resolved": "https://registry.npmmirror.com/env-paths/-/env-paths-2.2.1.tgz", + "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", + "engines": { + "node": ">=6" + } + }, + "node_modules/error-ex": { + "version": "1.3.4", + "resolved": "https://registry.npmmirror.com/error-ex/-/error-ex-1.3.4.tgz", + "integrity": "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==", + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmmirror.com/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "engines": { + "node": ">=6" + } + }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmmirror.com/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "dependencies": { + "esprima": 
"^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmmirror.com/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmmirror.com/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmmirror.com/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/events-universal": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/events-universal/-/events-universal-1.0.1.tgz", + "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", + "dependencies": { + "bare-events": "^2.7.0" + } + }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmmirror.com/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } 
+ }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmmirror.com/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" + }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmmirror.com/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dependencies": { + "pend": "~1.2.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmmirror.com/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmmirror.com/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/get-uri": { + "version": "6.0.5", + "resolved": "https://registry.npmmirror.com/get-uri/-/get-uri-6.0.5.tgz", + "integrity": "sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmmirror.com/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, 
+ "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmmirror.com/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/import-fresh": { + "version": "3.3.1", + "resolved": "https://registry.npmmirror.com/import-fresh/-/import-fresh-3.3.1.tgz", + "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmmirror.com/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "engines": { + "node": ">= 12" + } + }, + "node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmmirror.com/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==" + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmmirror.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "engines": { + "node": ">=8" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmmirror.com/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + 
"node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmmirror.com/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmmirror.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==" + }, + "node_modules/lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmmirror.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" + }, + "node_modules/lru-cache": { + "version": "7.18.3", + "resolved": "https://registry.npmmirror.com/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "engines": { + "node": ">=12" + } + }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmmirror.com/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node_modules/netmask": { + "version": "2.1.1", + "resolved": "https://registry.npmmirror.com/netmask/-/netmask-2.1.1.tgz", + "integrity": "sha512-eonl3sLUha+S1GzTPxychyhnUzKyeQkZ7jLjKrBagJgPla13F+uQ71HgpFefyHgqrjEbCPkDArxYsjY8/+gLKA==", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/once": { + "version": 
"1.4.0", + "resolved": "https://registry.npmmirror.com/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/pac-proxy-agent": { + "version": "7.2.0", + "resolved": "https://registry.npmmirror.com/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", + "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.6", + "pac-resolver": "^7.0.1", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmmirror.com/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmmirror.com/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dependencies": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmmirror.com/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmmirror.com/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" + }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmmirror.com/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/proxy-agent": { + "version": "6.5.0", + "resolved": "https://registry.npmmirror.com/proxy-agent/-/proxy-agent-6.5.0.tgz", + "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.6", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.1.0", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmmirror.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmmirror.com/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/puppeteer": { + "version": 
"24.41.0", + "resolved": "https://registry.npmmirror.com/puppeteer/-/puppeteer-24.41.0.tgz", + "integrity": "sha512-W6Fk0J3TPjjtwjXOyR/qf+YaL0H/Uq8HIgHcXG4mNM/IgbKMCH/HPyK0Fi2qbTU/QpSl9bCte2yBpGHKejTpIw==", + "hasInstallScript": true, + "dependencies": { + "@puppeteer/browsers": "2.13.0", + "chromium-bidi": "14.0.0", + "cosmiconfig": "^9.0.0", + "devtools-protocol": "0.0.1595872", + "puppeteer-core": "24.41.0", + "typed-query-selector": "^2.12.1" + }, + "bin": { + "puppeteer": "lib/cjs/puppeteer/node/cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/puppeteer-core": { + "version": "24.41.0", + "resolved": "https://registry.npmmirror.com/puppeteer-core/-/puppeteer-core-24.41.0.tgz", + "integrity": "sha512-rLIUri7E/NQ3APSEYCCozaSJx0u8Tu9wxO6BJwnvXmIgILSK3L0TombaVh3izp1njAGrO6H2ru0hcIrLF+gWLw==", + "dependencies": { + "@puppeteer/browsers": "2.13.0", + "chromium-bidi": "14.0.0", + "debug": "^4.4.3", + "devtools-protocol": "0.0.1595872", + "typed-query-selector": "^2.12.1", + "webdriver-bidi-protocol": "0.4.1", + "ws": "^8.19.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmmirror.com/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmmirror.com/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "engines": { + "node": ">=4" + } + }, + "node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmmirror.com/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "bin": { + "semver": "bin/semver.js" + }, + 
"engines": { + "node": ">=10" + } + }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmmirror.com/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "resolved": "https://registry.npmmirror.com/socks/-/socks-2.8.7.tgz", + "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "resolved": "https://registry.npmmirror.com/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", + "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmmirror.com/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/streamx": { + "version": "2.25.0", + "resolved": "https://registry.npmmirror.com/streamx/-/streamx-2.25.0.tgz", + "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==", + "dependencies": { + "events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmmirror.com/string-width/-/string-width-4.2.3.tgz", + "integrity": 
"sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmmirror.com/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/tar-fs": { + "version": "3.1.2", + "resolved": "https://registry.npmmirror.com/tar-fs/-/tar-fs-3.1.2.tgz", + "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/tar-stream": { + "version": "3.1.8", + "resolved": "https://registry.npmmirror.com/tar-stream/-/tar-stream-3.1.8.tgz", + "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/teex": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/teex/-/teex-1.0.1.tgz", + "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", + "dependencies": { + "streamx": "^2.12.5" + } + }, + "node_modules/text-decoder": { + "version": "1.2.7", + "resolved": "https://registry.npmmirror.com/text-decoder/-/text-decoder-1.2.7.tgz", + "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", + "dependencies": { + "b4a": "^1.6.4" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": 
"https://registry.npmmirror.com/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==" + }, + "node_modules/typed-query-selector": { + "version": "2.12.1", + "resolved": "https://registry.npmmirror.com/typed-query-selector/-/typed-query-selector-2.12.1.tgz", + "integrity": "sha512-uzR+FzI8qrUEIu96oaeBJmd9E7CFEiQ3goA5qCVgc4s5llSubcfGHq9yUstZx/k4s9dXHVKsE35YWoFyvEqEHA==" + }, + "node_modules/undici-types": { + "version": "7.19.2", + "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.19.2.tgz", + "integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==", + "optional": true + }, + "node_modules/webdriver-bidi-protocol": { + "version": "0.4.1", + "resolved": "https://registry.npmmirror.com/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.4.1.tgz", + "integrity": "sha512-ARrjNjtWRRs2w4Tk7nqrf2gBI0QXWuOmMCx2hU+1jUt6d00MjMxURrhxhGbrsoiZKJrhTSTzbIrc554iKI10qw==" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmmirror.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + }, + "node_modules/ws": { + "version": "8.20.0", + "resolved": "https://registry.npmmirror.com/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", + "engines": { + "node": 
">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmmirror.com/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmmirror.com/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmmirror.com/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "engines": { + "node": ">=12" + } + }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmmirror.com/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..097382d --- 
/dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "name": "claw-new", + "version": "1.0.0", + "description": "sgClaw 项目仓库。", + "main": "index.js", + "directories": { + "doc": "docs", + "example": "examples", + "test": "tests" + }, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "puppeteer": "^24.41.0" + } +} diff --git a/resources/rules-102-business-targets-candidate.json b/resources/rules-102-business-targets-candidate.json new file mode 100644 index 0000000..8a66672 --- /dev/null +++ b/resources/rules-102-business-targets-candidate.json @@ -0,0 +1,209 @@ +{ + "version": "1.0", + "purpose": "Candidate allowlist additions for validating the 102 scene skills on an inner-network machine.", + "source": { + "skillsRoot": "examples/scene_skill_102_final_materialization_2026-04-19/skills", + "generatedDate": "2026-04-20", + "businessTargetCount": 17, + "ipLikeTargetCount": 10, + "domainLikeTargetCount": 7 + }, + "domains": { + "allowed_additions": [ + "10.4.39.180", + "20.76.56.147:14001", + "20.76.57.61:18080", + "20.77.115.36:31051", + "21.76.121.140:8080", + "21.77.244.194:18890", + "25.215.194.188:90", + "25.215.194.87:9999", + "25.215.212.117", + "25.215.213.128:18080", + "pis.sgcc.com.cn", + "pms30.gs.sgcc.com.cn:32003", + "south.95598.sgcc.com.cn", + "sso-isc.gs.sgcc.com.cn", + "tqxs.sgcc.com.cn", + "yx.gs.sgcc.com.cn", + "yxgateway.gs.sgcc.com.cn" + ] + }, + "sceneCoverage": [ + { + "target": "20.76.56.147:14001", + "sceneCount": 14, + "sceneIds": [ + "sweep-003-scene", + "sweep-015-scene", + "sweep-026-scene", + "sweep-038-scene", + "sweep-039-scene", + "sweep-040-scene", + "sweep-041-scene", + "sweep-049-scene", + "sweep-059-scene", + "sweep-061-scene", + "sweep-063-scene", + "sweep-077-scene", + "sweep-099-scene", + "sweep-100-scene" + ] + }, + { + "target": "yx.gs.sgcc.com.cn", + "sceneCount": 11, + "sceneIds": [ + "sweep-001-scene", + "sweep-002-scene", + 
"sweep-021-scene", + "sweep-043-scene", + "sweep-056-scene", + "sweep-057-scene", + "sweep-058-scene", + "sweep-060-scene", + "sweep-080-scene", + "sweep-093-scene", + "sweep-094-scene" + ] + }, + { + "target": "21.77.244.194:18890", + "sceneCount": 10, + "sceneIds": [ + "sweep-007-scene", + "sweep-027-scene", + "sweep-042-scene", + "sweep-044-scene", + "sweep-052-scene", + "sweep-074-scene", + "sweep-088-scene", + "sweep-090-scene", + "sweep-091-scene", + "sweep-092-scene" + ] + }, + { + "target": "20.76.57.61:18080", + "sceneCount": 7, + "sceneIds": [ + "sweep-030-scene", + "sweep-031-scene", + "sweep-076-scene", + "sweep-078-scene", + "sweep-079-scene", + "sweep-082-scene", + "sweep-083-scene" + ] + }, + { + "target": "yxgateway.gs.sgcc.com.cn", + "sceneCount": 7, + "sceneIds": [ + "sweep-013-scene", + "sweep-068-scene", + "sweep-069-scene", + "sweep-084-scene", + "sweep-085-scene", + "sweep-095-scene", + "sweep-101-scene" + ] + }, + { + "target": "25.215.194.188:90", + "sceneCount": 5, + "sceneIds": [ + "sweep-028-scene", + "sweep-032-scene", + "sweep-051-scene", + "sweep-053-scene", + "sweep-054-scene" + ] + }, + { + "target": "south.95598.sgcc.com.cn", + "sceneCount": 4, + "sceneIds": [ + "sweep-004-scene", + "sweep-005-scene", + "sweep-009-scene", + "sweep-022-scene" + ] + }, + { + "target": "20.77.115.36:31051", + "sceneCount": 3, + "sceneIds": [ + "sweep-064-scene", + "sweep-071-scene", + "sweep-086-scene" + ] + }, + { + "target": "25.215.212.117", + "sceneCount": 2, + "sceneIds": [ + "sweep-046-scene", + "sweep-087-scene" + ] + }, + { + "target": "pms30.gs.sgcc.com.cn:32003", + "sceneCount": 2, + "sceneIds": [ + "sweep-055-scene", + "sweep-072-scene" + ] + }, + { + "target": "tqxs.sgcc.com.cn", + "sceneCount": 2, + "sceneIds": [ + "sweep-029-scene", + "sweep-081-scene" + ] + }, + { + "target": "10.4.39.180", + "sceneCount": 1, + "sceneIds": [ + "sweep-070-scene" + ] + }, + { + "target": "21.76.121.140:8080", + "sceneCount": 1, + "sceneIds": [ + 
"sweep-025-scene" + ] + }, + { + "target": "25.215.194.87:9999", + "sceneCount": 1, + "sceneIds": [ + "sweep-062-scene" + ] + }, + { + "target": "25.215.213.128:18080", + "sceneCount": 1, + "sceneIds": [ + "sweep-047-scene" + ] + }, + { + "target": "pis.sgcc.com.cn", + "sceneCount": 1, + "sceneIds": [ + "sweep-073-scene" + ] + }, + { + "target": "sso-isc.gs.sgcc.com.cn", + "sceneCount": 1, + "sceneIds": [ + "sweep-098-scene" + ] + } + ] +} diff --git a/resources/rules-102-business-targets-merged.json b/resources/rules-102-business-targets-merged.json new file mode 100644 index 0000000..3359070 --- /dev/null +++ b/resources/rules-102-business-targets-merged.json @@ -0,0 +1,53 @@ +{ + "version": "1.0", + "demo_only_domains": [ + "baidu.com", + "www.baidu.com", + "zhihu.com", + "www.zhihu.com", + "zhuanlan.zhihu.com" + ], + "domains": { + "allowed": [ + "oa.example.com", + "erp.example.com", + "hr.example.com", + "sgcc.example.invalid", + "95598.example.invalid", + "baidu.com", + "www.baidu.com", + "zhihu.com", + "www.zhihu.com", + "zhuanlan.zhihu.com", + "10.4.39.180", + "20.76.56.147:14001", + "20.76.57.61:18080", + "20.77.115.36:31051", + "21.76.121.140:8080", + "21.77.244.194:18890", + "25.215.194.188:90", + "25.215.194.87:9999", + "25.215.212.117", + "25.215.213.128:18080", + "pis.sgcc.com.cn", + "pms30.gs.sgcc.com.cn:32003", + "south.95598.sgcc.com.cn", + "sso-isc.gs.sgcc.com.cn", + "tqxs.sgcc.com.cn", + "yx.gs.sgcc.com.cn", + "yxgateway.gs.sgcc.com.cn" + ] + }, + "pipe_actions": { + "allowed": [ + "click", + "type", + "navigate", + "getText", + "eval" + ], + "blocked": [ + "executeJsInPage" + ] + } +} diff --git a/resources/rules-102-business-targets.patch b/resources/rules-102-business-targets.patch new file mode 100644 index 0000000..9aa1ef3 --- /dev/null +++ b/resources/rules-102-business-targets.patch @@ -0,0 +1,35 @@ +--- a/resources/rules.json ++++ b/resources/rules.json +@@ + "domains": { + "allowed": [ + "oa.example.com", + "erp.example.com", + 
"hr.example.com", + "sgcc.example.invalid", + "95598.example.invalid", + "baidu.com", + "www.baidu.com", + "zhihu.com", + "www.zhihu.com", +- "zhuanlan.zhihu.com" ++ "zhuanlan.zhihu.com", ++ "10.4.39.180", ++ "20.76.56.147:14001", ++ "20.76.57.61:18080", ++ "20.77.115.36:31051", ++ "21.76.121.140:8080", ++ "21.77.244.194:18890", ++ "25.215.194.188:90", ++ "25.215.194.87:9999", ++ "25.215.212.117", ++ "25.215.213.128:18080", ++ "pis.sgcc.com.cn", ++ "pms30.gs.sgcc.com.cn:32003", ++ "south.95598.sgcc.com.cn", ++ "sso-isc.gs.sgcc.com.cn", ++ "tqxs.sgcc.com.cn", ++ "yx.gs.sgcc.com.cn", ++ "yxgateway.gs.sgcc.com.cn" + ] + }, diff --git a/src/agent/mod.rs b/src/agent/mod.rs index cddd68c..0e9544c 100644 --- a/src/agent/mod.rs +++ b/src/agent/mod.rs @@ -49,7 +49,9 @@ fn browser_backend_for_submit( )); } - Ok(Arc::new(PipeBrowserBackend::from_inner(browser_tool.clone()))) + Ok(Arc::new(PipeBrowserBackend::from_inner( + browser_tool.clone(), + ))) } fn configured_browser_ws_url(context: &AgentRuntimeContext) -> Option { @@ -142,7 +144,10 @@ mod tests { #[test] fn normalize_optional_submit_field_trims_and_drops_blank_values() { - assert_eq!(normalize_optional_submit_field(" \n\t ".to_string()), None); + assert_eq!( + normalize_optional_submit_field(" \n\t ".to_string()), + None + ); assert_eq!( normalize_optional_submit_field(" https://example.com/page ".to_string()), Some("https://example.com/page".to_string()) diff --git a/src/agent/task_runner.rs b/src/agent/task_runner.rs index d3db90c..7b98873 100644 --- a/src/agent/task_runner.rs +++ b/src/agent/task_runner.rs @@ -6,9 +6,7 @@ use crate::browser::BrowserBackend; use crate::compat::config_adapter::resolve_skills_dir_from_sgclaw_settings; use crate::compat::runtime::CompatTaskContext; use crate::config::SgClawSettings; -use crate::pipe::{ - AgentMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport, -}; +use crate::pipe::{AgentMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport}; use 
crate::runtime::RuntimeEngine; #[derive(Debug, Clone, PartialEq, Eq)] @@ -144,7 +142,14 @@ fn resolve_submit_instruction( instruction: String, page_url: Option<&str>, page_title: Option<&str>, -) -> Result<(String, Option), AgentMessage> { + skills_dir: &std::path::Path, +) -> Result< + ( + String, + Option, + ), + AgentMessage, +> { let raw_instruction = instruction; let trimmed_instruction = raw_instruction.trim().to_string(); if trimmed_instruction.is_empty() { @@ -154,10 +159,11 @@ fn resolve_submit_instruction( }); } - match crate::compat::deterministic_submit::decide_deterministic_submit( + match crate::compat::deterministic_submit::decide_deterministic_submit_with_skills_dir( &raw_instruction, page_url, page_title, + skills_dir, ) { crate::compat::deterministic_submit::DeterministicSubmitDecision::NotDeterministic => { Ok((trimmed_instruction, None)) @@ -195,14 +201,6 @@ pub fn run_submit_task( page_url, page_title, }; - let (instruction, deterministic_plan) = match resolve_submit_instruction( - instruction, - task_context.page_url.as_deref(), - task_context.page_title.as_deref(), - ) { - Ok(resolved) => resolved, - Err(completion) => return sink.send(&completion), - }; let _ = sink.send(&AgentMessage::LogEntry { level: "info".to_string(), message: runtime_version_log_message(), @@ -221,6 +219,15 @@ pub fn run_submit_task( Ok(Some(settings)) => { let resolved_skills_dir = resolve_skills_dir_from_sgclaw_settings(&context.workspace_root, &settings); + let (instruction, deterministic_plan) = match resolve_submit_instruction( + instruction, + task_context.page_url.as_deref(), + task_context.page_title.as_deref(), + &resolved_skills_dir, + ) { + Ok(resolved) => resolved, + Err(completion) => return sink.send(&completion), + }; let _ = sink.send(&AgentMessage::LogEntry { level: "info".to_string(), message: format!( @@ -386,14 +393,6 @@ pub fn run_submit_task_with_browser_backend( page_url, page_title, }; - let (instruction, deterministic_plan) = match 
resolve_submit_instruction( - instruction, - task_context.page_url.as_deref(), - task_context.page_title.as_deref(), - ) { - Ok(resolved) => resolved, - Err(completion) => return sink.send(&completion), - }; let _ = sink.send(&AgentMessage::LogEntry { level: "info".to_string(), message: runtime_version_log_message(), @@ -412,6 +411,15 @@ pub fn run_submit_task_with_browser_backend( Ok(Some(settings)) => { let resolved_skills_dir = resolve_skills_dir_from_sgclaw_settings(&context.workspace_root, &settings); + let (instruction, deterministic_plan) = match resolve_submit_instruction( + instruction, + task_context.page_url.as_deref(), + task_context.page_title.as_deref(), + &resolved_skills_dir, + ) { + Ok(resolved) => resolved, + Err(completion) => return sink.send(&completion), + }; let _ = sink.send(&AgentMessage::LogEntry { level: "info".to_string(), message: format!( diff --git a/src/bin/sg_claw_client.rs b/src/bin/sg_claw_client.rs index 0403f49..d2cff97 100644 --- a/src/bin/sg_claw_client.rs +++ b/src/bin/sg_claw_client.rs @@ -75,7 +75,10 @@ fn run() -> Result<(), String> { ServiceMessage::LogEntry { level: _, message } => { println!("{message}"); } - ServiceMessage::TaskComplete { success: _, summary } => { + ServiceMessage::TaskComplete { + success: _, + summary, + } => { println!("{summary}"); break; } diff --git a/src/bin/sg_scene_generate.rs b/src/bin/sg_scene_generate.rs index 85b9abc..6f93d68 100644 --- a/src/bin/sg_scene_generate.rs +++ b/src/bin/sg_scene_generate.rs @@ -2,7 +2,8 @@ use std::env; use std::path::PathBuf; use sgclaw::generated_scene::analyzer::SceneKind; -use sgclaw::generated_scene::generator::{generate_scene_package, GenerateSceneRequest, SceneInfoJson}; +use sgclaw::generated_scene::generator::{generate_scene_package, GenerateSceneRequest}; +use sgclaw::generated_scene::ir::{LegacySceneInfoJson, SceneIr}; fn main() { if let Err(err) = run() { @@ -13,10 +14,16 @@ fn main() { fn run() -> Result<(), String> { let args = 
parse_args(env::args().skip(1))?; - let scene_info: Option = args.scene_info_json + let scene_info: Option = args + .scene_info_json .map(|json| serde_json::from_str(&json)) .transpose() .map_err(|e| format!("Invalid scene-info-json: {}", e))?; + let scene_ir: Option = args + .scene_ir_json + .map(|json| serde_json::from_str(&json)) + .transpose() + .map_err(|e| format!("Invalid scene-ir-json: {}", e))?; let skill_root = generate_scene_package(GenerateSceneRequest { source_dir: args.source_dir, scene_id: args.scene_id, @@ -26,6 +33,7 @@ fn run() -> Result<(), String> { output_root: args.output_root, lessons_path: args.lessons_path, scene_info_json: scene_info, + scene_ir_json: scene_ir, }) .map_err(|err| err.to_string())?; @@ -42,6 +50,7 @@ struct CliArgs { output_root: PathBuf, lessons_path: Option, scene_info_json: Option, + scene_ir_json: Option, } fn parse_args(args: impl Iterator) -> Result { @@ -53,6 +62,7 @@ fn parse_args(args: impl Iterator) -> Result { let mut output_root = None; let mut lessons_path = None; let mut scene_info_json = None; + let mut scene_ir_json = None; let mut pending_flag: Option = None; for arg in args { @@ -71,6 +81,7 @@ fn parse_args(args: impl Iterator) -> Result { "--output-root" => output_root = Some(PathBuf::from(arg)), "--lessons" => lessons_path = Some(PathBuf::from(arg)), "--scene-info-json" => scene_info_json = Some(arg), + "--scene-ir-json" => scene_ir_json = Some(arg), _ => return Err(format!("unsupported argument {flag}")), } continue; @@ -78,7 +89,7 @@ fn parse_args(args: impl Iterator) -> Result { match arg.as_str() { "--source-dir" | "--scene-id" | "--scene-name" | "--scene-kind" | "--target-url" - | "--output-root" | "--lessons" | "--scene-info-json" => { + | "--output-root" | "--lessons" | "--scene-info-json" | "--scene-ir-json" => { pending_flag = Some(arg); } "--help" | "-h" => return Err(usage()), @@ -99,9 +110,10 @@ fn parse_args(args: impl Iterator) -> Result { output_root: output_root.ok_or_else(usage)?, 
lessons_path, scene_info_json, + scene_ir_json, }) } fn usage() -> String { - "usage: sg_scene_generate --source-dir --scene-id --scene-name [--scene-kind ] [--target-url ] --output-root [--lessons ] [--scene-info-json '']".to_string() + "usage: sg_scene_generate --source-dir --scene-id --scene-name [--scene-kind ] [--target-url ] --output-root [--lessons ] [--scene-info-json ''] [--scene-ir-json '']".to_string() } diff --git a/src/browser/bridge_backend.rs b/src/browser/bridge_backend.rs index fe4366b..36be6a4 100644 --- a/src/browser/bridge_backend.rs +++ b/src/browser/bridge_backend.rs @@ -1,5 +1,5 @@ -use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; use serde_json::Value; diff --git a/src/browser/callback_backend.rs b/src/browser/callback_backend.rs index 800daaa..e232ea2 100644 --- a/src/browser/callback_backend.rs +++ b/src/browser/callback_backend.rs @@ -18,7 +18,10 @@ const LOCAL_DASHBOARD_SOURCE: &str = "compat.workflow_executor"; const LOCAL_DASHBOARD_KIND_ZHIHU_HOTLIST_SCREEN: &str = "zhihu_hotlist_screen"; pub trait BrowserCallbackHost: Send + Sync { - fn execute(&self, request: BrowserCallbackRequest) -> Result; + fn execute( + &self, + request: BrowserCallbackRequest, + ) -> Result; } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -183,9 +186,13 @@ impl BrowserCallbackBackend { self.current_target_url .lock() - .map_err(|_| PipeError::Protocol("callback backend target url lock poisoned".to_string()))? + .map_err(|_| { + PipeError::Protocol("callback backend target url lock poisoned".to_string()) + })? 
.clone() - .ok_or_else(|| PipeError::Protocol(format!("target_url is required for {}", action.as_str()))) + .ok_or_else(|| { + PipeError::Protocol(format!("target_url is required for {}", action.as_str())) + }) } fn execute_simulated_click( @@ -194,10 +201,9 @@ impl BrowserCallbackBackend { expected_domain: &str, success: &BrowserCallbackSuccess, ) -> Result { - let probe = success - .data - .get("probe") - .ok_or_else(|| PipeError::Protocol("callback click probe payload missing".to_string()))?; + let probe = success.data.get("probe").ok_or_else(|| { + PipeError::Protocol("callback click probe payload missing".to_string()) + })?; let x = probe .get("x") .and_then(Value::as_f64) @@ -248,10 +254,9 @@ impl BrowserCallbackBackend { params: &Value, success: &BrowserCallbackSuccess, ) -> Result { - let probe = success - .data - .get("probe") - .ok_or_else(|| PipeError::Protocol("callback type probe payload missing".to_string()))?; + let probe = success.data.get("probe").ok_or_else(|| { + PipeError::Protocol("callback type probe payload missing".to_string()) + })?; let x = probe .get("x") .and_then(Value::as_f64) @@ -307,7 +312,8 @@ impl BrowserBackend for BrowserCallbackBackend { params: Value, expected_domain: &str, ) -> Result { - if let Some(local_dashboard) = approved_local_dashboard_request(&action, ¶ms, expected_domain) + if let Some(local_dashboard) = + approved_local_dashboard_request(&action, ¶ms, expected_domain) { self.mac_policy .validate_local_dashboard_presentation( @@ -335,7 +341,9 @@ impl BrowserBackend for BrowserCallbackBackend { match reply { BrowserCallbackResponse::Success(success) => { let success = match action { - Action::Click => self.execute_simulated_click(seq, expected_domain, &success)?, + Action::Click => { + self.execute_simulated_click(seq, expected_domain, &success)? + } Action::Type => { self.execute_simulated_type(seq, expected_domain, ¶ms, &success)? 
} @@ -349,7 +357,9 @@ impl BrowserBackend for BrowserCallbackBackend { .filter(|value| !value.is_empty()) { *self.current_target_url.lock().map_err(|_| { - PipeError::Protocol("callback backend target url lock poisoned".to_string()) + PipeError::Protocol( + "callback backend target url lock poisoned".to_string(), + ) })? = Some(url.to_string()); } } @@ -524,10 +534,7 @@ fn events_endpoint_url(helper_page_url: &str) -> String { /// Extract the domain from a URL. /// e.g. "https://www.zhihu.com/hot" → "www.zhihu.com" fn extract_domain(url: &str) -> Result { - let after_scheme = url - .find("://") - .map(|i| &url[i + 3..]) - .unwrap_or(url); + let after_scheme = url.find("://").map(|i| &url[i + 3..]).unwrap_or(url); let domain = after_scheme .split('/') .next() @@ -627,7 +634,10 @@ mod tests { } impl BrowserCallbackHost for FakeCallbackHost { - fn execute(&self, request: BrowserCallbackRequest) -> Result { + fn execute( + &self, + request: BrowserCallbackRequest, + ) -> Result { self.requests.lock().unwrap().push(request); self.replies .lock() @@ -674,15 +684,18 @@ mod tests { assert!(output.success); let requests = host.requests(); assert_eq!(requests.len(), 2); - assert_eq!(requests[1].command, json!([ - "http://127.0.0.1:17888/sgclaw/browser-helper.html", - "sgBroewserSimulateMouse", - 320.5, - 240.25, - "left", - "", - "" - ])); + assert_eq!( + requests[1].command, + json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateMouse", + 320.5, + 240.25, + "left", + "", + "" + ]) + ); } #[test] @@ -740,21 +753,27 @@ mod tests { let requests = host.requests(); assert_eq!(requests.len(), 2); assert_eq!(requests[0].action, "click"); - assert_eq!(requests[0].command[1], json!("sgBrowserExcuteJsCodeByDomain")); + assert_eq!( + requests[0].command[1], + json!("sgBrowserExcuteJsCodeByDomain") + ); assert_eq!(requests[0].command[2], json!("zhuanlan.zhihu.com")); let script = requests[0].command[3].as_str().unwrap(); 
assert!(script.contains("document.querySelector('button')")); assert!(script.contains("sgclawOnClick")); assert_eq!(requests[1].action, "click"); - assert_eq!(requests[1].command, json!([ - "http://127.0.0.1:17888/sgclaw/browser-helper.html", - "sgBroewserSimulateMouse", - 320.5, - 240.25, - "left", - "", - "" - ])); + assert_eq!( + requests[1].command, + json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateMouse", + 320.5, + 240.25, + "left", + "", + "" + ]) + ); } #[test] @@ -783,13 +802,16 @@ mod tests { assert!(output.success); let requests = host.requests(); assert_eq!(requests.len(), 2); - assert_eq!(requests[1].command, json!([ - "http://127.0.0.1:17888/sgclaw/browser-helper.html", - "sgBroewserSimulateKeyborad", - 160.0, - 90.0, - "正文" - ])); + assert_eq!( + requests[1].command, + json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateKeyborad", + 160.0, + 90.0, + "正文" + ]) + ); } #[test] @@ -822,13 +844,16 @@ mod tests { let script = requests[0].command[3].as_str().unwrap(); assert!(script.contains("return document.body;")); assert!(!script.contains("selector not found: div[contenteditable='true']")); - assert_eq!(requests[1].command, json!([ - "http://127.0.0.1:17888/sgclaw/browser-helper.html", - "sgBroewserSimulateKeyborad", - 160.0, - 90.0, - "正文" - ])); + assert_eq!( + requests[1].command, + json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateKeyborad", + 160.0, + 90.0, + "正文" + ]) + ); } #[test] @@ -859,20 +884,26 @@ mod tests { let requests = host.requests(); assert_eq!(requests.len(), 2); assert_eq!(requests[0].action, "type"); - assert_eq!(requests[0].command[1], json!("sgBrowserExcuteJsCodeByDomain")); + assert_eq!( + requests[0].command[1], + json!("sgBrowserExcuteJsCodeByDomain") + ); assert_eq!(requests[0].command[2], json!("zhuanlan.zhihu.com")); let script = requests[0].command[3].as_str().unwrap(); 
assert!(script.contains("document.querySelector('div[contenteditable=\\'true\\']')")); assert!(script.contains("sgclawOnType")); assert!(!script.contains("el.value=")); assert_eq!(requests[1].action, "type"); - assert_eq!(requests[1].command, json!([ - "http://127.0.0.1:17888/sgclaw/browser-helper.html", - "sgBroewserSimulateKeyborad", - 160.0, - 90.0, - "正文" - ])); + assert_eq!( + requests[1].command, + json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateKeyborad", + 160.0, + 90.0, + "正文" + ]) + ); } #[test] @@ -905,11 +936,14 @@ mod tests { assert!(output.success); let requests = host.requests(); assert_eq!(requests.len(), 1); - assert_eq!(requests[0].command, json!([ - "http://127.0.0.1:17888/sgclaw/browser-helper.html", - "sgBrowerserOpenPage", - "file:///C:/tmp/zhihu-hotlist-screen.html" - ])); + assert_eq!( + requests[0].command, + json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBrowerserOpenPage", + "file:///C:/tmp/zhihu-hotlist-screen.html" + ]) + ); } #[test] @@ -945,9 +979,15 @@ mod tests { let raw = "第一行\n第二行\r\n第三行"; let escaped = escape_js_single_quoted(raw); assert!(!escaped.contains('\n'), "literal newline must be escaped"); - assert!(!escaped.contains('\r'), "literal carriage return must be escaped"); + assert!( + !escaped.contains('\r'), + "literal carriage return must be escaped" + ); assert!(escaped.contains("\\n"), "should contain escaped newline"); - assert!(escaped.contains("\\r"), "should contain escaped carriage return"); + assert!( + escaped.contains("\\r"), + "should contain escaped carriage return" + ); assert_eq!(escaped, "第一行\\n第二行\\r\\n第三行"); } diff --git a/src/browser/callback_host.rs b/src/browser/callback_host.rs index 481ee0c..4405bdc 100644 --- a/src/browser/callback_host.rs +++ b/src/browser/callback_host.rs @@ -40,6 +40,12 @@ pub(crate) struct BrowserCallbackHost { state: Mutex, } +#[derive(Debug)] +pub(crate) struct CallbackHostStartupError { + pub(crate) source: PipeError, + 
pub(crate) logs: Vec, +} + #[derive(Debug)] #[allow(dead_code)] pub(crate) struct LiveBrowserCallbackHost { @@ -55,6 +61,8 @@ pub(crate) struct LiveBrowserCallbackHost { #[derive(Debug, Default)] struct CallbackHostState { ready: bool, + helper_loaded: bool, + startup_logs: Vec, pending_ready_event: Option, pending_results: VecDeque, pending_commands: VecDeque, @@ -136,7 +144,7 @@ impl BrowserCallbackHost { let origin = normalize_loopback_origin(loopback_origin.as_ref()); let browser_ws_url = browser_ws_url.as_ref().to_string(); let helper_url = format!("{origin}{HELPER_PAGE_PATH}"); - let helper_page_html = build_helper_page_html(&origin, &helper_url, &browser_ws_url); + let helper_page_html = build_runtime_console_html(&origin, &helper_url, &browser_ws_url); Self { helper_url, @@ -157,6 +165,20 @@ impl BrowserCallbackHost { self.state.lock().unwrap().ready } + pub(crate) fn helper_loaded(&self) -> bool { + self.state.lock().unwrap().helper_loaded + } + + pub(crate) fn record_startup_log(&self, message: impl Into) { + let message = message.into(); + eprintln!("[sgclaw callback-host startup] {message}"); + self.state.lock().unwrap().startup_logs.push(message); + } + + pub(crate) fn take_startup_logs(&self) -> Vec { + std::mem::take(&mut self.state.lock().unwrap().startup_logs) + } + pub(crate) fn mark_ready(&self, helper_url: Option) { let mut state = self.state.lock().unwrap(); if state.ready { @@ -164,6 +186,9 @@ impl BrowserCallbackHost { } state.ready = true; + state + .startup_logs + .push("callback-host helper ready callback received".to_string()); state.pending_ready_event = Some(CallbackEvent::Ready { helper_url }); } @@ -172,7 +197,21 @@ impl BrowserCallbackHost { } pub(crate) fn push_result(&self, result: CallbackResult) { - self.state.lock().unwrap().pending_results.push_back(result); + let mut state = self.state.lock().unwrap(); + eprintln!( + "[sgclaw callback-host] event received callback={} target_url={:?} action={:?} payload_keys={}", + 
result.callback, + result.target_url, + result.action, + payload_keys(&result.payload) + ); + if result.callback == NAVIGATE_CALLBACK_NAME && !state.helper_loaded { + state.helper_loaded = true; + state + .startup_logs + .push("callback-host helper loaded callback received".to_string()); + } + state.pending_results.push_back(result); } pub(crate) fn take_result(&self) -> Option { @@ -180,17 +219,36 @@ impl BrowserCallbackHost { } pub(crate) fn clear_results(&self) { - self.state.lock().unwrap().pending_results.clear(); + let mut state = self.state.lock().unwrap(); + let pending_results = state.pending_results.len(); + state.pending_results.clear(); + eprintln!("[sgclaw callback-host] clear_results pending_results_cleared={pending_results}"); } pub(crate) fn enqueue_command(&self, command: CallbackCommand) { - self.state.lock().unwrap().pending_commands.push_back(command); + let action = command.action.clone(); + let args_len = command.args.len(); + let mut state = self.state.lock().unwrap(); + state.pending_commands.push_back(command); + eprintln!( + "[sgclaw callback-host] enqueue_command action={action} args_len={args_len} pending_commands={} in_flight={}", + state.pending_commands.len(), + state.in_flight_command.is_some() + ); } pub(crate) fn current_command_envelope(&self) -> CallbackCommandEnvelope { let mut state = self.state.lock().unwrap(); if state.in_flight_command.is_none() { state.in_flight_command = state.pending_commands.pop_front(); + if let Some(command) = state.in_flight_command.as_ref() { + eprintln!( + "[sgclaw callback-host] helper picked command action={} args_len={} remaining_pending={}", + command.action, + command.args.len(), + state.pending_commands.len() + ); + } } CallbackCommandEnvelope { @@ -200,7 +258,18 @@ impl BrowserCallbackHost { } pub(crate) fn acknowledge_in_flight_command(&self) -> Option { - self.state.lock().unwrap().in_flight_command.take() + let mut state = self.state.lock().unwrap(); + let command = 
state.in_flight_command.take(); + if let Some(command) = command.as_ref() { + eprintln!( + "[sgclaw callback-host] helper ack command action={} args_len={}", + command.action, + command.args.len() + ); + } else { + eprintln!("[sgclaw callback-host] helper ack with no in-flight command"); + } + command } /// Clear all pending state so the host can be reused for the next task @@ -210,6 +279,7 @@ impl BrowserCallbackHost { state.pending_results.clear(); state.pending_commands.clear(); state.in_flight_command = None; + state.startup_logs.clear(); } } @@ -220,29 +290,68 @@ impl LiveBrowserCallbackHost { ready_timeout: Duration, result_timeout: Duration, use_hidden_domain: bool, - ) -> Result { - let listener = TcpListener::bind("127.0.0.1:0").map_err(|err| { - PipeError::Protocol(format!("failed to bind callback host listener: {err}")) - })?; - listener.set_nonblocking(true).map_err(|err| { - PipeError::Protocol(format!("failed to configure callback host listener: {err}")) - })?; + ) -> Result { + let listener = + TcpListener::bind("127.0.0.1:0").map_err(|err| CallbackHostStartupError { + source: PipeError::Protocol(format!( + "failed to bind callback host listener: {err}" + )), + logs: vec![ + "callback-host start_with_browser_ws_url begin".to_string(), + format!("callback-host listener bind failed: {err}"), + ], + })?; + listener + .set_nonblocking(true) + .map_err(|err| CallbackHostStartupError { + source: PipeError::Protocol(format!( + "failed to configure callback host listener: {err}" + )), + logs: vec![ + "callback-host start_with_browser_ws_url begin".to_string(), + format!("callback-host listener configure failed: {err}"), + ], + })?; let origin = format!( "http://{}", - listener.local_addr().map_err(|err| { - PipeError::Protocol(format!( - "failed to resolve callback host listener address: {err}" - )) - })? 
+ listener + .local_addr() + .map_err(|err| CallbackHostStartupError { + source: PipeError::Protocol(format!( + "failed to resolve callback host listener address: {err}" + )), + logs: vec![ + "callback-host start_with_browser_ws_url begin".to_string(), + format!("callback-host listener address resolve failed: {err}"), + ], + })? ); let host = Arc::new(BrowserCallbackHost::with_urls(&origin, browser_ws_url)); + host.record_startup_log("callback-host start_with_browser_ws_url begin"); + host.record_startup_log("callback-host listener ready"); let shutdown = Arc::new(AtomicBool::new(false)); let thread_host = host.clone(); let thread_shutdown = shutdown.clone(); - let server_thread = thread::spawn(move || serve_loop(listener, thread_host, thread_shutdown)); + let server_thread = + thread::spawn(move || serve_loop(listener, thread_host, thread_shutdown)); - bootstrap_helper_page(browser_ws_url, bootstrap_request_url, host.helper_url(), use_hidden_domain)?; - wait_for_helper_ready(host.as_ref(), ready_timeout)?; + bootstrap_helper_page( + host.as_ref(), + browser_ws_url, + bootstrap_request_url, + host.helper_url(), + use_hidden_domain, + ) + .map_err(|source| CallbackHostStartupError { + source, + logs: host.take_startup_logs(), + })?; + wait_for_helper_ready(host.as_ref(), ready_timeout).map_err(|source| { + CallbackHostStartupError { + source, + logs: host.take_startup_logs(), + } + })?; let live_host = Self { host, @@ -281,10 +390,18 @@ fn command_is_fire_and_forget(request: &BrowserCallbackRequest) -> bool { } impl BrowserCallbackExecutor for LiveBrowserCallbackHost { - fn execute(&self, request: BrowserCallbackRequest) -> Result { + fn execute( + &self, + request: BrowserCallbackRequest, + ) -> Result { let _command_guard = self.command_lock.lock().unwrap(); + eprintln!( + "[sgclaw callback-host] execute begin seq={} action={} expected_domain={} request_url={}", + request.seq, request.action, request.expected_domain, request.request_url + ); 
self.host.clear_results(); - self.host.enqueue_command(command_from_request(&request.command)?); + self.host + .enqueue_command(command_from_request(&request.command)?); // Navigate uses sgBrowerserOpenPage which opens a new tab without a JS // callback. Simulated mouse/keyboard follow-up commands also do not emit @@ -299,18 +416,40 @@ impl BrowserCallbackExecutor for LiveBrowserCallbackHost { }; let started = Instant::now(); + let mut saw_unmatched_result = false; while started.elapsed() < timeout { if let Some(result) = self.host.take_result() { - if let Some(response) = - normalize_callback_result(&request, result, started.elapsed()) - { + let callback = result.callback.clone(); + let response = normalize_callback_result(&request, result, started.elapsed()); + if let Some(response) = response { + eprintln!( + "[sgclaw callback-host] execute matched callback seq={} action={} callback={} elapsed_ms={}", + request.seq, + request.action, + callback, + started.elapsed().as_millis() + ); return Ok(response); } + saw_unmatched_result = true; + eprintln!( + "[sgclaw callback-host] execute ignored unmatched callback seq={} action={} callback={} elapsed_ms={}", + request.seq, + request.action, + callback, + started.elapsed().as_millis() + ); } thread::sleep(COMMAND_POLL_INTERVAL); } if is_fire_and_forget { + eprintln!( + "[sgclaw callback-host] execute fire-and-forget complete seq={} action={} elapsed_ms={}", + request.seq, + request.action, + started.elapsed().as_millis() + ); return Ok(BrowserCallbackResponse::Success(BrowserCallbackSuccess { success: true, data: json!({ "loaded": true }), @@ -319,6 +458,13 @@ impl BrowserCallbackExecutor for LiveBrowserCallbackHost { })); } + eprintln!( + "[sgclaw callback-host] execute timeout seq={} action={} timeout_ms={} saw_unmatched_result={}", + request.seq, + request.action, + timeout.as_millis(), + saw_unmatched_result + ); Err(PipeError::Timeout) } } @@ -343,11 +489,13 @@ fn normalize_loopback_origin(origin: &str) -> String { 
} fn bootstrap_helper_page( + host: &BrowserCallbackHost, browser_ws_url: &str, request_url: &str, helper_url: &str, use_hidden_domain: bool, ) -> Result<(), PipeError> { + host.record_startup_log("callback-host bootstrap_helper_page begin"); let (mut websocket, _) = connect(browser_ws_url) .map_err(|err| PipeError::Protocol(format!("browser websocket connect failed: {err}")))?; configure_bootstrap_socket(&mut websocket)?; @@ -368,15 +516,11 @@ fn bootstrap_helper_page( let close_payload = json!([request_url, close_action, helper_url]).to_string(); let _ = websocket.send(Message::Text(close_payload.into())); - let payload = json!([ - request_url, - open_action, - helper_url, - ]) - .to_string(); + let payload = json!([request_url, open_action, helper_url]).to_string(); websocket .send(Message::Text(payload.into())) .map_err(|err| PipeError::Protocol(format!("helper bootstrap send failed: {err}")))?; + host.record_startup_log("callback-host bootstrap hidden-open sent"); Ok(()) } @@ -386,9 +530,11 @@ fn recv_bootstrap_prelude( loop { match websocket.read() { Ok(Message::Text(_)) | Ok(Message::Binary(_)) | Ok(Message::Frame(_)) => return Ok(()), - Ok(Message::Ping(payload)) => websocket - .send(Message::Pong(payload)) - .map_err(|err| PipeError::Protocol(format!("browser websocket pong failed: {err}")))?, + Ok(Message::Ping(payload)) => { + websocket.send(Message::Pong(payload)).map_err(|err| { + PipeError::Protocol(format!("browser websocket pong failed: {err}")) + })? 
+ } Ok(Message::Pong(_)) => {} Ok(Message::Close(_)) => return Err(PipeError::PipeClosed), Err(tungstenite::Error::ConnectionClosed) | Err(tungstenite::Error::AlreadyClosed) => { @@ -424,15 +570,25 @@ fn configure_bootstrap_socket( } } -fn wait_for_helper_ready(host: &BrowserCallbackHost, ready_timeout: Duration) -> Result<(), PipeError> { +fn wait_for_helper_ready( + host: &BrowserCallbackHost, + ready_timeout: Duration, +) -> Result<(), PipeError> { + host.record_startup_log("callback-host wait_for_helper_ready begin"); let started = Instant::now(); while started.elapsed() < ready_timeout { if host.is_ready() { + host.record_startup_log("callback-host wait_for_helper_ready ready"); return Ok(()); } thread::sleep(HELPER_POLL_INTERVAL); } + host.record_startup_log(format!( + "callback-host wait_for_helper_ready timeout (helper_loaded={}, ready={})", + host.helper_loaded(), + host.is_ready() + )); Err(PipeError::Timeout) } @@ -469,16 +625,18 @@ fn handle_request(stream: &mut TcpStream, host: &BrowserCallbackHost) -> Result< host.helper_page_html().as_bytes(), ), ("POST", READY_ENDPOINT_PATH) => { - let payload: IncomingReadyEvent = serde_json::from_slice(&request.body).map_err(|err| { - PipeError::Protocol(format!("invalid callback host ready payload: {err}")) - })?; + let payload: IncomingReadyEvent = + serde_json::from_slice(&request.body).map_err(|err| { + PipeError::Protocol(format!("invalid callback host ready payload: {err}")) + })?; host.mark_ready(payload.helper_url); write_json_response(stream, &json!({ "ok": true })) } ("POST", EVENTS_ENDPOINT_PATH) => { - let payload: IncomingCallbackEvent = serde_json::from_slice(&request.body).map_err(|err| { - PipeError::Protocol(format!("invalid callback host event payload: {err}")) - })?; + let payload: IncomingCallbackEvent = + serde_json::from_slice(&request.body).map_err(|err| { + PipeError::Protocol(format!("invalid callback host event payload: {err}")) + })?; host.push_result(CallbackResult { callback: 
payload.callback, request_url: payload.request_url, @@ -507,7 +665,9 @@ fn read_http_request(stream: &mut TcpStream) -> Result { while headers_end.is_none() { let mut chunk = [0_u8; 1024]; let bytes = stream.read(&mut chunk).map_err(|err| { - PipeError::Protocol(format!("failed to read callback host request headers: {err}")) + PipeError::Protocol(format!( + "failed to read callback host request headers: {err}" + )) })?; if bytes == 0 { return Err(PipeError::PipeClosed); @@ -590,7 +750,9 @@ fn write_http_response( .write_all(headers.as_bytes()) .and_then(|_| stream.write_all(body)) .and_then(|_| stream.flush()) - .map_err(|err| PipeError::Protocol(format!("failed to write callback host response: {err}"))) + .map_err(|err| { + PipeError::Protocol(format!("failed to write callback host response: {err}")) + }) } fn write_cors_preflight(stream: &mut TcpStream) -> Result<(), PipeError> { @@ -604,12 +766,27 @@ fn write_cors_preflight(stream: &mut TcpStream) -> Result<(), PipeError> { stream .write_all(headers.as_bytes()) .and_then(|_| stream.flush()) - .map_err(|err| PipeError::Protocol(format!("failed to write CORS preflight response: {err}"))) + .map_err(|err| { + PipeError::Protocol(format!("failed to write CORS preflight response: {err}")) + }) +} + +fn payload_keys(payload: &Value) -> String { + match payload { + Value::Object(map) => map.keys().cloned().collect::>().join(","), + Value::Array(values) => format!("array({})", values.len()), + Value::String(_) => "string".to_string(), + Value::Number(_) => "number".to_string(), + Value::Bool(_) => "bool".to_string(), + Value::Null => "null".to_string(), + } } fn command_from_request(command: &Value) -> Result { let values = command.as_array().ok_or_else(|| { - PipeError::Protocol(format!("callback host command must be an array, got {command}")) + PipeError::Protocol(format!( + "callback host command must be an array, got {command}" + )) })?; if values.len() < 2 { return Err(PipeError::Protocol(format!( @@ -621,7 +798,9 
@@ fn command_from_request(command: &Value) -> Result { .map(str::trim) .filter(|value| !value.is_empty()) .ok_or_else(|| { - PipeError::Protocol(format!("callback host command action is invalid: {command}")) + PipeError::Protocol(format!( + "callback host command action is invalid: {command}" + )) })? .to_string(); Ok(CallbackCommand { @@ -663,7 +842,11 @@ fn normalize_callback_result( "type" if result.callback == TYPE_PROBE_CALLBACK_NAME => { let x = result.payload.get("x").and_then(Value::as_f64)?; let y = result.payload.get("y").and_then(Value::as_f64)?; - let text = result.payload.get("text").and_then(Value::as_str).unwrap_or_default(); + let text = result + .payload + .get("text") + .and_then(Value::as_str) + .unwrap_or_default(); Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { success: true, data: json!({ @@ -734,7 +917,10 @@ fn normalize_callback_result( let probe: Value = serde_json::from_str(&parsed.response_text).ok()?; let x = probe.get("x").and_then(Value::as_f64)?; let y = probe.get("y").and_then(Value::as_f64)?; - let text = probe.get("text").and_then(Value::as_str).unwrap_or_default(); + let text = probe + .get("text") + .and_then(Value::as_str) + .unwrap_or_default(); Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { success: true, data: json!({ @@ -806,6 +992,182 @@ fn elapsed_timing(elapsed: Duration) -> Timing { fn build_helper_page_html(loopback_origin: &str, helper_url: &str, browser_ws_url: &str) -> String { format!( r#" + + + +sgClaw Browser Helper + + + + + + +"# + ) +} + +fn build_runtime_console_html( + loopback_origin: &str, + helper_url: &str, + browser_ws_url: &str, +) -> String { + format!( + r#" sgClaw · Runtime Console