Files
claw/frontend/scene-generator/generator-runner.js
木炎 4215d49f3f fix(analyzer): extract loginPath/mainPath as bootstrap fallback
When HTML/JS contains loginPath or mainPath variables (common in
95598 and similar scenes), extract the domain as expected_domain and
the full URL as target_url. This fixes the bootstrap_resolved gate
failure for scenes that use loginPath/mainPath instead of meta tags
or explicit bootstrap configuration.

🤖 Generated with [Qoder](https://qoder.com)
2026-04-17 19:44:13 +08:00

1403 lines
43 KiB
JavaScript

const fs = require("fs");
const path = require("path");
const { spawn } = require("child_process");
// File extensions (lower-case, with leading dot) that walkDirectory reads as
// UTF-8 text; files with any other extension are listed in the tree but not read.
const TEXT_FILE_EXTENSIONS = new Set([
".html",
".js",
".jsx",
".json",
".md",
".mjs",
".toml",
".ts",
".tsx",
".txt",
".vue",
]);
// Fallback bootstrap descriptor used by deriveBootstrap when no usable
// bootstrap candidate was detected.
const DEFAULT_BOOTSTRAP = {
expectedDomain: "",
targetUrl: "",
requiresTargetPage: true,
pageTitleKeywords: [],
source: "deterministic",
};
// Candidate roles that may be used for bootstrap, in the order deriveBootstrap
// tries them (earlier roles win).
const BOOTSTRAP_ROLE_PRIORITY = ["business_entry", "business_api", "gateway_api"];
// Whole scene ids that validateSceneIdCandidate rejects as "generic_scene_id".
const GENERIC_SCENE_IDS = new Set([
"scene",
"report",
"generated",
"generated-scene",
"skill",
"default-scene",
]);
// Individual slug tokens that extractBusinessTokens drops because they carry
// no business meaning on their own.
const GENERIC_SCENE_ID_TOKENS = new Set([
"api",
"app",
"data",
"default",
"export",
"generator",
"page",
"report",
"request",
"scene",
"skill",
"system",
"table",
"temp",
"test",
"tmp",
]);
// Ordered phrase -> ASCII replacement rules used when deriving scene ids from
// Chinese names. Rules are applied sequentially, so longer phrases are listed
// before the shorter phrases they contain.
const SCENE_ID_ALIAS_RULES = [
{ pattern: /营销\s*2(?:\.|。)?0/gi, replacement: "marketing" },
{ pattern: /零度户报表数据生成/gi, replacement: "zero-consumer-report" },
{ pattern: /零度户报表/gi, replacement: "zero-consumer-report" },
{ pattern: /零度户/gi, replacement: "zero-consumer" },
{ pattern: /台区线损率统计分析/gi, replacement: "tq-lineloss-analysis" },
{ pattern: /台区线损/gi, replacement: "tq-lineloss" },
{ pattern: /线损率统计分析/gi, replacement: "lineloss-analysis" },
{ pattern: /线损率/gi, replacement: "lineloss-rate" },
{ pattern: /线损/gi, replacement: "lineloss" },
{ pattern: /台区/gi, replacement: "tq" },
{ pattern: /月[_-]?周累计/gi, replacement: "monthly-weekly-cumulative" },
{ pattern: /月累计/gi, replacement: "monthly-cumulative" },
{ pattern: /周累计/gi, replacement: "weekly-cumulative" },
{ pattern: /统计分析/gi, replacement: "analysis" },
{ pattern: /报表数据生成/gi, replacement: "report" },
{ pattern: /报表/gi, replacement: "report" },
{ pattern: /数据生成/gi, replacement: "generator" },
];
/**
 * Converts every backslash in a path string into a forward slash.
 *
 * @param {string} inputPath - Path that may contain Windows-style separators.
 * @returns {string} The same path using "/" as the only separator.
 */
function normalizePath(inputPath) {
  const forwardSlashed = inputPath.split("\\").join("/");
  return forwardSlashed;
}
/**
 * Runs the `sg_scene_generate` binary through `cargo run` and streams its
 * progress to an SSE writer.
 *
 * Events written to `sseWriter`: `status` (lifecycle messages), `log` (each
 * stdout/stderr chunk), `error` (timeout, non-zero exit, spawn failure) and a
 * final `complete` event on success.
 *
 * @param {object} params - Generation options (`sourceDir`, `sceneId`,
 *   `sceneName`, `outputRoot`, plus optional `sceneKind`, `targetUrl`,
 *   `lessons`, `sceneInfoJson`, `sceneIrJson`, `completionMeta`).
 * @param {{write: function(string): *}} sseWriter - Sink for SSE frames.
 * @param {string} projectRoot - Working directory for the cargo process.
 * @returns {Promise<object>} Resolves with `{ success: true, skillRoot }` on
 *   exit code 0, `{ success: false, error: "timeout" }` after five minutes, or
 *   `{ success: false, code, stderr }` on a non-zero exit. Rejects only when
 *   the process cannot be started.
 */
function runGenerator(params, sseWriter, projectRoot) {
  const {
    sourceDir,
    sceneId,
    sceneName,
    sceneKind,
    targetUrl,
    outputRoot,
    lessons,
    sceneInfoJson,
    sceneIrJson,
    completionMeta,
  } = params;
  const args = [
    "run",
    "--bin",
    "sg_scene_generate",
    "--",
    "--source-dir",
    normalizePath(sourceDir),
    "--scene-id",
    sceneId,
    "--scene-name",
    sceneName,
  ];
  if (sceneKind) {
    args.push("--scene-kind", sceneKind);
  }
  if (targetUrl) {
    args.push("--target-url", targetUrl);
  }
  args.push("--output-root", normalizePath(outputRoot));
  if (lessons) {
    args.push("--lessons", normalizePath(lessons));
  }
  if (sceneInfoJson) {
    args.push("--scene-info-json", sceneInfoJson);
  }
  if (sceneIrJson) {
    args.push("--scene-ir-json", sceneIrJson);
  }

  // Writes one SSE frame; the payload is always JSON-encoded.
  const emit = (event, payload) => {
    sseWriter.write(`event: ${event}\ndata: ${JSON.stringify(payload)}\n\n`);
  };

  return new Promise((resolve, reject) => {
    emit("status", { message: "Starting skill generation..." });
    emit("status", { message: `Running: cargo ${args.join(" ")}` });

    const child = spawn("cargo", args, {
      cwd: projectRoot,
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, RUST_BACKTRACE: "1" },
    });
    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is not turned into replacement characters.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    // "error", "close" and the timeout can all fire for the same process; only
    // the first one is allowed to settle the promise and emit a final event.
    let settled = false;

    const timeout = setTimeout(() => {
      if (settled) return;
      settled = true;
      child.kill("SIGTERM");
      emit("error", { message: "Generation timed out after 5 minutes." });
      resolve({ success: false, error: "timeout" });
    }, 5 * 60 * 1000);

    child.stdout.on("data", (text) => {
      stdout += text;
      emit("log", { message: text.trim() });
    });
    child.stderr.on("data", (text) => {
      stderr += text;
      emit("log", { message: text.trim() });
    });

    child.on("close", (code) => {
      clearTimeout(timeout);
      if (settled) return;
      settled = true;
      if (code === 0) {
        // The generator reports its output directory on stdout.
        const match = stdout.match(/generated scene package:\s*(.+)/);
        const skillRoot = match ? match[1] : null;
        emit("status", { message: "Skill generation completed." });
        emit("complete", {
          success: true,
          skillRoot,
          readiness: completionMeta?.readiness || null,
          workflowArchetype: completionMeta?.workflowArchetype || null,
          confidence: completionMeta?.confidence || 0,
        });
        resolve({ success: true, skillRoot });
        return;
      }
      emit("error", { message: `Generation failed (exit code ${code})` });
      if (stderr.trim()) {
        emit("error", { message: stderr.substring(0, 500) });
      }
      resolve({ success: false, code, stderr });
    });

    child.on("error", (err) => {
      clearTimeout(timeout);
      if (settled) return;
      settled = true;
      emit("error", { message: `Failed to start cargo: ${err.message}` });
      reject(err);
    });
  });
}
/**
 * Reads a scene source directory and assembles everything the analyzer needs.
 *
 * @param {string} sourceDir - Directory to scan.
 * @returns {object} Object with `tree`, `files`, optional well-known file
 *   contents (`scene.toml`, `SKILL.toml`, `SKILL.md`, `indexHtml`), optional
 *   `scripts`, plus `analysisContext` and `deterministic`.
 * @throws {Error} When `sourceDir` does not exist or is not a directory.
 */
function readDirectory(sourceDir) {
  if (!fs.existsSync(sourceDir)) {
    throw new Error(`Directory not found: ${sourceDir}`);
  }
  if (!fs.statSync(sourceDir).isDirectory()) {
    throw new Error(`Not a directory: ${sourceDir}`);
  }

  const files = [];
  const treeLines = [];
  walkDirectory(sourceDir, "", files, treeLines);

  const result = { tree: treeLines.join("\n"), files };

  // Well-known manifest files are exposed under their own file name; the last
  // one encountered wins. Only the first non-empty index.html is kept.
  const manifestNames = ["scene.toml", "SKILL.toml", "SKILL.md"];
  files.forEach((entry) => {
    const baseName = path.basename(entry.path);
    if (manifestNames.includes(baseName)) {
      result[baseName] = entry.content;
    } else if (baseName === "index.html" && !result.indexHtml) {
      result.indexHtml = entry.content;
    }
  });

  const scriptEntries = files
    .filter((entry) => entry.path.endsWith(".js"))
    .map((entry) => [entry.path, entry.content]);
  if (scriptEntries.length > 0) {
    result.scripts = Object.fromEntries(scriptEntries);
  }

  result.analysisContext = buildAnalysisContext(sourceDir, result);
  result.deterministic = buildDeterministicSceneIr(result.analysisContext, sourceDir);
  return result;
}
/**
 * Recursively walks `rootDir`, appending a tree line for every entry and the
 * content of every small text file.
 *
 * @param {string} rootDir - Root of the scan.
 * @param {string} relativeDir - Directory to list, relative to `rootDir`
 *   ("" for the root itself).
 * @param {Array<{path: string, content: string}>} files - Output: text files
 *   read so far (mutated).
 * @param {string[]} treeLines - Output: "[D] path" / "[F] path" lines (mutated).
 */
function walkDirectory(rootDir, relativeDir, files, treeLines) {
  const maxFileBytes = 1024 * 1024;
  const absoluteDir = relativeDir ? path.join(rootDir, relativeDir) : rootDir;
  const entries = fs.readdirSync(absoluteDir, { withFileTypes: true });
  entries.sort((left, right) => left.name.localeCompare(right.name, "en"));

  entries.forEach((entry) => {
    const relativePath = relativeDir
      ? path.posix.join(normalizePath(relativeDir), entry.name)
      : entry.name;
    const isDir = entry.isDirectory();
    treeLines.push(`${isDir ? "[D]" : "[F]"} ${relativePath}`);

    if (isDir) {
      walkDirectory(rootDir, relativePath, files, treeLines);
      return;
    }

    const extension = path.extname(entry.name).toLowerCase();
    if (!TEXT_FILE_EXTENSIONS.has(extension)) {
      return;
    }

    const absolutePath = path.join(absoluteDir, entry.name);
    // Files larger than 1 MiB are listed in the tree but never read.
    if (fs.statSync(absolutePath).size > maxFileBytes) {
      return;
    }

    files.push({
      path: normalizePath(relativePath),
      content: fs.readFileSync(absolutePath, "utf-8"),
    });
  });
}
/**
 * Builds the analysis context derived from a scanned directory: a directory
 * summary, index.html chunks, categorized code fragments, bootstrap hints,
 * deterministic signals and short snippets of business scripts under `js/`.
 *
 * @param {string} sourceDir - Scanned directory.
 * @param {object} dirContents - Scan result (`files`, `tree`, `indexHtml`).
 * @returns {object} Analysis context consumed by the deterministic IR builder.
 */
function buildAnalysisContext(sourceDir, dirContents) {
  const files = Array.isArray(dirContents.files) ? dirContents.files : [];
  const indexHtml = dirContents.indexHtml || "";

  const directorySummary = {
    sourceDir: normalizePath(sourceDir),
    tree: dirContents.tree || "(empty)",
    files: files.map(({ path: filePath, content }) => ({
      path: filePath,
      length: content.length,
    })),
  };

  // Substrings of a lower-cased file name that mark a vendored library.
  const vendorMarkers = [
    "vue.js",
    "vue.min.js",
    "element-ui",
    "elementui",
    "axios",
    "jquery",
    "jquery.min.js",
    "echarts",
    "echarts.min.js",
  ];
  const isVendorScript = (filePath) => {
    const loweredName = path.basename(filePath).toLowerCase();
    return vendorMarkers.some((marker) => loweredName.includes(marker));
  };

  const businessJsFragments = [];
  for (const file of dirContents.files || []) {
    const underJsDir = file.path.startsWith("js/") && file.path.endsWith(".js");
    if (underJsDir && !isVendorScript(file.path)) {
      // Only the first 600 characters of each business script are kept.
      businessJsFragments.push({
        path: file.path,
        snippet: file.content.slice(0, 600),
      });
    }
  }

  return {
    directorySummary,
    indexHtmlChunks: chunkFile("index.html", indexHtml, 3000, 2),
    urlFragments: collectFragments(files, isUrlFragment, 10),
    requestFragments: collectFragments(files, isRequestFragment, 10),
    branchingFragments: collectFragments(files, isBranchFragment, 8),
    responseFragments: collectFragments(files, isResponseFragment, 8),
    exportFragments: collectFragments(files, isExportFragment, 6),
    bootstrapHints: collectBootstrapHints(files, indexHtml),
    deterministicSignals: collectDeterministicSignals(files, indexHtml),
    businessJsFragments,
  };
}
/**
 * Splits `content` into consecutive fixed-size chunks, keeping at most
 * `maxChunks` of them.
 *
 * @param {string} filePath - Path recorded on every chunk.
 * @param {string} content - Text to split; falsy content yields no chunks.
 * @param {number} chunkSize - Characters per chunk.
 * @param {number} maxChunks - Upper bound on the number of chunks returned.
 * @returns {Array<{path: string, index: number, start: number, end: number, content: string}>}
 */
function chunkFile(filePath, content, chunkSize, maxChunks) {
  const chunks = [];
  if (!content) return chunks;

  for (
    let start = 0;
    start < content.length && chunks.length < maxChunks;
    start += chunkSize
  ) {
    const stop = start + chunkSize;
    chunks.push({
      path: filePath,
      index: chunks.length,
      start,
      end: Math.min(stop, content.length),
      content: content.slice(start, stop),
    });
  }
  return chunks;
}
/**
 * Gathers line fragments matching `predicate` across files, stopping as soon
 * as `limit` fragments have been collected.
 *
 * @param {Array<{path: string, content: string}>} files - Files to scan in order.
 * @param {function(string): boolean} predicate - Line matcher.
 * @param {number} limit - Maximum number of fragments overall.
 * @returns {Array<object>} Collected fragments.
 */
function collectFragments(files, predicate, limit) {
  const collected = [];
  for (const file of files) {
    const budget = limit - collected.length;
    const found = extractLineFragments(file.path, file.content, predicate, budget);
    collected.push(...found);
    if (collected.length >= limit) break;
  }
  return collected;
}
/**
 * Extracts context snippets around every line accepted by `predicate`.
 *
 * Each snippet spans three lines before and three lines after the hit
 * (clamped to the file), is trimmed, de-duplicated by text and truncated to
 * 1200 characters.
 *
 * @param {string} filePath - Path recorded on every fragment.
 * @param {string} content - File text.
 * @param {function(string): boolean} predicate - Line matcher.
 * @param {number} remainingLimit - Maximum fragments to return.
 * @returns {Array<{path: string, lineStart: number, lineEnd: number, snippet: string}>}
 */
function extractLineFragments(filePath, content, predicate, remainingLimit) {
  const fragments = [];
  if (!content || remainingLimit <= 0) return fragments;

  const lines = content.split(/\r?\n/);
  const seenSnippets = new Set();

  for (const [lineIndex, line] of lines.entries()) {
    if (fragments.length >= remainingLimit) break;
    if (!predicate(line)) continue;

    const firstLine = Math.max(0, lineIndex - 3);
    const endExclusive = Math.min(lines.length, lineIndex + 4);
    const snippet = lines.slice(firstLine, endExclusive).join("\n").trim();
    if (!snippet || seenSnippets.has(snippet)) continue;

    seenSnippets.add(snippet);
    fragments.push({
      path: filePath,
      lineStart: firstLine + 1,
      lineEnd: endExclusive,
      snippet: snippet.length > 1200 ? snippet.slice(0, 1200) : snippet,
    });
  }
  return fragments;
}
/**
 * Tells whether a source line looks like it carries a URL or an HTTP call
 * (absolute URL, quoted absolute path, `url:` key, fetch/axios/jQuery request).
 *
 * @param {string} line - Single source line.
 * @returns {boolean}
 */
function isUrlFragment(line) {
  const urlLikePattern = /(https?:\/\/|['"`]\/[^'"`\s]+|url\s*:|fetch\s*\(|axios\.(get|post|request)|\$\.(ajax|get|post))/i;
  return urlLikePattern.test(line);
}
/**
 * Tells whether a source line mentions request-building vocabulary
 * (payload/params/body keys, JSON.stringify, FormData, paging fields).
 *
 * @param {string} line - Single source line.
 * @returns {boolean}
 */
function isRequestFragment(line) {
  const requestPattern = /(contentType|JSON\.stringify|requestBody|requestData|payload|params|data\s*:|body\s*:|FormData|\bpage(Size|No)?\b|\brows\b)/i;
  return requestPattern.test(line);
}
/**
 * Tells whether a source line is a branching statement that also mentions a
 * mode-like field name.
 *
 * @param {string} line - Single source line.
 * @returns {boolean}
 */
function isBranchFragment(line) {
  const branchKeyword = /\b(if|else if|switch|case)\b/;
  if (!branchKeyword.test(line)) return false;
  const modeLikeField = /(mode|period|reportType|tab|status|scene|type)/i;
  return modeLikeField.test(line);
}
/**
 * Tells whether a source line looks like response handling (reading a
 * response collection, mapping/filtering, column definitions, normalizing).
 *
 * @param {string} line - Single source line.
 * @returns {boolean}
 */
function isResponseFragment(line) {
  const responsePattern = /(response|res|result)\.(data|content|rows|list|records|items)|\.map\(|\.filter\(|columnDefs|columns|normalize|transform/i;
  return responsePattern.test(line);
}
/**
 * Tells whether a source line mentions export/download vocabulary.
 *
 * @param {string} line - Single source line.
 * @returns {boolean}
 */
function isExportFragment(line) {
  const exportPattern = /(export|download|xlsx|csv|blob|saveAs|excel)/i;
  return exportPattern.test(line);
}
/**
 * Collects URLs that hint at where the scene should be bootstrapped.
 *
 * Sources, in order:
 *  1. `href`/`action`/`src` of `<a>`, `<form>` and `<iframe>` tags in index.html
 *     (static assets skipped); hint `type` is the tag name.
 *  2. Per file, absolute URLs assigned to well-known variable names; hint
 *     `type` is the lower-cased variable name. `loginPath`/`mainPath` instead
 *     produce an `expected_domain` hint (hostname) and a `target_url` hint.
 *  3. Per file, string arguments of `window.open(...)` and
 *     `location.href/assign/replace(...)`; hint `type` is "navigation".
 *
 * URLs are de-duplicated across all sources and at most 12 hints are returned.
 *
 * @param {Array<{path: string, content: string}>} files - Scanned text files.
 * @param {string} indexHtml - Content of index.html ("" when absent).
 * @returns {Array<object>} Hint objects.
 */
function collectBootstrapHints(files, indexHtml) {
  const hints = [];
  const seen = new Set();

  for (const match of indexHtml.matchAll(/<(a|form|iframe)[^>]+(?:href|action|src)=["']([^"']+)["']/gi)) {
    const url = match[2];
    if (url && !isStaticAssetUrl(url) && !seen.has(url)) {
      seen.add(url);
      hints.push({ type: match[1], url });
    }
  }

  for (const file of files) {
    const namedUrlMatches = file.content.matchAll(
      /\b(sourceUrl|sourceURL|baseUrl|baseURL|targetUrl|requestUrl|apiUrl|gatewayUrl|loginPath|mainPath)\b\s*[:=]\s*(['"`])(https?:\/\/[^'"`\s]+)\2/gi
    );
    for (const match of namedUrlMatches) {
      const url = match[3];
      const type = String(match[1] || "").toLowerCase();
      if (!url || seen.has(url)) continue;
      seen.add(url);

      // loginPath/mainPath are bootstrap hints: the domain is expected_domain.
      let domain = "";
      if (type === "loginpath" || type === "mainpath") {
        try {
          domain = new URL(url).hostname;
        } catch (_) {
          // The literal matched the regex but is not a parseable URL; fall
          // back to a plain named hint instead of aborting the whole scan.
          domain = "";
        }
      }

      if (domain) {
        hints.push({ type: "expected_domain", value: domain, path: file.path });
        // `url` mirrors `value` so consumers that read `hint.url`, like every
        // other hint produced here, also see this target.
        hints.push({ type: "target_url", value: url, url, path: file.path });
      } else {
        hints.push({ type, url, path: file.path });
      }
    }

    const navigationMatches = file.content.matchAll(
      /window\.open\((['"`])([^'"`]+)\1|location\.(?:href|assign|replace)\((['"`])([^'"`]+)\3/gi
    );
    for (const match of navigationMatches) {
      const url = match[2] || match[4];
      if (url && !seen.has(url)) {
        seen.add(url);
        hints.push({ type: "navigation", url, path: file.path });
      }
    }
  }

  return hints.slice(0, 12);
}
/**
 * Scans every file with regular expressions and aggregates the signals used
 * by the deterministic scene IR: endpoints, HTTP methods, response paths,
 * branch fields, mode values, pagination variables, filter expressions,
 * entry/export/secondary method names, page title keywords, static params and
 * bootstrap candidates.
 *
 * @param {Array<{path: string, content: string}>} files - Scanned text files.
 * @param {string} indexHtml - Content of index.html ("" when absent).
 * @returns {object} Aggregated signals; several list fields are capped
 *   (filter expressions at 8, method-name and title lists at 10).
 */
function collectDeterministicSignals(files, indexHtml) {
// Endpoints keyed by "METHOD|url"; the first occurrence across files wins.
const urls = new Map();
// Used as an ordered set of upper-cased HTTP verbs.
const methods = new Map();
const responsePaths = new Set();
const branchFields = new Set();
const modeValues = new Set();
const paginationVars = new Set();
const filterExpressions = new Set();
const entryMethods = new Set();
const exportMethods = new Set();
const secondaryRequestMethods = new Set();
const pageTitleKeywords = new Set();
const staticParams = {};
for (const file of files) {
const content = file.content;
for (const endpoint of extractEndpoints(content)) {
const key = `${endpoint.method}|${endpoint.url}`;
if (!urls.has(key)) {
urls.set(key, endpoint);
}
}
// `type: "POST"` / `method: "GET"` style option keys.
for (const match of content.matchAll(/\b(type|method)\s*:\s*['"`](GET|POST|PUT|DELETE|PATCH)['"`]/gi)) {
methods.set(match[2].toUpperCase(), true);
}
// Property read off a response-like variable, e.g. `res.data`.
for (const match of content.matchAll(/\b(?:response|res|result)\.(data|content|rows|list|records|items)\b/g)) {
responsePaths.add(match[1]);
}
// A mode-like identifier within 120 characters after a branching keyword.
for (const match of content.matchAll(/\b(?:if|switch|case)\b[\s\S]{0,120}?(period_mode|reportType|mode|tab|sceneType|status|type)\b/gi)) {
branchFields.add(match[1]);
}
// A known mode literal within 80 characters after a mode field name.
for (const match of content.matchAll(/\b(period_mode|reportType|mode|tab)\b[\s\S]{0,80}?['"`](month|week|day|detail|summary|list|chart)['"`]/gi)) {
modeValues.add(match[2]);
}
for (const match of content.matchAll(/\b(pageSize|pageNo|pageNum|page|rows|limit|offset)\b/g)) {
paginationVars.add(match[1]);
}
// Either the argument of `.filter(...)` or an `if (...)` condition that
// contains a comparison operator; expressions longer than 160 chars are dropped.
for (const match of content.matchAll(/\.filter\(\s*(.+?)\s*\)|if\s*\(([^)]*(?:!==|!=|===|==|>|<)[^)]*)\)/g)) {
const expr = (match[1] || match[2] || "").trim();
if (expr && expr.length <= 160) {
filterExpressions.add(expr);
}
}
// Declared function names, classified by naming convention.
for (const match of content.matchAll(/(?:function\s+|const\s+|let\s+|var\s+)([A-Za-z_$][\w$]*)\s*(?:=\s*(?:async\s*)?\(|\()/g)) {
const name = match[1];
if (/(query|search|load|fetch|init|mounted|created|getData)/i.test(name)) {
entryMethods.add(name);
}
if (/(export|download|excel|csv)/i.test(name)) {
exportMethods.add(name);
}
if (/(detail|charge|charges|info|details)/i.test(name)) {
secondaryRequestMethods.add(name);
}
}
for (const match of content.matchAll(/(?:title|document\.title)\s*[:=]\s*['"`]([^'"`]{2,40})['"`]/gi)) {
pageTitleKeywords.add(match[1]);
}
// String-literal values of well-known request keys; the first value seen for
// a key (as spelled in the source) is kept.
const staticParamMatches = content.matchAll(/\b(orgNo|orgCode|orgId|period_mode|reportType|pageSize|rows)\b\s*:\s*['"`]([^'"`\n]+)['"`]/gi);
for (const match of staticParamMatches) {
if (!(match[1] in staticParams)) {
staticParams[match[1]] = match[2];
}
}
}
for (const match of indexHtml.matchAll(/<title>([^<]{2,40})<\/title>/gi)) {
pageTitleKeywords.add(match[1].trim());
}
const bootstrapCandidates = collectBootstrapCandidates(files, indexHtml, Array.from(urls.values()));
return {
endpoints: Array.from(urls.values()),
methods: Array.from(methods.keys()),
responsePaths: Array.from(responsePaths),
branchFields: Array.from(branchFields),
modeValues: Array.from(modeValues),
paginationVars: Array.from(paginationVars),
filterExpressions: Array.from(filterExpressions).slice(0, 8),
entryMethods: Array.from(entryMethods).slice(0, 10),
exportMethods: Array.from(exportMethods).slice(0, 10),
secondaryRequestMethods: Array.from(secondaryRequestMethods).slice(0, 10),
pageTitleKeywords: Array.from(pageTitleKeywords).slice(0, 10),
staticParams,
bootstrapCandidates,
};
}
/**
 * Detects request endpoints in a source text.
 *
 * For every line that looks URL-related, a window of two lines before and
 * four lines after is inspected for a URL literal, an HTTP method and a
 * content type. Endpoints are de-duplicated by "METHOD|url" and capped at 12.
 *
 * @param {string} content - Source text.
 * @returns {Array<{name: string, url: string, role: string, method: string, contentType: (string|null), description: string}>}
 */
function extractEndpoints(content) {
  const found = [];
  const seenKeys = new Set();
  const sourceLines = content.split(/\r?\n/);

  sourceLines.forEach((line, lineIndex) => {
    if (!isUrlFragment(line)) return;

    const windowStart = Math.max(0, lineIndex - 2);
    const windowEnd = Math.min(sourceLines.length, lineIndex + 5);
    const block = sourceLines.slice(windowStart, windowEnd).join("\n");

    const urlMatch =
      block.match(/\burl\s*:\s*(['"`])([^'"`]+)\1/i) ||
      block.match(/fetch\s*\(\s*(['"`])([^'"`]+)\1/i) ||
      block.match(/axios\.(?:get|post|request)\s*\(\s*(['"`])([^'"`]+)\1/i);
    if (!urlMatch) return;

    const url = sanitizeUrl(urlMatch[2]);
    if (!url) return;

    // An explicit type/method option wins; otherwise fall back to the axios
    // verb, and finally to GET.
    const methodMatch =
      block.match(/\b(?:type|method)\s*:\s*(['"`])([A-Z]+)\1/i) ||
      block.match(/\baxios\.post\s*\(/i) ||
      block.match(/\baxios\.get\s*\(/i);
    let method = "GET";
    if (methodMatch) {
      const inferred = methodMatch[2] || (methodMatch[0].includes(".post") ? "POST" : "GET");
      method = String(inferred).toUpperCase();
    }

    const contentTypeMatch = block.match(/\bcontentType\s*:\s*(['"`])([^'"`]+)\1/i);
    const name = inferEndpointName(block, url, found.length);
    const role = classifyRequestRole(url);

    const dedupeKey = `${method}|${url}`;
    if (seenKeys.has(dedupeKey)) return;
    seenKeys.add(dedupeKey);

    found.push({
      name,
      url,
      role,
      method,
      contentType: contentTypeMatch ? contentTypeMatch[2] : null,
      description: `Detected from source snippet around line ${lineIndex + 1}`,
    });
  });

  return found.slice(0, 12);
}
/**
 * Trims a detected URL and discards values that are not request targets.
 *
 * @param {string} rawUrl - URL literal as found in source.
 * @returns {string} The trimmed URL, or "" when it is empty, a static asset,
 *   template noise, a `javascript:`/`data:`/`#` pseudo-URL, or points at a
 *   .js/.css/.png/.svg file.
 */
function sanitizeUrl(rawUrl) {
  const value = rawUrl ? rawUrl.trim() : "";
  const rejected =
    !value ||
    isStaticAssetUrl(value) ||
    isTemplateNoiseUrl(value) ||
    /^(javascript:|data:|#)/i.test(value) ||
    /\.js(\?|$)|\.css(\?|$)|\.png(\?|$)|\.svg(\?|$)/i.test(value);
  return rejected ? "" : value;
}
/**
 * Assigns a role to a request URL based on its text.
 *
 * @param {string} rawUrl - Request URL.
 * @returns {string} "template_noise", "export_service", "local_helper",
 *   "gateway_api" or "business_api".
 */
function classifyRequestRole(rawUrl) {
  const lowered = String(rawUrl || "").toLowerCase();
  if (!lowered || isTemplateNoiseUrl(lowered)) {
    return "template_noise";
  }

  const isLoopback = lowered.includes("localhost") || lowered.includes("127.0.0.1");
  if (isLoopback) {
    // Loopback services are either export helpers or generic local helpers.
    const looksLikeExport = /(surfaceservices|reportservices|export)/i.test(lowered);
    return looksLikeExport ? "export_service" : "local_helper";
  }

  return lowered.includes("gateway") ? "gateway_api" : "business_api";
}
/**
 * Picks a name for a detected endpoint.
 *
 * Preference order: the first identifier declared in the surrounding block,
 * then the last non-empty segment of the URL (split on "/", "?" and "#"),
 * then a positional `endpoint_N` fallback.
 *
 * @param {string} block - Source window around the endpoint.
 * @param {string} url - Endpoint URL.
 * @param {number} index - Zero-based position used for the fallback name.
 * @returns {string}
 */
function inferEndpointName(block, url, index) {
  const declaration = /(?:function|const|let|var)\s+([A-Za-z_$][\w$]*)/.exec(block);
  if (declaration) {
    return declaration[1];
  }
  const segments = url.split(/[/?#]/).filter(Boolean);
  const lastSegment = segments[segments.length - 1];
  return lastSegment || `endpoint_${index + 1}`;
}
/**
 * Builds the list of bootstrap candidates from detected endpoints first and
 * bootstrap hints second, de-duplicated by "role|targetUrl" and capped at 8.
 *
 * @param {Array<{path: string, content: string}>} files - Scanned text files.
 * @param {string} indexHtml - Content of index.html ("" when absent).
 * @param {Array<{url: string}>} endpoints - Detected endpoints.
 * @returns {Array<object>} Bootstrap candidates.
 */
function collectBootstrapCandidates(files, indexHtml, endpoints) {
  const candidates = [];
  const seenKeys = new Set();

  // Adds the candidate built from (rawUrl, source) unless it is unusable or
  // an equivalent one was already recorded.
  const consider = (rawUrl, source) => {
    const candidate = buildBootstrapCandidate(rawUrl, source);
    if (!candidate) return;
    const key = `${candidate.role}|${candidate.targetUrl}`;
    if (seenKeys.has(key)) return;
    seenKeys.add(key);
    candidates.push(candidate);
  };

  endpoints.forEach((endpoint) => consider(endpoint.url, "api_endpoint"));
  collectBootstrapHints(files, indexHtml).forEach((hint) => consider(hint.url, hint.type));

  return candidates.slice(0, 8);
}
/**
 * Turns a raw URL into a bootstrap candidate descriptor.
 *
 * Relative URLs keep their raw text as `targetUrl` and have no expected
 * domain. Absolute API/gateway URLs are reduced to their origin; other
 * absolute URLs keep origin plus pathname.
 *
 * @param {string} rawUrl - URL as found in source.
 * @param {string} source - Where the URL came from (hint type or "api_endpoint").
 * @returns {?{expectedDomain: string, targetUrl: string, source: string, role: string, validForBootstrap: boolean}}
 *   The candidate, or null when the URL is empty or cannot be parsed.
 */
function buildBootstrapCandidate(rawUrl, source) {
  if (!rawUrl) return null;

  let parsed;
  let role;
  try {
    // The placeholder base only exists so that relative URLs can be parsed.
    parsed = new URL(rawUrl, "http://placeholder.local");
    role = classifyBootstrapRole(rawUrl, source, parsed);
  } catch (_) {
    return null;
  }
  if (!role) return null;

  const isAbsolute = /^https?:\/\//i.test(rawUrl);
  let targetUrl = rawUrl;
  if (isAbsolute) {
    const isApiRole = role === "business_api" || role === "gateway_api";
    targetUrl = isApiRole ? parsed.origin : `${parsed.origin}${parsed.pathname}`;
  }

  return {
    expectedDomain: isAbsolute ? parsed.hostname : "",
    targetUrl,
    source,
    role,
    validForBootstrap: BOOTSTRAP_ROLE_PRIORITY.includes(role),
  };
}
/**
 * Classifies a URL for bootstrap purposes.
 *
 * Decision order: template noise, static asset, pseudo-URL, relative URL
 * (only root-relative paths count as business entries), loopback host
 * (export service vs. local helper), gateway, then the hint source, and
 * finally an `/api/` path check. Anything left is a business entry.
 *
 * The source is compared as a whole word. An unanchored match let the
 * single-letter `a` (anchor tag) alternative match any source containing the
 * letter "a" — including "api_endpoint", "apiurl" and "baseurl" — so API
 * sources were reported as business entries.
 *
 * @param {string} rawUrl - URL as found in source.
 * @param {string} source - Hint type or "api_endpoint".
 * @param {URL} parsedUrl - `rawUrl` already parsed (hostname/pathname are read).
 * @returns {string} One of "template_noise", "static_asset", "business_entry",
 *   "export_service", "local_helper", "gateway_api", "business_api".
 */
function classifyBootstrapRole(rawUrl, source, parsedUrl) {
  const value = String(rawUrl || "").trim();
  if (!value || isTemplateNoiseUrl(value)) return "template_noise";
  if (isStaticAssetUrl(value)) return "static_asset";
  if (/^(javascript:|data:|#)/i.test(value)) return "template_noise";

  const isAbsolute = /^https?:\/\//i.test(value);
  if (!isAbsolute) {
    return value.startsWith("/") ? "business_entry" : "template_noise";
  }

  const hostname = String(parsedUrl?.hostname || "").toLowerCase();
  const pathname = String(parsedUrl?.pathname || "");
  if (hostname === "localhost" || hostname === "127.0.0.1") {
    if (/(SurfaceServices|ReportServices|export)/i.test(pathname)) {
      return "export_service";
    }
    return "local_helper";
  }
  if (/(gateway)/i.test(hostname) || /(gateway)/i.test(value)) {
    return "gateway_api";
  }

  const sourceName = String(source || "");
  // Page-entry sources: tag names, navigation calls and page URL variables.
  // target_url/loginpath/mainpath are listed so they keep the classification
  // they had before the match was anchored.
  if (/^(?:sourceurl|targeturl|target_url|loginpath|mainpath|navigation|form|iframe|a)$/i.test(sourceName)) {
    return "business_entry";
  }
  if (/^(?:apiurl|requesturl|baseurl|api_endpoint)$/i.test(sourceName) || /\/api\//i.test(pathname)) {
    return "business_api";
  }
  return "business_entry";
}
/**
 * Tells whether a URL contains template placeholders or marker text
 * (`${...}`, `%s`, "placeholder", "not a valid").
 *
 * @param {string} rawUrl - URL text.
 * @returns {boolean}
 */
function isTemplateNoiseUrl(rawUrl) {
  const noisePattern = /\$\{[^}]+\}|%s|placeholder|not a valid/i;
  return noisePattern.test(rawUrl);
}
/**
 * Tells whether a URL looks like a static asset: it mentions cdn/static/asset
 * anywhere, or ends (optionally before a query string) in a script, style or
 * image extension.
 *
 * @param {string} rawUrl - URL text.
 * @returns {boolean}
 */
function isStaticAssetUrl(rawUrl) {
  const staticAssetPattern = /(?:cdn|static|assets?)|(?:\.js|\.css|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.ico)(?:\?|$)/i;
  return staticAssetPattern.test(rawUrl);
}
/**
 * Assembles the deterministic scene IR from the analysis context.
 *
 * Every derived field comes from `context.deterministicSignals` through a
 * dedicated helper; this function only wires the helpers together and fills
 * in the fixed parts of the IR.
 *
 * @param {object} context - Analysis context; only `deterministicSignals` is read.
 * @param {string} sourceDir - Scanned directory; its base name seeds the
 *   scene name and scene id.
 * @returns {object} Scene IR including `sceneId`, `workflowArchetype`,
 *   `bootstrap`, `params`, `modes`, `workflowSteps`, `readiness`,
 *   `confidence`, `uncertainties` and the raw `deterministicSignals`.
 */
function buildDeterministicSceneIr(context, sourceDir) {
const signals = context.deterministicSignals || {};
const sceneName = humanizeSceneName(path.basename(sourceDir));
const sceneIdDiagnostics = deriveSceneIdDiagnostics({ sourceDir, sceneName, signals });
// Empty string when no scene id candidate could be derived.
const sceneId = sceneIdDiagnostics.selected || "";
const workflowArchetype = classifyWorkflowArchetype(signals);
const evidence = buildEvidence(signals, workflowArchetype);
const bootstrap = deriveBootstrap(signals);
const modes = buildModes(signals);
const workflowSteps = buildWorkflowSteps(signals, workflowArchetype);
const workflowEvidence = buildWorkflowEvidence(signals);
// Only the first detected response path is used at the top level.
const responsePath = signals.responsePaths?.[0] || "";
const normalizeRules = buildNormalizeRules(signals);
const params = buildParams(signals, workflowArchetype);
const confidence = scoreConfidence(signals, workflowArchetype);
const readiness = buildReadiness({
sceneIdDiagnostics,
workflowArchetype,
bootstrap,
apiEndpoints: signals.endpoints || [],
params,
workflowSteps,
confidence,
});
return {
sceneId,
sceneIdDiagnostics,
sceneName,
// The scene kind is fixed here; it is not derived from the signals.
sceneKind: "report_collection",
workflowArchetype,
bootstrap,
params,
modes,
defaultMode: modes[0] ? modes[0].name : null,
modeSwitchField: signals.branchFields?.find((field) => /mode|period/i.test(field)) || null,
workflowSteps,
workflowEvidence,
requestTemplate: {},
responsePath,
normalizeRules,
artifactContract: {
type: "report-artifact",
successStatus: ["ok", "partial", "empty"],
failureStatus: ["blocked", "error"],
},
validationHints: {
requiresTargetPage: true,
// True only when every param uses one of the listed resolvers.
runtimeCompatible: params.every((param) =>
["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver)
),
// Any readiness level other than "A" requires manual completion.
manualCompletionRequired: readiness.level !== "A",
// Copied so later changes to this list do not alter `readiness`.
missingPieces: readiness.missingPieces.slice(),
},
evidence,
readiness,
apiEndpoints: signals.endpoints || [],
staticParams: signals.staticParams || {},
columnDefs: [],
confidence,
uncertainties: buildUncertainties(signals, workflowArchetype),
deterministicSignals: signals,
};
}
/**
 * Derives scene id candidates and selects the best one.
 *
 * Candidates are produced, in order, from business keywords, the controlled
 * alias table and the plain directory slug. The first valid candidate is
 * selected; if none is valid the first candidate is reported together with
 * the reason it was rejected.
 *
 * @param {{sourceDir: string, sceneName: string, signals: object}} input
 * @returns {{selected: string, candidateSource: string, valid: boolean, invalidReason: (string|null), candidates: Array<object>}}
 */
function deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }) {
  const baseName = path.basename(sourceDir || "");
  const candidates = [];
  const knownValues = new Set();

  // Each strategy is evaluated lazily, right before its candidate is recorded.
  const strategies = [
    [() => buildKeywordSceneId(baseName, sceneName, signals), "deterministic_keywords"],
    [() => buildAliasSceneId(baseName), "controlled_alias"],
    [() => slugifyAscii(baseName), "directory_slug"],
  ];

  for (const [produce, source] of strategies) {
    const value = slugifyAscii(produce());
    if (!value || knownValues.has(value)) continue;
    knownValues.add(value);
    const { valid, reason } = validateSceneIdCandidate(value, { sceneName, sourceDir, signals });
    candidates.push({ value, source, valid, reason: valid ? null : reason });
  }

  const chosen = candidates.find((candidate) => candidate.valid) || candidates[0] || null;
  return {
    selected: chosen?.value || "",
    candidateSource: chosen?.source || "",
    valid: Boolean(chosen?.valid),
    invalidReason: chosen && !chosen.valid ? chosen.reason : null,
    candidates,
  };
}
/**
 * Builds a scene id from business keywords.
 *
 * The aliased slug of the directory name or scene name is used when the
 * longest of them has at least two dash-separated parts; otherwise up to five
 * business tokens are joined with dashes.
 *
 * @param {string} baseName - Directory base name.
 * @param {string} sceneName - Human-readable scene name.
 * @param {object} signals - Deterministic signals.
 * @returns {string} Scene id, or "" when nothing usable was found.
 */
function buildKeywordSceneId(baseName, sceneName, signals) {
  const nameSources = [baseName, sceneName].filter(Boolean);

  const aliasedSlugs = nameSources
    .map((value) => slugifyAscii(applySceneIdAliases(value)))
    .filter(Boolean);
  const byLengthDesc = [...new Set(aliasedSlugs)];
  byLengthDesc.sort((left, right) => right.length - left.length);

  const longest = byLengthDesc[0] || "";
  if (longest && longest.split("-").length >= 2) {
    return longest;
  }

  const tokens = extractBusinessTokens(nameSources.join(" "), signals);
  return tokens.length ? tokens.slice(0, 5).join("-") : "";
}
/**
 * Builds a scene id by applying the alias rules to the directory name and
 * slugifying the result.
 *
 * @param {string} baseName - Directory base name.
 * @returns {string} Scene id, or "" for an empty name.
 */
function buildAliasSceneId(baseName) {
  return baseName ? slugifyAscii(applySceneIdAliases(baseName)) : "";
}
/**
 * Extracts distinct business tokens from free text and detected signals.
 *
 * Tokens come from the aliased text, the aliased page title keywords, and the
 * name plus last four URL segments of each endpoint. Tokens shorter than two
 * characters or listed as generic are dropped.
 *
 * @param {string} rawText - Free text (directory and scene names).
 * @param {object} [signals={}] - Deterministic signals.
 * @returns {string[]} Tokens in first-seen order.
 */
function extractBusinessTokens(rawText, signals = {}) {
  const tokens = new Set();

  const addTokensFrom = (value) => {
    const slug = slugifyAscii(value);
    if (!slug) return;
    slug
      .split("-")
      .filter((token) => token.length >= 2 && !GENERIC_SCENE_ID_TOKENS.has(token))
      .forEach((token) => tokens.add(token));
  };

  addTokensFrom(applySceneIdAliases(rawText));

  (signals.pageTitleKeywords || []).forEach((keyword) => {
    addTokensFrom(applySceneIdAliases(keyword));
  });

  (signals.endpoints || []).forEach((endpoint) => {
    addTokensFrom(endpoint?.name);
    String(endpoint?.url || "")
      .split(/[/?#=&._-]+/)
      .filter(Boolean)
      .slice(-4)
      .forEach(addTokensFrom);
  });

  return [...tokens];
}
/**
 * Replaces every known business phrase in `value` with its ASCII alias,
 * padded with spaces, applying the alias rules in their declared order.
 *
 * @param {*} value - Text to rewrite; falsy values are treated as "".
 * @returns {string}
 */
function applySceneIdAliases(value) {
  return SCENE_ID_ALIAS_RULES.reduce(
    (text, { pattern, replacement }) => text.replace(pattern, ` ${replacement} `),
    String(value || "")
  );
}
/**
 * Validates a scene id candidate.
 *
 * Checks run in order and the first failure is reported: empty, generic,
 * numeric-only, too short, numeric-dominant, and finally "semantically
 * detached" (none of the expected business tokens of three or more
 * characters occurs in the id).
 *
 * @param {string} sceneId - Candidate id.
 * @param {{sceneName?: string, sourceDir?: string, signals?: object}} [context]
 * @returns {{valid: boolean, reason: (string|null)}}
 */
function validateSceneIdCandidate(sceneId, { sceneName = "", sourceDir = "", signals = {} } = {}) {
  const reject = (reason) => ({ valid: false, reason });

  const normalized = slugifyAscii(sceneId);
  if (!normalized) return reject("empty_scene_id");
  if (GENERIC_SCENE_IDS.has(normalized)) return reject("generic_scene_id");

  const letterCount = (normalized.match(/[a-z]/g) || []).length;
  const digitCount = (normalized.match(/\d/g) || []).length;
  if (letterCount === 0) return reject("numeric_only_scene_id");

  const isSingleWord = !normalized.includes("-");
  if (isSingleWord && (normalized.length < 5 || letterCount < 3)) {
    return reject("scene_id_too_short");
  }
  if (digitCount > letterCount && letterCount < 4) {
    return reject("numeric_dominant_scene_id");
  }

  const nameText = [sceneName, path.basename(sourceDir || "")].filter(Boolean).join(" ");
  const expectedTokens = extractBusinessTokens(nameText, signals).filter(
    (token) => token.length >= 3
  );
  const isDetached =
    expectedTokens.length > 0 && !expectedTokens.some((token) => normalized.includes(token));
  if (isDetached) return reject("scene_id_semantic_detached");

  return { valid: true, reason: null };
}
/**
 * Classifies the scene workflow from the deterministic signals.
 *
 * Checked in order: "paginated_enrichment" (pagination + a secondary request
 * + post-processing), "multi_mode_request" (a mode branch, at least two mode
 * values and at least two business endpoints), "page_state_eval" (state-like
 * wording with at most one business endpoint), else "single_request_table".
 *
 * @param {object} signals - Deterministic signals.
 * @returns {string} Workflow archetype name.
 */
function classifyWorkflowArchetype(signals) {
  const endpointCount = getBusinessEndpoints(signals).length;
  const countOf = (list) => (list || []).length;

  const paginates = countOf(signals.paginationVars) > 0;
  const hasSecondary = countOf(signals.secondaryRequestMethods) > 0 || endpointCount >= 2;
  const postProcesses = countOf(signals.filterExpressions) > 0 || countOf(signals.exportMethods) > 0;
  if (paginates && hasSecondary && postProcesses) {
    return "paginated_enrichment";
  }

  const branchesOnMode = (signals.branchFields || []).some((field) =>
    /period_mode|reportType|tjzq|mode/i.test(field)
  );
  const modesDiverge = countOf(signals.modeValues) >= 2 && endpointCount >= 2;
  if (branchesOnMode && modesDiverge) {
    return "multi_mode_request";
  }

  const stateText = [...(signals.entryMethods || []), ...(signals.filterExpressions || [])].join(" ");
  const mentionsState = /(state|status|ready|available|enabled)/i.test(stateText);
  if (mentionsState && endpointCount <= 1) {
    return "page_state_eval";
  }

  return "single_request_table";
}
/**
 * Produces the evidence list attached to the scene IR: one deterministic
 * entry per non-empty signal group, followed by a classification entry for
 * the chosen workflow archetype.
 *
 * @param {object} signals - Deterministic signals.
 * @param {string} workflowArchetype - Archetype chosen for the scene.
 * @returns {Array<{kind: string, summary: string, source: string, confidence: number}>}
 */
function buildEvidence(signals, workflowArchetype) {
  const entry = (kind, summary, confidence) => ({
    kind,
    summary,
    source: "runner",
    confidence,
  });

  const evidence = [];
  const businessEndpointCount = getBusinessEndpoints(signals).length;
  if (businessEndpointCount > 0) {
    evidence.push(
      entry("deterministic", `Detected ${businessEndpointCount} business API endpoint(s).`, 0.9)
    );
  }

  // [signal key, summary label, confidence] in the order entries are emitted.
  const listedSignals = [
    ["branchFields", "Branch fields", 0.86],
    ["paginationVars", "Pagination vars", 0.84],
    ["secondaryRequestMethods", "Secondary request methods", 0.82],
    ["exportMethods", "Export methods", 0.78],
  ];
  for (const [key, label, confidence] of listedSignals) {
    if ((signals[key] || []).length > 0) {
      evidence.push(entry("deterministic", `${label}: ${signals[key].join(", ")}`, confidence));
    }
  }

  evidence.push(
    entry("classification", `Workflow archetype classified as ${workflowArchetype}.`, 0.72)
  );
  return evidence;
}
/**
 * Chooses the bootstrap descriptor for the scene.
 *
 * Roles are tried in priority order; within a role the first candidate that
 * is valid for bootstrap and has a target URL wins. Without such a candidate
 * the default descriptor is returned. Page title keywords are attached in
 * both cases.
 *
 * @param {object} signals - Deterministic signals.
 * @returns {{expectedDomain: string, targetUrl: string, requiresTargetPage: boolean, pageTitleKeywords: string[], source: string}}
 */
function deriveBootstrap(signals) {
  const pool = signals.bootstrapCandidates || [];
  const pageTitleKeywords = signals.pageTitleKeywords || [];

  for (const role of BOOTSTRAP_ROLE_PRIORITY) {
    const winner = pool.find(
      (item) => item.role === role && item.validForBootstrap && item.targetUrl
    );
    if (winner) {
      return {
        expectedDomain: winner.expectedDomain || "",
        targetUrl: winner.targetUrl || "",
        requiresTargetPage: true,
        pageTitleKeywords,
        source: winner.source || "deterministic",
      };
    }
  }

  return { ...DEFAULT_BOOTSTRAP, pageTitleKeywords };
}
/**
 * Returns the detected endpoints whose role is business-related
 * ("business_api", "gateway_api" or "business_entry").
 *
 * @param {object} signals - Deterministic signals.
 * @returns {Array<object>} Matching endpoints in detection order.
 */
function getBusinessEndpoints(signals) {
  const businessRoles = ["business_api", "gateway_api", "business_entry"];
  const endpoints = signals.endpoints || [];
  return endpoints.filter(({ role }) => businessRoles.includes(role));
}
/**
 * Builds one mode descriptor per detected mode value (at most four).
 *
 * Each mode is paired positionally with an endpoint, falling back to the
 * first endpoint, and switches on the first mode-like branch field (or
 * "period_mode" when none was detected).
 *
 * @param {object} signals - Deterministic signals.
 * @returns {Array<object>} Mode descriptors; empty when no mode values exist.
 */
function buildModes(signals) {
  const modeValues = (signals.modeValues || []).slice(0, 4);
  const modes = [];

  modeValues.forEach((value, position) => {
    const switchField =
      signals.branchFields?.find((field) => /mode|period|tab|type/i.test(field)) || "period_mode";
    const apiEndpoint = signals.endpoints?.[position] || signals.endpoints?.[0] || null;
    modes.push({
      name: value,
      label: value,
      condition: { field: switchField, operator: "equals", value },
      apiEndpoint,
      columnDefs: [],
      requestTemplate: {},
      normalizeRules: buildNormalizeRules(signals),
      responsePath: signals.responsePaths?.[0] || "",
    });
  });

  return modes;
}
/**
 * Builds the ordered workflow steps for the given archetype.
 *
 * @param {object} signals - Deterministic signals.
 * @param {string} workflowArchetype - "multi_mode_request",
 *   "paginated_enrichment", "page_state_eval", or anything else for the
 *   default request + transform flow.
 * @returns {Array<object>} Step descriptors.
 */
function buildWorkflowSteps(signals, workflowArchetype) {
  const businessEndpoints = getBusinessEndpoints(signals);
  const primaryEndpoint = businessEndpoints[0]?.name || null;
  const secondaryEndpoint = businessEndpoints[1]?.name || null;
  const firstEntry = signals.entryMethods?.[0] || null;

  switch (workflowArchetype) {
    case "multi_mode_request":
      return [
        {
          type: "request",
          entry: firstEntry,
          endpoint: primaryEndpoint,
          description: "Select mode and query the matching endpoint.",
        },
        {
          type: "transform",
          description: "Normalize mode-specific rows into a shared artifact.",
        },
      ];

    case "paginated_enrichment": {
      const steps = [
        {
          type: "paginate",
          entry: firstEntry,
          endpoint: primaryEndpoint,
          description: "Iterate primary list pages.",
        },
      ];
      const hasSecondary = (signals.secondaryRequestMethods || []).length > 0 || secondaryEndpoint;
      if (hasSecondary) {
        steps.push({
          type: "secondary_request",
          entry: signals.secondaryRequestMethods?.[0] || signals.entryMethods?.[1] || null,
          endpoint: secondaryEndpoint,
          description: "Fetch per-row or batched detail data.",
        });
      }
      const firstFilter = signals.filterExpressions?.[0];
      if (firstFilter) {
        steps.push({
          type: "filter",
          expr: firstFilter,
          description: "Apply business-side filtering.",
        });
      }
      const firstExport = signals.exportMethods?.[0];
      if (firstExport) {
        steps.push({
          type: "export",
          entry: firstExport,
          description: "Prepare export payload or trigger download logic.",
        });
      }
      return steps;
    }

    case "page_state_eval":
      return [
        {
          type: "page_state",
          entry: firstEntry,
          description: "Evaluate page state and derive readiness outcome.",
        },
      ];

    default:
      return [
        {
          type: "request",
          entry: firstEntry,
          endpoint: primaryEndpoint,
          description: "Issue the primary scene request.",
        },
        {
          type: "transform",
          description: "Normalize the primary response.",
        },
      ];
  }
}
/**
 * Build the normalization rule set attached to an endpoint description.
 *
 * The rule type, empty required-field list and null filtering are fixed;
 * only `responseHints` varies, taken from `signals.responsePaths`.
 *
 * @param {object} signals - Extracted signals; reads `responsePaths`.
 * @returns {{type: string, requiredFields: Array, filterNull: boolean, responseHints: Array}}
 */
function buildNormalizeRules(signals) {
  const responseHints = signals.responsePaths || [];
  const rules = {
    type: "validate_required",
    requiredFields: [],
    filterNull: true,
    responseHints,
  };
  return rules;
}
/**
 * Summarize the raw evidence behind the inferred workflow.
 *
 * Every list in the result is passed through `uniqueStringValues`.
 *
 * @param {object} signals - Extracted signals; reads `entryMethods`,
 *   `paginationVars`, `secondaryRequestMethods`, `filterExpressions` and
 *   `exportMethods`, plus the result of `getBusinessEndpoints(signals)`.
 * @returns {{requestEntries: Array, paginationFields: Array, secondaryRequestEntries: Array, postProcessSteps: Array}}
 */
function buildWorkflowEvidence(signals) {
  const nameOf = (endpoint) => endpoint.name;

  // Up to three entry methods followed by up to three business endpoint names.
  const leadingEntries = (signals.entryMethods || []).slice(0, 3);
  const leadingEndpointNames = getBusinessEndpoints(signals).map(nameOf).slice(0, 3);
  const requestEntries = uniqueStringValues(leadingEntries.concat(leadingEndpointNames));

  const paginationFields = uniqueStringValues(signals.paginationVars || []);

  // Business endpoints after the first one (positions 1 and 2) count as secondary.
  const followUpMethods = signals.secondaryRequestMethods || [];
  const followUpEndpointNames = getBusinessEndpoints(signals).slice(1, 3).map(nameOf);
  const secondaryRequestEntries = uniqueStringValues([...followUpMethods, ...followUpEndpointNames]);

  // One marker per kind of post-processing that has at least one entry.
  const markers = [];
  if ((signals.filterExpressions || []).some(() => true)) {
    markers.push("filter");
  }
  if ((signals.exportMethods || []).some(() => true)) {
    markers.push("export");
  }
  const postProcessSteps = uniqueStringValues(markers);

  return {
    requestEntries,
    paginationFields,
    secondaryRequestEntries,
    postProcessSteps,
  };
}
/**
 * Infer the runtime parameters a scene needs from its signals.
 *
 * - `org` is added when any static param key matches /org/i or when the
 *   archetype is "multi_mode_request".
 * - `period` is added when a branch field matches /period|mode/i or a static
 *   param key matches /period/i.
 * - `page_size` (optional) is added when a pagination variable matches
 *   /pageSize|rows|limit/i.
 *
 * @param {object} signals - Extracted signals; reads `staticParams`,
 *   `branchFields` and `paginationVars`.
 * @param {string} workflowArchetype - Archetype label for the scene.
 * @returns {object[]} Parameter descriptors, in the order org, period, page_size.
 */
function buildParams(signals, workflowArchetype) {
  // Single place that fixes the descriptor shape and key order.
  const describeParam = (name, resolver, required, promptMissing, promptAmbiguous) => ({
    name,
    resolver,
    required,
    promptMissing,
    promptAmbiguous,
    resolverConfig: {},
  });

  const staticNames = Object.keys(signals.staticParams || {});
  const branchFields = signals.branchFields || [];
  const paginationVars = signals.paginationVars || [];

  const needsOrg =
    staticNames.some((key) => /org/i.test(key)) || workflowArchetype === "multi_mode_request";
  const needsPeriod =
    branchFields.some((field) => /period|mode/i.test(field)) ||
    staticNames.some((key) => /period/i.test(key));
  const needsPageSize = paginationVars.some((value) => /pageSize|rows|limit/i.test(value));

  const selected = [];
  if (needsOrg) {
    selected.push(
      describeParam(
        "org",
        "dictionary_entity",
        true,
        "Organization parameter is missing.",
        "Organization parameter is ambiguous."
      )
    );
  }
  if (needsPeriod) {
    selected.push(
      describeParam(
        "period",
        "month_week_period",
        true,
        "Period parameter is missing.",
        "Period parameter is ambiguous."
      )
    );
  }
  if (needsPageSize) {
    selected.push(describeParam("page_size", "literal_passthrough", false, "", ""));
  }
  return selected;
}
/**
 * List the open questions left after deterministic analysis.
 *
 * Messages are emitted in a fixed order: missing endpoints, no bootstrap
 * candidate flagged `validForBootstrap`, and (for "paginated_enrichment" only)
 * fewer than two business endpoints and no filter/export evidence.
 *
 * @param {object} signals - Extracted signals; reads `endpoints`,
 *   `bootstrapCandidates`, `filterExpressions` and `exportMethods`.
 * @param {string} workflowArchetype - Archetype label for the scene.
 * @returns {string[]} Human-readable uncertainty messages (possibly empty).
 */
function buildUncertainties(signals, workflowArchetype) {
  const isPaginated = workflowArchetype === "paginated_enrichment";
  const lacksEndpoints = !(signals.endpoints || []).length;
  const lacksValidBootstrap = !signals.bootstrapCandidates?.some(
    (candidate) => candidate.validForBootstrap
  );
  const lacksPostProcess = !(
    (signals.filterExpressions || []).length || (signals.exportMethods || []).length
  );

  // Each row pairs a condition with the message reported when it holds.
  const findings = [
    [lacksEndpoints, "No API endpoint was detected deterministically."],
    [lacksValidBootstrap, "Bootstrap target URL is still inferred weakly."],
    [
      isPaginated && getBusinessEndpoints(signals).length < 2,
      "Secondary enrichment request is not strongly confirmed.",
    ],
    [isPaginated && lacksPostProcess, "Paginated enrichment is missing post-process evidence."],
  ];

  return findings.filter(([applies]) => applies).map(([, message]) => message);
}
/**
 * Compute a heuristic confidence score for the analysis.
 *
 * Starts at 0.3 and adds a fixed bonus for each kind of evidence present:
 * business endpoints (0.18), a valid bootstrap candidate (0.12), response
 * paths (0.08), entry methods (0.06), and an archetype-specific bonus (0.14)
 * for "multi_mode_request" with at least two mode values or for
 * "paginated_enrichment" with both pagination vars and secondary request
 * methods. The total is rounded to two decimals and capped at 0.95.
 *
 * @param {object} signals - Extracted signals.
 * @param {string} workflowArchetype - Archetype label for the scene.
 * @returns {number} Confidence score.
 */
function scoreConfidence(signals, workflowArchetype) {
  const countOf = (list) => (list || []).length;

  // Rules are evaluated lazily and in order, so the float sum accumulates
  // in a fixed sequence.
  const rules = [
    [0.18, () => getBusinessEndpoints(signals).length > 0],
    [0.12, () => (signals.bootstrapCandidates || []).some((candidate) => candidate.validForBootstrap)],
    [0.08, () => countOf(signals.responsePaths) > 0],
    [0.06, () => countOf(signals.entryMethods) > 0],
    [0.14, () => workflowArchetype === "multi_mode_request" && countOf(signals.modeValues) >= 2],
    [
      0.14,
      () =>
        workflowArchetype === "paginated_enrichment" &&
        countOf(signals.paginationVars) > 0 &&
        countOf(signals.secondaryRequestMethods) > 0,
    ],
  ];

  let total = 0.3;
  for (const [bonus, applies] of rules) {
    if (applies()) {
      total += bonus;
    }
  }

  const rounded = Number(total.toFixed(2));
  return Math.min(0.95, rounded);
}
/**
 * Grade how ready a generated scene is for trial execution.
 *
 * Collects hard gaps (`missingPieces`), softer `risks`, one summary note and
 * four named gates, then derives a level:
 * - "C" when two or more pieces are missing;
 * - "B" when exactly one piece is missing, or when nothing is missing but
 *   there is more than one risk or `confidence` is below 0.7;
 * - "A" otherwise.
 *
 * `bootstrap`, `apiEndpoints`, `params` and `workflowSteps` may be omitted or
 * nullish; each is then treated as empty and surfaces as a missing piece
 * where relevant instead of raising a TypeError.
 *
 * @param {object} [input]
 * @param {object} [input.sceneIdDiagnostics] - Reads `valid` and `invalidReason`.
 * @param {string} [input.workflowArchetype] - Archetype label for the scene.
 * @param {object} [input.bootstrap] - Reads `targetUrl` and `expectedDomain`.
 * @param {object[]} [input.apiEndpoints] - Endpoints; only entries whose `role`
 *   is business_api, gateway_api or business_entry are counted.
 * @param {object[]} [input.params] - Parameter descriptors; reads `name` and `resolver`.
 * @param {object[]} [input.workflowSteps] - Step descriptors; reads `type`.
 * @param {number} [input.confidence] - Score compared against 0.55 and 0.7.
 * @returns {{level: string, confidence: number, gates: object[], risks: string[], missingPieces: string[], notes: string[]}}
 */
function buildReadiness({
  sceneIdDiagnostics,
  workflowArchetype,
  bootstrap,
  apiEndpoints,
  params,
  workflowSteps,
  confidence,
} = {}) {
  const ENDPOINT_ROLES = ["business_api", "gateway_api", "business_entry"];
  const WORKFLOW_PIECES = ["workflow_steps", "paginate_step", "secondary_request", "post_process"];
  const SUPPORTED_RESOLVERS = ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"];

  // Guard every collaborator the same way apiEndpoints is guarded, so a
  // partial input degrades into reported gaps rather than an exception.
  const bootstrapInfo = bootstrap || {};
  const paramList = params || [];
  const stepList = workflowSteps || [];

  const risks = [];
  const missingPieces = [];
  const notes = [];

  const businessApiEndpoints = (apiEndpoints || []).filter((endpoint) =>
    ENDPOINT_ROLES.includes(endpoint.role)
  );
  const bootstrapResolved = Boolean(bootstrapInfo.targetUrl || bootstrapInfo.expectedDomain);

  if (!sceneIdDiagnostics?.valid) {
    missingPieces.push("invalid_scene_id");
    risks.push(`Scene id is invalid${sceneIdDiagnostics?.invalidReason ? `: ${sceneIdDiagnostics.invalidReason}` : "."}`);
  }

  if (!bootstrapResolved) {
    missingPieces.push("bootstrap_target");
    risks.push("Business bootstrap target is not confirmed.");
  } else if (!bootstrapInfo.expectedDomain) {
    risks.push("Expected domain is missing; host validation may be weak.");
  }

  if (!businessApiEndpoints.length) {
    missingPieces.push("api_endpoint");
    risks.push("No request endpoint detected.");
  }

  if (!stepList.length) {
    missingPieces.push("workflow_steps");
    risks.push("Workflow steps are incomplete.");
  }

  if (workflowArchetype === "paginated_enrichment") {
    const hasPaginate = stepList.some((step) => step.type === "paginate");
    const hasSecondary = stepList.some((step) => step.type === "secondary_request");
    const hasPostProcess = stepList.some((step) => ["filter", "transform", "export"].includes(step.type));
    if (!hasPaginate) {
      missingPieces.push("paginate_step");
      risks.push("Paginated enrichment lacks pagination evidence.");
    }
    // A secondary step alone is not enough; a second endpoint must back it.
    if (!hasSecondary || businessApiEndpoints.length < 2) {
      missingPieces.push("secondary_request");
      risks.push("Paginated enrichment lacks a strong secondary request signal.");
    }
    if (!hasPostProcess) {
      missingPieces.push("post_process");
      risks.push("Paginated enrichment lacks filter/transform/export evidence.");
    }
  }

  if (workflowArchetype === "multi_mode_request" && !paramList.some((param) => param.name === "period")) {
    risks.push("Mode-aware workflow is missing a resolved period parameter.");
  }

  if (confidence < 0.55) {
    risks.push("Overall analysis confidence is low.");
  }

  let level = "A";
  if (missingPieces.length > 0) {
    level = missingPieces.length >= 2 ? "C" : "B";
  } else if (risks.length > 1 || confidence < 0.7) {
    level = "B";
  }

  if (level === "A") {
    notes.push("Structure looks complete enough for direct trial.");
  } else if (level === "B") {
    notes.push("Generation should be reviewed before internal-network execution.");
  } else {
    notes.push("Manual completion is required before trial.");
  }

  // Computed once and shared by each gate's `passed` and `reason` fields.
  const workflowGap = missingPieces.find((item) => WORKFLOW_PIECES.includes(item));
  const resolversSupported = paramList.every((param) => SUPPORTED_RESOLVERS.includes(param.resolver));

  const gates = [
    {
      // NOTE(review): a missing sceneIdDiagnostics object records
      // "invalid_scene_id" above but still passes this gate, because only an
      // explicit `valid === false` fails it. Confirm this is intended.
      name: "scene_id_valid",
      passed: sceneIdDiagnostics?.valid !== false,
      reason: sceneIdDiagnostics?.valid === false ? sceneIdDiagnostics.invalidReason || "invalid_scene_id" : null,
    },
    {
      name: "bootstrap_resolved",
      passed: bootstrapResolved,
      reason: bootstrapResolved ? null : "bootstrap_target",
    },
    {
      name: "workflow_complete_for_archetype",
      passed: workflowGap === undefined,
      reason: workflowGap || null,
    },
    {
      name: "runtime_contract_compatible",
      passed: resolversSupported,
      reason: resolversSupported ? null : "runtime_contract_incompatible",
    },
  ];

  return {
    level,
    confidence,
    gates,
    risks,
    missingPieces,
    notes,
  };
}
/**
 * Convert an arbitrary value into a lowercase ASCII slug.
 *
 * camelCase boundaries become dashes, every run of characters outside
 * [a-z0-9] collapses to a single dash, and leading/trailing dashes are
 * dropped. Falsy input yields an empty string.
 *
 * @param {*} value - Value to slugify; coerced with `String`.
 * @returns {string} The slug (possibly empty).
 */
function slugifyAscii(value) {
  const text = String(value || "");
  // Split before lowering, otherwise the camelCase boundary is lost.
  const withBoundaries = text.replace(/([a-z0-9])([A-Z])/g, "$1-$2");
  const lowered = withBoundaries.trim().toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
/**
 * Turn a dash/underscore separated identifier into a display name.
 *
 * Runs of "-" and "_" become single spaces and the first character of each
 * word (as matched by /\b\w/) is upper-cased. Empty or separator-only input
 * falls back to "Generated Scene".
 *
 * @param {*} value - Identifier to humanize; coerced with `String`.
 * @returns {string} Human-readable name.
 */
function humanizeSceneName(value) {
  const spaced = String(value || "").replace(/[-_]+/g, " ");
  const label = spaced.trim();
  if (label.length === 0) {
    return "Generated Scene";
  }
  const capitalize = (letter) => letter.toUpperCase();
  return label.replace(/\b\w/g, capitalize);
}
/**
 * Drop falsy entries and duplicates from a list, keeping first-seen order.
 *
 * @param {Array} [list] - Values to deduplicate; a falsy argument is treated as empty.
 * @returns {Array} New array of the distinct truthy values.
 */
function uniqueStringValues(list) {
  const source = list || [];
  const truthy = source.filter((item) => Boolean(item));
  return [...new Set(truthy)];
}
// Public API of this module (CommonJS exports). Every other function in the
// file is internal and is not reachable by requiring this module.
module.exports = {
buildAnalysisContext,
buildDeterministicSceneIr,
deriveSceneIdDiagnostics,
readDirectory,
runGenerator,
validateSceneIdCandidate,
};