// File: claw/frontend/scene-generator/generator-runner.js
// (JavaScript, 1954 lines, 63 KiB)

const fs = require("fs");
const path = require("path");
const { spawn } = require("child_process");
/**
 * Lower-case file extensions treated as text; walkDirectory only reads the
 * contents of files whose extension is in this set.
 */
const TEXT_FILE_EXTENSIONS = new Set(
  [".html", ".js", ".jsx", ".json", ".md", ".mjs", ".toml", ".ts", ".tsx", ".txt", ".vue"]
);
/**
 * Baseline bootstrap descriptor: no domain or URL resolved yet, a target page
 * is required, and the source is marked as deterministic.
 * NOTE(review): consumers are outside this part of the file — confirm how it is merged.
 */
const DEFAULT_BOOTSTRAP = {
  expectedDomain: "",
  targetUrl: "",
  requiresTargetPage: true,
  pageTitleKeywords: [],
  source: "deterministic",
};
/**
 * Roles that make a bootstrap candidate usable (see `validForBootstrap` in
 * buildBootstrapCandidate), listed in priority order.
 */
const BOOTSTRAP_ROLE_PRIORITY = [
  "business_entry",
  "business_api",
  "gateway_api",
];
/** Scene ids that are too generic to accept; validateSceneIdCandidate rejects them. */
const GENERIC_SCENE_IDS = new Set(
  ["scene", "report", "generated", "generated-scene", "skill", "default-scene"]
);
/** Slug tokens that carry no business meaning; extractBusinessTokens drops them. */
const GENERIC_SCENE_ID_TOKENS = new Set(
  [
    "api", "app", "data", "default",
    "export", "generator", "page", "report",
    "request", "scene", "skill", "system",
    "table", "temp", "test", "tmp",
  ]
);
/**
 * Ordered phrase -> ASCII alias rules applied when deriving scene ids.
 * Order matters: longer phrases come before the shorter phrases they contain,
 * so the more specific alias is substituted first.
 */
const SCENE_ID_ALIAS_RULES = [
  [/营销\s*2(?:\.|。)?0/gi, "marketing"],
  [/零度户报表数据生成/gi, "zero-consumer-report"],
  [/零度户报表/gi, "zero-consumer-report"],
  [/零度户/gi, "zero-consumer"],
  [/台区线损率统计分析/gi, "tq-lineloss-analysis"],
  [/台区线损/gi, "tq-lineloss"],
  [/线损率统计分析/gi, "lineloss-analysis"],
  [/线损率/gi, "lineloss-rate"],
  [/线损/gi, "lineloss"],
  [/台区/gi, "tq"],
  [/月[_-]?周累计/gi, "monthly-weekly-cumulative"],
  [/月累计/gi, "monthly-cumulative"],
  [/周累计/gi, "weekly-cumulative"],
  [/统计分析/gi, "analysis"],
  [/报表数据生成/gi, "report"],
  [/报表/gi, "report"],
  [/数据生成/gi, "generator"],
].map(([pattern, replacement]) => ({ pattern, replacement }));
/**
 * Converts every backslash in a path to a forward slash.
 *
 * @param {string} inputPath - Path that may use Windows separators.
 * @returns {string} The same path with `/` separators only.
 */
function normalizePath(inputPath) {
  const backslashes = /\\/g;
  const forwardSlashed = inputPath.replace(backslashes, "/");
  return forwardSlashed;
}
/**
 * Runs `cargo run --bin sg_scene_generate` and streams its progress to an SSE writer.
 *
 * @param {object} params - Generation options. `sourceDir`, `sceneId`, `sceneName` and
 *   `outputRoot` are always passed to the CLI; `sceneKind`, `targetUrl`, `lessons`,
 *   `sceneInfoJson` and `sceneIrJson` are passed only when truthy. `completionMeta`
 *   (readiness / workflowArchetype / confidence) is echoed into the `complete` event.
 * @param {{ write: Function }} sseWriter - Sink that receives SSE-formatted text frames.
 * @param {string} projectRoot - Working directory for the cargo process.
 * @returns {Promise<object>} Resolves with `{ success: true, skillRoot }` on exit code 0,
 *   `{ success: false, code, stderr }` on any other exit code, or
 *   `{ success: false, error: "timeout" }` after five minutes. Rejects when the child
 *   process emits an `error` event (for example when cargo cannot be started).
 */
function runGenerator(params, sseWriter, projectRoot) {
  const {
    sourceDir,
    sceneId,
    sceneName,
    sceneKind,
    targetUrl,
    outputRoot,
    lessons,
    sceneInfoJson,
    sceneIrJson,
    completionMeta,
  } = params;

  const args = [
    "run",
    "--bin",
    "sg_scene_generate",
    "--",
    "--source-dir",
    normalizePath(sourceDir),
    "--scene-id",
    sceneId,
    "--scene-name",
    sceneName,
  ];
  if (sceneKind) {
    args.push("--scene-kind", sceneKind);
  }
  if (targetUrl) {
    args.push("--target-url", targetUrl);
  }
  args.push("--output-root", normalizePath(outputRoot));
  if (lessons) {
    args.push("--lessons", normalizePath(lessons));
  }
  if (sceneInfoJson) {
    args.push("--scene-info-json", sceneInfoJson);
  }
  if (sceneIrJson) {
    args.push("--scene-ir-json", sceneIrJson);
  }

  // Writes one SSE frame: "event: <name>\ndata: <json>\n\n".
  const emit = (event, payload) => {
    sseWriter.write(`event: ${event}\ndata: ${JSON.stringify(payload)}\n\n`);
  };

  return new Promise((resolve, reject) => {
    emit("status", { message: "Starting skill generation..." });
    emit("status", { message: `Running: cargo ${args.join(" ")}` });

    const child = spawn("cargo", args, {
      cwd: projectRoot,
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, RUST_BACKTRACE: "1" },
    });

    let stdout = "";
    let stderr = "";
    // Guards against a second terminal event: a child process may emit both
    // "error" and "close", and a timeout may be followed by either of them.
    let settled = false;

    const timeout = setTimeout(() => {
      if (settled) return;
      settled = true;
      child.kill("SIGTERM");
      emit("error", { message: "Generation timed out after 5 minutes." });
      resolve({ success: false, error: "timeout" });
    }, 5 * 60 * 1000);

    child.stdout.on("data", (data) => {
      const text = data.toString();
      stdout += text;
      emit("log", { message: text.trim() });
    });

    child.stderr.on("data", (data) => {
      const text = data.toString();
      stderr += text;
      emit("log", { message: text.trim() });
    });

    child.on("close", (code) => {
      clearTimeout(timeout);
      if (settled) return;
      settled = true;

      if (code === 0) {
        const match = stdout.match(/generated scene package:\s*(.+)/);
        // `.` also matches "\r", so trim to avoid a trailing carriage return
        // (CRLF output) or trailing spaces ending up in the path.
        const skillRoot = match ? match[1].trim() : null;
        emit("status", { message: "Skill generation completed." });
        emit("complete", {
          success: true,
          skillRoot,
          readiness: completionMeta?.readiness || null,
          workflowArchetype: completionMeta?.workflowArchetype || null,
          confidence: completionMeta?.confidence || 0,
        });
        resolve({ success: true, skillRoot });
        return;
      }

      emit("error", { message: `Generation failed (exit code ${code})` });
      if (stderr.trim()) {
        emit("error", { message: stderr.substring(0, 500) });
      }
      resolve({ success: false, code, stderr });
    });

    child.on("error", (err) => {
      clearTimeout(timeout);
      if (settled) return;
      settled = true;
      emit("error", { message: `Failed to start cargo: ${err.message}` });
      reject(err);
    });
  });
}
/**
 * Reads a source directory and assembles everything the generator needs from it.
 *
 * @param {string} sourceDir - Directory to scan.
 * @returns {object} `{ tree, files }` plus, when present, `scene.toml`, `SKILL.toml`,
 *   `SKILL.md`, `indexHtml`, `scripts` (every `.js` file keyed by path), and the derived
 *   `analysisContext` and `deterministic` results.
 * @throws {Error} When `sourceDir` does not exist or is not a directory.
 */
function readDirectory(sourceDir) {
  if (!fs.existsSync(sourceDir)) {
    throw new Error(`Directory not found: ${sourceDir}`);
  }
  if (!fs.statSync(sourceDir).isDirectory()) {
    throw new Error(`Not a directory: ${sourceDir}`);
  }

  const files = [];
  const treeLines = [];
  walkDirectory(sourceDir, "", files, treeLines);

  const result = { tree: treeLines.join("\n"), files };
  const scripts = {};

  for (const { path: filePath, content } of files) {
    switch (path.basename(filePath)) {
      case "scene.toml":
      case "SKILL.toml":
      case "SKILL.md":
        // Well-known manifests are stored under their own file name; a later match wins.
        result[path.basename(filePath)] = content;
        break;
      case "index.html":
        // Keep the first index.html that has non-empty content.
        if (!result.indexHtml) result.indexHtml = content;
        break;
      default:
        break;
    }
    if (filePath.endsWith(".js")) {
      scripts[filePath] = content;
    }
  }

  if (Object.keys(scripts).length > 0) {
    result.scripts = scripts;
  }

  result.analysisContext = buildAnalysisContext(sourceDir, result);
  result.deterministic = buildDeterministicSceneIr(result.analysisContext, sourceDir);
  return result;
}
/**
 * Recursively walks `rootDir`, recording a tree listing and the contents of text files.
 *
 * @param {string} rootDir - Root of the walk.
 * @param {string} relativeDir - Directory to list, relative to `rootDir` ("" for the root).
 * @param {Array<{path: string, content: string}>} files - Receives text files, mutated in place.
 * @param {string[]} treeLines - Receives "[D] path" / "[F] path" lines, mutated in place.
 */
function walkDirectory(rootDir, relativeDir, files, treeLines) {
  const absoluteDir = relativeDir ? path.join(rootDir, relativeDir) : rootDir;
  const entries = fs.readdirSync(absoluteDir, { withFileTypes: true });
  entries.sort((left, right) => left.name.localeCompare(right.name, "en"));

  for (const entry of entries) {
    const relativePath = relativeDir
      ? path.posix.join(normalizePath(relativeDir), entry.name)
      : entry.name;
    const isDir = entry.isDirectory();
    treeLines.push(`${isDir ? "[D]" : "[F]"} ${relativePath}`);

    if (isDir) {
      walkDirectory(rootDir, relativePath, files, treeLines);
      continue;
    }

    // Only known text extensions are read; everything else appears in the tree only.
    const extension = path.extname(entry.name).toLowerCase();
    if (!TEXT_FILE_EXTENSIONS.has(extension)) {
      continue;
    }

    const absolutePath = path.join(absoluteDir, entry.name);
    // Files larger than 1 MiB are listed but not loaded.
    if (fs.statSync(absolutePath).size > 1024 * 1024) {
      continue;
    }

    const content = fs.readFileSync(absolutePath, "utf-8");
    files.push({ path: normalizePath(relativePath), content });
  }
}
/**
 * Condenses the scanned directory into the context used by later analysis steps.
 *
 * @param {string} sourceDir - Directory that was scanned.
 * @param {object} dirContents - Partial readDirectory result (`tree`, `files`, `indexHtml`).
 * @returns {object} Directory summary, index.html chunks, categorized line fragments,
 *   bootstrap hints, deterministic signals and short snippets of business JS files.
 */
function buildAnalysisContext(sourceDir, dirContents) {
  const files = Array.isArray(dirContents.files) ? dirContents.files : [];
  const indexHtml = dirContents.indexHtml || "";

  const directorySummary = {
    sourceDir: normalizePath(sourceDir),
    tree: dirContents.tree || "(empty)",
    files: files.map(({ path: filePath, content }) => ({
      path: filePath,
      length: content.length,
    })),
  };

  // Base-name fragments identifying vendored libraries, which are not business code.
  const THIRD_PARTY_JS_PATTERNS = [
    "vue.js",
    "vue.min.js",
    "element-ui",
    "elementui",
    "axios",
    "jquery",
    "jquery.min.js",
    "echarts",
    "echarts.min.js",
  ];
  const isThirdParty = (filePath) => {
    const lowered = path.basename(filePath).toLowerCase();
    return THIRD_PARTY_JS_PATTERNS.some((pattern) => lowered.includes(pattern));
  };

  // Business JS = .js files under js/ that are not third-party; keep at most 600 chars each.
  const businessJsFragments = [];
  for (const file of dirContents.files || []) {
    const isJsUnderJsDir = file.path.startsWith("js/") && file.path.endsWith(".js");
    if (isJsUnderJsDir && !isThirdParty(file.path)) {
      const snippet = file.content.length > 600 ? file.content.slice(0, 600) : file.content;
      businessJsFragments.push({ path: file.path, snippet });
    }
  }

  return {
    directorySummary,
    indexHtmlChunks: chunkFile("index.html", indexHtml, 3000, 2),
    urlFragments: collectFragments(files, isUrlFragment, 10),
    requestFragments: collectFragments(files, isRequestFragment, 10),
    branchingFragments: collectFragments(files, isBranchFragment, 8),
    responseFragments: collectFragments(files, isResponseFragment, 8),
    exportFragments: collectFragments(files, isExportFragment, 6),
    bootstrapHints: collectBootstrapHints(files, indexHtml),
    deterministicSignals: collectDeterministicSignals(files, indexHtml),
    businessJsFragments,
  };
}
/**
 * Splits `content` into consecutive fixed-size chunks, keeping at most `maxChunks`.
 *
 * @param {string} filePath - Path recorded on every chunk.
 * @param {string} content - Text to split; empty or missing content yields no chunks.
 * @param {number} chunkSize - Number of characters per chunk.
 * @param {number} maxChunks - Upper bound on the number of chunks returned.
 * @returns {Array<{path: string, index: number, start: number, end: number, content: string}>}
 */
function chunkFile(filePath, content, chunkSize, maxChunks) {
  const chunks = [];
  if (!content) return chunks;

  for (let start = 0; start < content.length && chunks.length < maxChunks; start += chunkSize) {
    const stop = start + chunkSize;
    chunks.push({
      path: filePath,
      index: chunks.length,
      start,
      end: Math.min(stop, content.length),
      content: content.slice(start, stop),
    });
  }
  return chunks;
}
/**
 * Gathers up to `limit` matching line fragments across files, in file order.
 *
 * @param {Array<{path: string, content: string}>} files - Files to scan.
 * @param {(line: string) => boolean} predicate - Selects the lines of interest.
 * @param {number} limit - Maximum number of fragments to return.
 * @returns {object[]} Fragments produced by extractLineFragments.
 */
function collectFragments(files, predicate, limit) {
  const collected = [];
  for (const file of files) {
    const remaining = limit - collected.length;
    collected.push(...extractLineFragments(file.path, file.content, predicate, remaining));
    if (collected.length >= limit) break;
  }
  return collected;
}
/**
 * Extracts context snippets around every line accepted by `predicate`.
 *
 * Each snippet covers up to three lines before and three lines after the hit, is
 * trimmed, deduplicated within the file and capped at 1200 characters.
 *
 * @param {string} filePath - Path recorded on every fragment.
 * @param {string} content - File text.
 * @param {(line: string) => boolean} predicate - Selects the lines of interest.
 * @param {number} remainingLimit - Maximum number of fragments to produce.
 * @returns {Array<{path: string, lineStart: number, lineEnd: number, snippet: string}>}
 *   `lineStart`/`lineEnd` are 1-based and inclusive.
 */
function extractLineFragments(filePath, content, predicate, remainingLimit) {
  if (!content || remainingLimit <= 0) return [];

  const lines = content.split(/\r?\n/);
  const emitted = new Set();
  const results = [];

  for (const [lineIndex, line] of lines.entries()) {
    if (results.length >= remainingLimit) break;
    if (!predicate(line)) continue;

    const from = Math.max(0, lineIndex - 3);
    const to = Math.min(lines.length, lineIndex + 4);
    const snippet = lines.slice(from, to).join("\n").trim();
    if (!snippet || emitted.has(snippet)) continue;

    emitted.add(snippet);
    results.push({
      path: filePath,
      lineStart: from + 1,
      lineEnd: to,
      snippet: snippet.length > 1200 ? snippet.slice(0, 1200) : snippet,
    });
  }
  return results;
}
/**
 * Tells whether a source line looks URL- or request-related: an absolute URL, a quoted
 * root-relative path, a `url:` key, or a fetch / axios / jQuery request call.
 *
 * @param {string} line - A single source line.
 * @returns {boolean}
 */
function isUrlFragment(line) {
  const urlLikePattern = /(https?:\/\/|['"`]\/[^'"`\s]+|url\s*:|fetch\s*\(|axios\.(get|post|request)|\$\.(ajax|get|post))/i;
  return urlLikePattern.test(line);
}
/**
 * Tells whether a source line mentions request-payload vocabulary (content type,
 * JSON.stringify, params/data/body keys, FormData, paging fields).
 *
 * @param {string} line - A single source line.
 * @returns {boolean}
 */
function isRequestFragment(line) {
  const requestPattern = /(contentType|JSON\.stringify|requestBody|requestData|payload|params|data\s*:|body\s*:|FormData|\bpage(Size|No)?\b|\brows\b)/i;
  return requestPattern.test(line);
}
/**
 * Tells whether a source line is a branching statement (if / else if / switch / case)
 * that also mentions a mode-like field name.
 *
 * @param {string} line - A single source line.
 * @returns {boolean}
 */
function isBranchFragment(line) {
  const branchKeyword = /\b(if|else if|switch|case)\b/;
  if (!branchKeyword.test(line)) return false;
  const modeLikeField = /(mode|period|reportType|tab|status|scene|type)/i;
  return modeLikeField.test(line);
}
/**
 * Tells whether a source line looks like response handling: reading a known payload
 * field off response/res/result, mapping or filtering, or mentioning column
 * definitions or normalize/transform steps.
 *
 * @param {string} line - A single source line.
 * @returns {boolean}
 */
function isResponseFragment(line) {
  const responsePattern = /(response|res|result)\.(data|content|rows|list|records|items)|\.map\(|\.filter\(|columnDefs|columns|normalize|transform/i;
  return responsePattern.test(line);
}
/**
 * Tells whether a source line mentions export/download vocabulary
 * (export, download, xlsx, csv, blob, saveAs, excel), case-insensitively.
 *
 * @param {string} line - A single source line.
 * @returns {boolean}
 */
function isExportFragment(line) {
  const exportPattern = /(export|download|xlsx|csv|blob|saveAs|excel)/i;
  return exportPattern.test(line);
}
/**
 * Collects URL hints that may identify the page or domain a scene starts from.
 *
 * Sources, in order: `<a>/<form>/<iframe>` URLs in index.html (static assets skipped),
 * absolute URLs assigned to well-known variable names, and literal URLs passed to
 * `window.open` / `location.href|assign|replace`. Each URL is reported once.
 *
 * @param {Array<{path: string, content: string}>} files - Scanned text files.
 * @param {string} indexHtml - Contents of index.html ("" when absent).
 * @returns {object[]} At most 12 hints. Most are `{ type, url, path? }`; `loginPath` /
 *   `mainPath` assignments instead yield an `expected_domain` and a `target_url` hint
 *   carrying `value` rather than `url`.
 */
function collectBootstrapHints(files, indexHtml) {
  const hints = [];
  const seen = new Set();

  for (const match of indexHtml.matchAll(/<(a|form|iframe)[^>]+(?:href|action|src)=["']([^"']+)["']/gi)) {
    const url = match[2];
    if (url && !isStaticAssetUrl(url) && !seen.has(url)) {
      seen.add(url);
      hints.push({ type: match[1], url });
    }
  }

  for (const file of files) {
    const namedUrlMatches = file.content.matchAll(
      /\b(sourceUrl|sourceURL|baseUrl|baseURL|targetUrl|requestUrl|apiUrl|gatewayUrl|loginPath|mainPath)\b\s*[:=]\s*(['"`])(https?:\/\/[^'"`\s]+)\2/gi
    );
    for (const match of namedUrlMatches) {
      const url = match[3];
      const type = String(match[1] || "").toLowerCase();
      if (!url || seen.has(url)) continue;
      seen.add(url);

      // loginPath/mainPath are bootstrap hints — the domain is expected_domain
      if (type === "loginpath" || type === "mainpath") {
        let domain;
        try {
          domain = new URL(url).hostname;
        } catch (_) {
          // The regex only guarantees an http(s):// prefix; a malformed literal
          // (e.g. "http://[bad") must not abort the whole scan, so it is skipped.
          continue;
        }
        hints.push({ type: "expected_domain", value: domain, path: file.path });
        hints.push({ type: "target_url", value: url, path: file.path });
      } else {
        hints.push({ type, url, path: file.path });
      }
    }

    const navigationMatches = file.content.matchAll(
      /window\.open\((['"`])([^'"`]+)\1|location\.(?:href|assign|replace)\((['"`])([^'"`]+)\3/gi
    );
    for (const match of navigationMatches) {
      const url = match[2] || match[4];
      if (url && !seen.has(url)) {
        seen.add(url);
        hints.push({ type: "navigation", url, path: file.path });
      }
    }
  }

  return hints.slice(0, 12);
}
/**
 * Scans every file with regular expressions and collects the raw signals that the
 * deterministic scene builder works from.
 *
 * @param {Array<{path: string, content: string}>} files - Scanned text files.
 * @param {string} indexHtml - Contents of index.html ("" when absent).
 * @returns {object} Endpoints, HTTP methods, response paths, branch fields, mode values,
 *   pagination variables, filter expressions, entry/export/secondary method names,
 *   page-title keywords, static params, bootstrap candidates and the `g1e*` request,
 *   join-key, output-column and aggregate-rule signals. Several lists are capped
 *   (see the `slice` calls in the return statement).
 */
function collectDeterministicSignals(files, indexHtml) {
// Keyed by "METHOD|url" so each endpoint is kept once (first occurrence wins).
const urls = new Map();
// Used as an ordered set of upper-cased HTTP methods.
const methods = new Map();
const responsePaths = new Set();
const branchFields = new Set();
const modeValues = new Set();
const paginationVars = new Set();
const filterExpressions = new Set();
const entryMethods = new Set();
const exportMethods = new Set();
const secondaryRequestMethods = new Set();
const pageTitleKeywords = new Set();
const staticParams = {};
const g1eJoinKeys = new Set();
const g1eAggregateRules = new Set();
const g1eOutputColumns = [];
for (const file of files) {
const content = file.content;
for (const endpoint of extractEndpoints(content)) {
const key = `${endpoint.method}|${endpoint.url}`;
if (!urls.has(key)) {
urls.set(key, endpoint);
}
}
// Join keys: a fixed list of field names read off data/item/row.
for (const match of content.matchAll(/\b(?:data|item|row)\.(wkOrderNo|countyCodeName|orgNo|orgCode)\b/g)) {
g1eJoinKeys.add(match[1]);
}
// Aggregate rules: com(...) / batchCom(...) calls with two quoted identifier arguments,
// stored as "fn:first,second".
for (const match of content.matchAll(/\b(com|batchCom)\s*\([^)]*?["']([A-Za-z_][A-Za-z0-9_]*)["']\s*,\s*["']([A-Za-z_][A-Za-z0-9_]*)["']/g)) {
g1eAggregateRules.add(`${match[1]}:${match[2]},${match[3]}`);
}
// Output columns: [field, top, leaf] triples inside a `const titleList = [...]` literal.
// Only the first titleList per file is read; each field is recorded once.
const titleListBlock = content.match(/const\s+titleList\s*=\s*\[([\s\S]*?)\]\s*[;\n]/i);
if (titleListBlock) {
for (const triple of titleListBlock[1].matchAll(/\[\s*["']([A-Za-z_][A-Za-z0-9_]*)["']\s*,\s*["']([^"'`]+)["']\s*,\s*["']([^"'`]*)["']\s*\]/g)) {
const field = triple[1];
const top = String(triple[2] || "").trim();
const leaf = String(triple[3] || "").trim();
if (!field || g1eOutputColumns.some((item) => item[0] === field)) continue;
g1eOutputColumns.push([field, leaf ? `${top}-${leaf}` : top]);
}
}
// HTTP methods declared via `type:` or `method:` keys.
for (const match of content.matchAll(/\b(type|method)\s*:\s*['"`](GET|POST|PUT|DELETE|PATCH)['"`]/gi)) {
methods.set(match[2].toUpperCase(), true);
}
// Payload fields read off response / res / result.
for (const match of content.matchAll(/\b(?:response|res|result)\.(data|content|rows|list|records|items)\b/g)) {
responsePaths.add(match[1]);
}
// Mode-like field names appearing within 120 characters after if / switch / case.
for (const match of content.matchAll(/\b(?:if|switch|case)\b[\s\S]{0,120}?(period_mode|reportType|mode|tab|sceneType|status|type)\b/gi)) {
branchFields.add(match[1]);
}
// Known mode literals appearing within 80 characters after a mode-like field name.
for (const match of content.matchAll(/\b(period_mode|reportType|mode|tab)\b[\s\S]{0,80}?['"`](month|week|day|detail|summary|list|chart)['"`]/gi)) {
modeValues.add(match[2]);
}
for (const match of content.matchAll(/\b(pageSize|pageNo|pageNum|page|rows|limit|offset)\b/g)) {
paginationVars.add(match[1]);
}
// Filter expressions: .filter(...) arguments or if-conditions containing a comparison;
// expressions longer than 160 characters are ignored.
for (const match of content.matchAll(/\.filter\(\s*(.+?)\s*\)|if\s*\(([^)]*(?:!==|!=|===|==|>|<)[^)]*)\)/g)) {
const expr = (match[1] || match[2] || "").trim();
if (expr && expr.length <= 160) {
filterExpressions.add(expr);
}
}
// Function / variable names, bucketed by keywords in the name; a name can land in
// more than one bucket.
for (const match of content.matchAll(/(?:function\s+|const\s+|let\s+|var\s+)([A-Za-z_$][\w$]*)\s*(?:=\s*(?:async\s*)?\(|\()/g)) {
const name = match[1];
if (/(query|search|load|fetch|init|mounted|created|getData)/i.test(name)) {
entryMethods.add(name);
}
if (/(export|download|excel|csv)/i.test(name)) {
exportMethods.add(name);
}
if (/(detail|charge|charges|info|details)/i.test(name)) {
secondaryRequestMethods.add(name);
}
}
// Quoted titles of 2-40 characters assigned to `title` / `document.title`.
for (const match of content.matchAll(/(?:title|document\.title)\s*[:=]\s*['"`]([^'"`]{2,40})['"`]/gi)) {
pageTitleKeywords.add(match[1]);
}
// Literal values for a fixed list of parameter names; the first value seen per key wins.
const staticParamMatches = content.matchAll(/\b(orgNo|orgCode|orgId|period_mode|reportType|pageSize|rows)\b\s*:\s*['"`]([^'"`\n]+)['"`]/gi);
for (const match of staticParamMatches) {
if (!(match[1] in staticParams)) {
staticParams[match[1]] = match[2];
}
}
}
// The <title> of index.html also counts as a page-title keyword.
for (const match of indexHtml.matchAll(/<title>([^<]{2,40})<\/title>/gi)) {
pageTitleKeywords.add(match[1].trim());
}
const bootstrapCandidates = collectBootstrapCandidates(files, indexHtml, Array.from(urls.values()));
const allEndpoints = Array.from(urls.values());
const g1eRequestRoles = deriveG1eRequestRoles(allEndpoints);
return {
endpoints: allEndpoints,
methods: Array.from(methods.keys()),
responsePaths: Array.from(responsePaths),
branchFields: Array.from(branchFields),
modeValues: Array.from(modeValues),
paginationVars: Array.from(paginationVars),
filterExpressions: Array.from(filterExpressions).slice(0, 8),
entryMethods: Array.from(entryMethods).slice(0, 10),
exportMethods: Array.from(exportMethods).slice(0, 10),
secondaryRequestMethods: Array.from(secondaryRequestMethods).slice(0, 10),
pageTitleKeywords: Array.from(pageTitleKeywords).slice(0, 10),
staticParams,
bootstrapCandidates,
g1eMainRequest: g1eRequestRoles.mainRequest,
g1eEnrichmentRequests: g1eRequestRoles.enrichmentRequests,
g1eJoinKeys: Array.from(g1eJoinKeys),
g1eOutputColumns: g1eOutputColumns.slice(0, 24),
g1eAggregateRules: Array.from(g1eAggregateRules).slice(0, 12),
};
}
/**
 * Splits detected endpoints into one main request and up to six enrichment requests.
 *
 * The main request is the first endpoint whose name matches `getwkorderall|all|list`
 * and whose request template has no row binding. Enrichment requests are the other
 * endpoints (by URL) that have a row binding or whose name/URL matches `query|info|acpt`.
 *
 * @param {object[]|null|undefined} endpoints - Detected endpoints; a missing list is
 *   treated as empty.
 * @returns {{ mainRequest: object|null, enrichmentRequests: object[] }}
 */
function deriveG1eRequestRoles(endpoints) {
  // Normalise once so both lookups share the same null-safe input.
  const candidates = Array.isArray(endpoints) ? endpoints : [];

  const mainRequest =
    candidates.find(
      (endpoint) =>
        /getwkorderall|all|list/i.test(endpoint?.name || "") &&
        !containsRowBinding(endpoint?.requestTemplate)
    ) || null;

  const enrichmentRequests = candidates
    .filter((endpoint) => {
      if (mainRequest && endpoint?.url === mainRequest.url) return false;
      return (
        containsRowBinding(endpoint?.requestTemplate) ||
        /query|info|acpt/i.test(endpoint?.name || "") ||
        /query|info|acpt/i.test(endpoint?.url || "")
      );
    })
    .slice(0, 6);

  return { mainRequest, enrichmentRequests };
}
/**
 * Recursively checks whether a value contains the literal text "${row." anywhere
 * in its strings.
 *
 * @param {*} value - String, array, plain object or anything else.
 * @returns {boolean} True when a nested string contains the row-binding marker.
 */
function containsRowBinding(value) {
  if (!value) return false;
  if (typeof value === "string") return value.includes("${row.");

  let children = null;
  if (Array.isArray(value)) {
    children = value;
  } else if (typeof value === "object") {
    children = Object.values(value);
  }
  return children ? children.some((child) => containsRowBinding(child)) : false;
}
/**
 * Detects request endpoints in a source file.
 *
 * For every URL-looking line, a window of two lines before and four lines after is
 * inspected for a URL (`url:` key, `fetch(...)`, `axios.get/post/request(...)`), an
 * HTTP method and a `contentType`. Endpoints are deduplicated by method + URL.
 *
 * @param {string} content - File text.
 * @returns {object[]} At most 12 endpoints shaped
 *   `{ name, url, role, method, contentType, description }`.
 */
function extractEndpoints(content) {
  const endpoints = [];
  const seen = new Set();
  const lines = content.split(/\r?\n/);

  for (let index = 0; index < lines.length; index += 1) {
    if (!isUrlFragment(lines[index])) continue;

    const block = lines
      .slice(Math.max(0, index - 2), Math.min(lines.length, index + 5))
      .join("\n");

    const urlMatch =
      block.match(/\burl\s*:\s*(['"`])([^'"`]+)\1/i) ||
      block.match(/fetch\s*\(\s*(['"`])([^'"`]+)\1/i) ||
      block.match(/axios\.(?:get|post|request)\s*\(\s*(['"`])([^'"`]+)\1/i);
    if (!urlMatch) continue;

    const url = sanitizeUrl(urlMatch[2]);
    if (!url) continue;

    // Only real HTTP verbs count as a method. Without this restriction an unrelated
    // `type: 'success'` or `type: 'warning'` inside the window became the "method".
    const explicitMethod = block.match(
      /\b(?:type|method)\s*:\s*(['"`])(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\1/i
    );
    let method = "GET";
    if (explicitMethod) {
      method = explicitMethod[2].toUpperCase();
    } else if (/\baxios\.post\s*\(/i.test(block)) {
      method = "POST";
    }

    const contentTypeMatch = block.match(/\bcontentType\s*:\s*(['"`])([^'"`]+)\1/i);
    const name = inferEndpointName(block, url, endpoints.length);
    const role = classifyRequestRole(url);

    const key = `${method}|${url}`;
    if (seen.has(key)) continue;
    seen.add(key);

    endpoints.push({
      name,
      url,
      role,
      method,
      contentType: contentTypeMatch ? contentTypeMatch[2] : null,
      description: `Detected from source snippet around line ${index + 1}`,
    });
  }

  return endpoints.slice(0, 12);
}
/**
 * Trims a candidate URL and discards it when it is not a usable request URL.
 *
 * @param {string} rawUrl - URL text captured from source.
 * @returns {string} The trimmed URL, or "" for empty input, static assets, template
 *   noise, `javascript:` / `data:` / `#` URLs and .js/.css/.png/.svg resources.
 */
function sanitizeUrl(rawUrl) {
  const value = rawUrl ? rawUrl.trim() : "";
  if (!value) return "";

  const rejected =
    isStaticAssetUrl(value) ||
    isTemplateNoiseUrl(value) ||
    /^(javascript:|data:|#)/i.test(value) ||
    /\.js(\?|$)|\.css(\?|$)|\.png(\?|$)|\.svg(\?|$)/i.test(value);

  return rejected ? "" : value;
}
/**
 * Assigns a coarse role to a request URL.
 *
 * @param {string} rawUrl - Request URL as found in source.
 * @returns {string} "template_noise", "export_service" or "local_helper" (for
 *   localhost / 127.0.0.1 URLs), "gateway_api", or "business_api".
 */
function classifyRequestRole(rawUrl) {
  const lowered = String(rawUrl || "").toLowerCase();
  if (!lowered || isTemplateNoiseUrl(lowered)) {
    return "template_noise";
  }

  const isLocal = ["localhost", "127.0.0.1"].some((host) => lowered.includes(host));
  if (isLocal) {
    const isExport = /(surfaceservices|reportservices|export)/i.test(lowered);
    return isExport ? "export_service" : "local_helper";
  }

  return lowered.includes("gateway") ? "gateway_api" : "business_api";
}
/**
 * Picks a name for a detected endpoint.
 *
 * @param {string} block - Source lines surrounding the request.
 * @param {string} url - Endpoint URL.
 * @param {number} index - Zero-based position used for the fallback name.
 * @returns {string} The first identifier declared with function/const/let/var in the
 *   block; otherwise the last path segment of the URL; otherwise `endpoint_<index + 1>`.
 */
function inferEndpointName(block, url, index) {
  const functionMatch = block.match(/(?:function|const|let|var)\s+([A-Za-z_$][\w$]*)/);
  if (functionMatch) return functionMatch[1];

  // Drop the query string and hash first; otherwise "/api/list?page=1" would be
  // named "page=1" instead of "list".
  const pathOnly = url.split(/[?#]/)[0];
  const segments = pathOnly.split("/").filter(Boolean);
  return segments[segments.length - 1] || `endpoint_${index + 1}`;
}
/**
 * Builds the list of bootstrap candidates from detected endpoints and bootstrap hints.
 *
 * Endpoints are considered first (source "api_endpoint"), then hints (source = hint
 * type). Candidates are deduplicated by role + target URL.
 *
 * @param {Array<{path: string, content: string}>} files - Scanned text files.
 * @param {string} indexHtml - Contents of index.html.
 * @param {Array<{url: string}>} endpoints - Detected endpoints.
 * @returns {object[]} At most 8 candidates.
 */
function collectBootstrapCandidates(files, indexHtml, endpoints) {
  const candidates = [];
  const seenKeys = new Set();

  // Adds the candidate for (rawUrl, source) unless it is unusable or already present.
  const consider = (rawUrl, source) => {
    const candidate = buildBootstrapCandidate(rawUrl, source);
    if (!candidate) return;
    const key = `${candidate.role}|${candidate.targetUrl}`;
    if (seenKeys.has(key)) return;
    seenKeys.add(key);
    candidates.push(candidate);
  };

  for (const endpoint of endpoints) {
    consider(endpoint.url, "api_endpoint");
  }
  for (const hint of collectBootstrapHints(files, indexHtml)) {
    consider(hint.url, hint.type);
  }

  return candidates.slice(0, 8);
}
/**
 * Turns a raw URL into a bootstrap candidate descriptor.
 *
 * @param {string} rawUrl - Absolute or relative URL.
 * @param {string} source - Where the URL came from (e.g. "api_endpoint" or a hint type).
 * @returns {object|null} `{ expectedDomain, targetUrl, source, role, validForBootstrap }`,
 *   or null when the URL is empty, has no role, or anything throws while building it.
 */
function buildBootstrapCandidate(rawUrl, source) {
  if (!rawUrl) return null;

  try {
    // The placeholder base lets relative URLs parse; it is only used for parsing.
    const parsed = new URL(rawUrl, "http://placeholder.local");
    const role = classifyBootstrapRole(rawUrl, source, parsed);
    if (!role) {
      return null;
    }

    const isAbsolute = /^https?:\/\//i.test(rawUrl);
    let targetUrl = rawUrl;
    if (isAbsolute) {
      // API-style roles bootstrap from the origin; other roles keep the page path.
      const originOnly = role === "business_api" || role === "gateway_api";
      targetUrl = originOnly ? parsed.origin : `${parsed.origin}${parsed.pathname}`;
    }

    return {
      expectedDomain: isAbsolute ? parsed.hostname : "",
      targetUrl,
      source,
      role,
      validForBootstrap: BOOTSTRAP_ROLE_PRIORITY.includes(role),
    };
  } catch (_) {
    return null;
  }
}
/**
 * Decides which bootstrap role a URL plays.
 *
 * @param {string} rawUrl - URL exactly as found in source.
 * @param {string} source - Origin of the URL: "api_endpoint", an HTML tag name
 *   (a/form/iframe), "navigation", or a lower-cased variable name such as "apiurl".
 * @param {URL} [parsedUrl] - Parsed form of `rawUrl`, used for hostname and pathname.
 * @returns {string} One of "template_noise", "static_asset", "business_entry",
 *   "export_service", "local_helper", "gateway_api" or "business_api".
 */
function classifyBootstrapRole(rawUrl, source, parsedUrl) {
  const value = String(rawUrl || "").trim();
  if (!value || isTemplateNoiseUrl(value)) return "template_noise";
  if (isStaticAssetUrl(value)) return "static_asset";
  if (/^(javascript:|data:|#)/i.test(value)) return "template_noise";

  const isAbsolute = /^https?:\/\//i.test(value);
  if (!isAbsolute) {
    return value.startsWith("/") ? "business_entry" : "template_noise";
  }

  const hostname = String(parsedUrl?.hostname || "").toLowerCase();
  const pathname = String(parsedUrl?.pathname || "");
  if (hostname === "localhost" || hostname === "127.0.0.1") {
    if (/(SurfaceServices|ReportServices|export)/i.test(pathname)) {
      return "export_service";
    }
    return "local_helper";
  }

  if (/(gateway)/i.test(hostname) || /(gateway)/i.test(value)) {
    return "gateway_api";
  }

  const sourceName = String(source || "");
  // Anchored on purpose: the bare alternative `a` (the <a> tag) used to match any
  // source containing the letter "a" — including "api_endpoint", "apiurl" and
  // "baseurl" — which made the business_api branch below unreachable for them.
  if (/^(?:sourceurl|targeturl|navigation|form|iframe|a)$/i.test(sourceName)) {
    return "business_entry";
  }
  if (/(apiurl|requesturl|baseurl|api_endpoint)/i.test(sourceName) || /\/api\//i.test(pathname)) {
    return "business_api";
  }
  return "business_entry";
}
/**
 * Tells whether a URL is template noise rather than a concrete address: it contains a
 * `${...}` interpolation, a `%s` format slot, or the words "placeholder" / "not a valid".
 *
 * @param {string} rawUrl - URL text to inspect.
 * @returns {boolean}
 */
function isTemplateNoiseUrl(rawUrl) {
  const noisePattern = /\$\{[^}]+\}|%s|placeholder|not a valid/i;
  return noisePattern.test(rawUrl);
}
/**
 * Tells whether a URL points at a static asset rather than a business page or API.
 *
 * A URL counts as static when it has a `cdn` / `cdnjs` / `static` / `asset(s)` host
 * label or path segment, or when its path ends in a common asset extension.
 *
 * @param {string} rawUrl - URL text to inspect.
 * @returns {boolean}
 */
function isStaticAssetUrl(rawUrl) {
  // The keywords must stand alone, delimited by "/", ".", "_", "-" or the string edges.
  // A bare substring match wrongly discarded business URLs such as
  // "/api/statistics/list" ("static") or "/assetManagement/query" ("asset").
  const staticSegment = /(?:^|[\/._-])(?:cdn(?:js)?\d*|static|assets?)(?:[\/._-]|$)/i;
  const assetExtension = /(?:\.js|\.css|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.ico)(?:\?|$)/i;
  return staticSegment.test(rawUrl) || assetExtension.test(rawUrl);
}
/**
 * Assembles the deterministic scene description from the collected signals.
 *
 * @param {object} context - Analysis context; only `deterministicSignals` is read.
 * @param {string} sourceDir - Scanned directory; its base name seeds the scene name.
 * @returns {object} Scene description: ids and diagnostics, workflow archetype, bootstrap,
 *   params, modes, workflow steps and evidence, main/enrichment requests, merge plan,
 *   normalize rules, artifact contract, validation hints, readiness, confidence,
 *   uncertainties and the raw signals.
 */
function buildDeterministicSceneIr(context, sourceDir) {
  const signals = context.deterministicSignals || {};

  // Derivation steps run in a fixed order; each one reads only from `signals`
  // and from results computed earlier in this function.
  const sceneName = humanizeSceneName(path.basename(sourceDir));
  const sceneIdDiagnostics = deriveSceneIdDiagnostics({ sourceDir, sceneName, signals });
  const sceneId = sceneIdDiagnostics.selected || "";
  const workflowArchetype = classifyWorkflowArchetype(signals);
  const evidence = buildEvidence(signals, workflowArchetype);
  const bootstrap = deriveBootstrap(signals);
  const modes = buildModes(signals);
  const workflowSteps = buildWorkflowSteps(signals, workflowArchetype);
  const workflowEvidence = buildWorkflowEvidence(signals);
  const fallbackResponsePath = signals.responsePaths?.[0] || "";
  const normalizeRules = buildNormalizeRules(signals);
  const params = buildParams(signals, workflowArchetype);
  const confidence = scoreConfidence(signals, workflowArchetype);
  const mainRequest = buildG1eMainRequest(signals);
  const enrichmentRequests = buildG1eEnrichmentRequests(signals);
  const mergePlan = buildG1eMergePlan(signals);

  const readiness = buildReadiness({
    sceneIdDiagnostics,
    workflowArchetype,
    bootstrap,
    apiEndpoints: signals.endpoints || [],
    params,
    workflowSteps,
    mainRequest,
    enrichmentRequests,
    mergePlan,
    confidence,
  });

  // The scene counts as runtime compatible only when every param uses one of these resolvers.
  const runtimeResolvers = ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"];
  const firstMode = modes[0];

  return {
    sceneId,
    sceneIdDiagnostics,
    sceneName,
    sceneKind: "report_collection",
    workflowArchetype,
    bootstrap,
    params,
    modes,
    defaultMode: firstMode ? firstMode.name : null,
    modeSwitchField: signals.branchFields?.find((field) => /mode|period/i.test(field)) || null,
    workflowSteps,
    workflowEvidence,
    mainRequest,
    enrichmentRequests,
    mergePlan,
    requestTemplate: mainRequest?.requestTemplate || {},
    responsePath: mainRequest?.responsePath || fallbackResponsePath,
    normalizeRules,
    artifactContract: {
      type: "report-artifact",
      successStatus: ["ok", "partial", "empty"],
      failureStatus: ["blocked", "error"],
    },
    validationHints: {
      requiresTargetPage: true,
      runtimeCompatible: params.every((param) => runtimeResolvers.includes(param.resolver)),
      manualCompletionRequired: readiness.level !== "A",
      missingPieces: readiness.missingPieces.slice(),
    },
    evidence,
    readiness,
    apiEndpoints: signals.endpoints || [],
    staticParams: signals.staticParams || {},
    columnDefs: mergePlan?.outputColumns || [],
    confidence,
    uncertainties: buildUncertainties(signals, workflowArchetype),
    deterministicSignals: signals,
  };
}
/**
 * Wraps the detected main request endpoint into a request descriptor.
 *
 * @param {object} signals - Deterministic signals.
 * @returns {object|null} `{ apiEndpoint, requestTemplate, responsePath, columnDefs }`,
 *   or null when no main request was detected.
 */
function buildG1eMainRequest(signals) {
  const apiEndpoint = signals.g1eMainRequest || null;
  if (apiEndpoint === null) return null;

  const requestTemplate = apiEndpoint.requestTemplate || {};
  const responsePath = signals.responsePaths?.[0] || "";
  const columnDefs = signals.g1eOutputColumns || [];
  return { apiEndpoint, requestTemplate, responsePath, columnDefs };
}
/**
 * Wraps each detected enrichment endpoint into a request descriptor.
 *
 * @param {object} signals - Deterministic signals.
 * @returns {object[]} One `{ name, apiEndpoint, paramBindings, responsePath, consumedFields }`
 *   per enrichment endpoint. `consumedFields` lists the field names found after the ":"
 *   of every aggregate rule ("fn:a,b" contributes "a" and "b").
 */
function buildG1eEnrichmentRequests(signals) {
  // Built afresh for every request so that no two descriptors share an array.
  const listConsumedFields = () => {
    const fields = [];
    for (const rule of signals.g1eAggregateRules || []) {
      const parts = String(rule).split(":")[1]?.split(",") || [];
      for (const part of parts) {
        const name = part.trim();
        if (name) fields.push(name);
      }
    }
    return fields;
  };

  const enrichmentEndpoints = signals.g1eEnrichmentRequests || [];
  return enrichmentEndpoints.map((endpoint) => ({
    name: endpoint.name,
    apiEndpoint: endpoint,
    paramBindings: endpoint.requestTemplate || {},
    responsePath: signals.responsePaths?.[0] || "",
    consumedFields: listConsumedFields(),
  }));
}
/**
 * Builds the plan for merging main and enrichment data into output columns.
 *
 * @param {object} signals - Deterministic signals.
 * @returns {object|null} `{ joinKeys, fieldMappings, aggregateRules, outputColumns }`,
 *   or null when there are no join keys, output columns or aggregate rules at all.
 *   A field mapping has sourceType "main" when the field is a join key, otherwise
 *   "aggregate".
 */
function buildG1eMergePlan(signals) {
  const joinKeys = signals.g1eJoinKeys || [];
  const outputColumns = signals.g1eOutputColumns || [];
  const aggregateRules = signals.g1eAggregateRules || [];

  const hasAnySignal = joinKeys.length > 0 || outputColumns.length > 0 || aggregateRules.length > 0;
  if (!hasAnySignal) {
    return null;
  }

  const fieldMappings = outputColumns.map(([field]) => {
    const sourceType = joinKeys.includes(field) ? "main" : "aggregate";
    return { outputField: field, sourceType, sourceField: field, requestName: null };
  });

  return { joinKeys, fieldMappings, aggregateRules, outputColumns };
}
/**
 * Proposes scene ids from several strategies and selects the first valid one.
 *
 * Strategies, in order: keyword-based id, alias-based id, plain directory slug.
 * When none is valid the first candidate is selected anyway and flagged invalid.
 *
 * @param {{ sourceDir: string, sceneName: string, signals: object }} input
 * @returns {{ selected: string, candidateSource: string, valid: boolean,
 *   invalidReason: string|null, candidates: object[] }}
 */
function deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }) {
  const baseName = path.basename(sourceDir || "");
  const candidates = [];
  const knownValues = new Set();

  // Slugifies, deduplicates and validates one proposal before recording it.
  const propose = (rawValue, source) => {
    const value = slugifyAscii(rawValue);
    if (!value || knownValues.has(value)) return;
    knownValues.add(value);
    const { valid, reason } = validateSceneIdCandidate(value, { sceneName, sourceDir, signals });
    candidates.push({ value, source, valid, reason: valid ? null : reason });
  };

  propose(buildKeywordSceneId(baseName, sceneName, signals), "deterministic_keywords");
  propose(buildAliasSceneId(baseName), "controlled_alias");
  propose(slugifyAscii(baseName), "directory_slug");

  const chosen = candidates.find((candidate) => candidate.valid) || candidates[0] || null;
  return {
    selected: chosen?.value || "",
    candidateSource: chosen?.source || "",
    valid: Boolean(chosen?.valid),
    invalidReason: chosen && !chosen.valid ? chosen.reason : null,
    candidates,
  };
}
/**
 * Builds a scene id from business keywords.
 *
 * The directory name and scene name are aliased and slugified; the longest result is
 * used directly when it has at least two dash-separated parts. Otherwise up to five
 * business tokens are joined with dashes.
 *
 * @param {string} baseName - Source directory base name.
 * @param {string} sceneName - Human-readable scene name.
 * @param {object} signals - Deterministic signals passed to the token extractor.
 * @returns {string} The scene id, or "" when nothing usable was found.
 */
function buildKeywordSceneId(baseName, sceneName, signals) {
  const labels = [baseName, sceneName].filter(Boolean);

  const aliasedSlugs = new Set();
  for (const label of labels) {
    const slug = slugifyAscii(applySceneIdAliases(label));
    if (slug) aliasedSlugs.add(slug);
  }

  // Longest slug wins; on a tie the earlier one is kept.
  let longest = "";
  for (const slug of aliasedSlugs) {
    if (slug.length > longest.length) longest = slug;
  }
  if (longest && longest.split("-").length >= 2) {
    return longest;
  }

  const tokens = extractBusinessTokens(labels.join(" "), signals);
  return tokens.length ? tokens.slice(0, 5).join("-") : "";
}
/**
 * Builds a scene id by replacing every known alias phrase in the directory name and
 * slugifying the result.
 *
 * @param {string} baseName - Source directory base name.
 * @returns {string} The slug, or "" when `baseName` is empty.
 */
function buildAliasSceneId(baseName) {
  if (!baseName) return "";

  // Replacements are padded with spaces so that adjacent aliases stay separate tokens.
  const aliased = SCENE_ID_ALIAS_RULES.reduce(
    (text, { pattern, replacement }) => text.replace(pattern, ` ${replacement} `),
    String(baseName || "")
  );
  return slugifyAscii(aliased);
}
/**
 * Collects distinct business-meaningful slug tokens from free text and signals.
 *
 * Tokens come from the aliased raw text, the aliased page-title keywords, endpoint
 * names and the last four separator-delimited segments of each endpoint URL. Tokens
 * shorter than two characters or listed as generic are dropped.
 *
 * @param {string} rawText - Text such as directory and scene names.
 * @param {object} [signals={}] - Deterministic signals (`pageTitleKeywords`, `endpoints`).
 * @returns {string[]} Unique tokens in first-seen order.
 */
function extractBusinessTokens(rawText, signals = {}) {
  const collected = [];

  const addTokensFrom = (value) => {
    const slug = slugifyAscii(value);
    if (!slug) return;
    const usable = slug
      .split("-")
      .filter((token) => token && token.length >= 2 && !GENERIC_SCENE_ID_TOKENS.has(token));
    collected.push(...usable);
  };

  addTokensFrom(applySceneIdAliases(rawText));

  (signals.pageTitleKeywords || []).forEach((keyword) => {
    addTokensFrom(applySceneIdAliases(keyword));
  });

  (signals.endpoints || []).forEach((endpoint) => {
    addTokensFrom(endpoint?.name);
    const trailingSegments = String(endpoint?.url || "")
      .split(/[/?#=&._-]+/)
      .filter(Boolean)
      .slice(-4);
    trailingSegments.forEach(addTokensFrom);
  });

  return [...new Set(collected)];
}
/**
 * Replaces every known alias phrase in a value with its space-padded ASCII alias.
 *
 * @param {*} value - Text to translate; non-strings are stringified, nullish becomes "".
 * @returns {string} Text with all alias rules applied in their defined order.
 */
function applySceneIdAliases(value) {
  return SCENE_ID_ALIAS_RULES.reduce(
    (text, { pattern, replacement }) => text.replace(pattern, ` ${replacement} `),
    String(value || "")
  );
}
/**
 * Validates a scene id candidate.
 *
 * Checks run in order and the first failure decides the reason: empty, generic,
 * numeric only, too short, numeric dominant, and finally semantic detachment (none of
 * the expected business tokens of three or more characters appears in the id).
 *
 * @param {string} sceneId - Candidate id.
 * @param {{ sceneName?: string, sourceDir?: string, signals?: object }} [context]
 * @returns {{ valid: boolean, reason: string|null }}
 */
function validateSceneIdCandidate(sceneId, { sceneName = "", sourceDir = "", signals = {} } = {}) {
  const reject = (reason) => ({ valid: false, reason });

  const normalized = slugifyAscii(sceneId);
  if (!normalized) return reject("empty_scene_id");
  if (GENERIC_SCENE_IDS.has(normalized)) return reject("generic_scene_id");

  const letterCount = (normalized.match(/[a-z]/g) || []).length;
  const digitCount = (normalized.match(/\d/g) || []).length;
  if (letterCount === 0) return reject("numeric_only_scene_id");

  const isShort = normalized.length < 5 || letterCount < 3;
  if (isShort && !normalized.includes("-")) return reject("scene_id_too_short");
  if (digitCount > letterCount && letterCount < 4) return reject("numeric_dominant_scene_id");

  const contextText = [sceneName, path.basename(sourceDir || "")].filter(Boolean).join(" ");
  const expectedTokens = extractBusinessTokens(contextText, signals).filter((token) => token.length >= 3);
  const isDetached =
    expectedTokens.length > 0 && !expectedTokens.some((token) => normalized.includes(token));
  if (isDetached) return reject("scene_id_semantic_detached");

  return { valid: true, reason: null };
}
/**
 * Classifies the workflow shape suggested by the collected signals.
 *
 * Rules are tried in order; the first match wins:
 *   1. main + enrichment requests with join keys or output columns -> "single_request_enrichment"
 *   2. pagination + secondary request + post-processing            -> "paginated_enrichment"
 *   3. mode branch + at least 2 mode values + at least 2 endpoints -> "multi_mode_request"
 *   4. state-like entry methods/filters + at most 1 endpoint       -> "page_state_eval"
 *   5. otherwise                                                   -> "single_request_table"
 *
 * @param {object} signals - Deterministic signals.
 * @returns {string} Archetype name.
 */
function classifyWorkflowArchetype(signals) {
  const count = (list) => (list || []).length;
  const businessEndpoints = getBusinessEndpoints(signals);

  const hasPagination = count(signals.paginationVars) > 0;
  const hasSecondaryRequest =
    count(signals.secondaryRequestMethods) > 0 || businessEndpoints.length >= 2;
  const hasPostProcess = count(signals.filterExpressions) > 0 || count(signals.exportMethods) > 0;

  const hasG1eRequests = Boolean(signals.g1eMainRequest) && count(signals.g1eEnrichmentRequests) > 0;
  if (hasG1eRequests) {
    const hasG1eMergeSignal = count(signals.g1eJoinKeys) > 0 || count(signals.g1eOutputColumns) > 0;
    if (hasG1eMergeSignal) return "single_request_enrichment";
  }

  if (hasPagination && hasSecondaryRequest && hasPostProcess) {
    return "paginated_enrichment";
  }

  const hasModeBranch = (signals.branchFields || []).some((field) =>
    /period_mode|reportType|tjzq|mode/i.test(field)
  );
  const hasModeDivergence = count(signals.modeValues) >= 2 && businessEndpoints.length >= 2;
  if (hasModeBranch && hasModeDivergence) {
    return "multi_mode_request";
  }

  const pageStateSignals = [
    ...(signals.entryMethods || []),
    ...(signals.filterExpressions || []),
  ].join(" ");
  const looksLikePageState = /(state|status|ready|available|enabled)/i.test(pageStateSignals);
  if (looksLikePageState && businessEndpoints.length <= 1) {
    return "page_state_eval";
  }

  return "single_request_table";
}
/**
 * Creates a normalized evidence record, filling defaults for omitted fields.
 *
 * @param {object} fields - Evidence fields; every property is optional.
 * @returns {{ kind: string, evidenceType: string, layer: string, subject: string,
 *   summary: string, source: string, confidence: number, payload: object|null }}
 *   `subject` falls back to `evidenceType` when empty; `payload` is kept only when it
 *   is a non-null object.
 */
function createEvidenceItem({
  kind = "deterministic",
  evidenceType = "signal",
  layer = "business",
  subject = "",
  summary = "",
  source = "runner",
  confidence = 0.7,
  payload = null,
}) {
  const resolvedSubject = subject || evidenceType;
  const hasObjectPayload = Boolean(payload) && typeof payload === "object";

  const item = { kind, evidenceType, layer };
  item.subject = resolvedSubject;
  item.summary = summary;
  item.source = source;
  item.confidence = confidence;
  item.payload = hasObjectPayload ? payload : null;
  return item;
}
/**
 * Translate extracted signals into an ordered list of evidence records.
 *
 * Records are appended in a fixed order: bootstrap, business endpoints, mode,
 * request template, G1-E main/enrichment/merge plan, response paths,
 * normalize rules, workflow, host-runtime dependencies, export, and finally a
 * classification record that is always present.
 *
 * @param {object} signals Extracted signal bag; list-valued fields may be absent.
 * @param {string} workflowArchetype Archetype label used as subject for mode/workflow records.
 * @returns {object[]} Evidence records created through createEvidenceItem.
 */
function buildEvidence(signals, workflowArchetype) {
  // Read the list on every call (no caching) so payloads reference exactly
  // what direct `signals.x || []` access would produce.
  const listOf = (key) => signals[key] || [];
  const hasAny = (key) => listOf(key).length > 0;
  const joinedOrNone = (key) => listOf(key).join(", ") || "none";
  const endpointsWithRole = (role) => listOf("endpoints").filter((endpoint) => endpoint.role === role);

  const collected = [];
  const record = (fields) => {
    collected.push(createEvidenceItem(fields));
  };

  const businessEndpoints = getBusinessEndpoints(signals);
  const localDependencies = endpointsWithRole("local_helper");
  const exportServices = endpointsWithRole("export_service");

  const bootstrapCandidate = listOf("bootstrapCandidates").find((candidate) => candidate.validForBootstrap);
  if (bootstrapCandidate) {
    const bootstrapLabel = bootstrapCandidate.expectedDomain || bootstrapCandidate.targetUrl;
    record({
      evidenceType: "bootstrap_candidate",
      layer: "business",
      subject: bootstrapLabel || "bootstrap",
      summary: `Bootstrap candidate resolved to ${bootstrapLabel}.`,
      confidence: 0.92,
      payload: bootstrapCandidate,
    });
  }

  for (const endpoint of businessEndpoints) {
    const endpointLabel = endpoint.name || endpoint.url;
    record({
      evidenceType: "endpoint_candidate",
      layer: "business",
      subject: endpointLabel,
      summary: `Business endpoint ${endpointLabel} detected.`,
      confidence: 0.9,
      payload: endpoint,
    });
  }

  if (hasAny("branchFields") || hasAny("modeValues")) {
    record({
      evidenceType: "mode_candidate",
      layer: "business",
      subject: workflowArchetype,
      summary: `Mode signals: fields=${joinedOrNone("branchFields")} values=${joinedOrNone("modeValues")}`,
      confidence: 0.86,
      payload: {
        branchFields: listOf("branchFields"),
        modeValues: listOf("modeValues"),
      },
    });
  }

  const requestSignals = uniqueStringValues([
    ...listOf("entryMethods"),
    ...Object.keys(signals.staticParams || {}),
  ]);
  if (requestSignals.length > 0) {
    record({
      evidenceType: "request_template_candidate",
      layer: "business",
      subject: requestSignals[0],
      summary: `Request-side signals detected: ${requestSignals.join(", ")}`,
      confidence: 0.8,
      payload: {
        entryMethods: listOf("entryMethods"),
        staticParams: signals.staticParams || {},
      },
    });
  }

  const mainRequest = signals.g1eMainRequest;
  if (mainRequest) {
    record({
      evidenceType: "main_request_candidate",
      layer: "business",
      subject: mainRequest.name,
      summary: `G1-E main request candidate resolved to ${mainRequest.name}.`,
      confidence: 0.86,
      payload: mainRequest,
    });
  }

  for (const endpoint of signals.g1eEnrichmentRequests || []) {
    record({
      evidenceType: "enrichment_request_candidate",
      layer: "business",
      subject: endpoint.name,
      summary: `G1-E enrichment request candidate resolved to ${endpoint.name}.`,
      confidence: 0.84,
      payload: endpoint,
    });
  }

  if (listOf("g1eJoinKeys").length || listOf("g1eOutputColumns").length || listOf("g1eAggregateRules").length) {
    record({
      evidenceType: "merge_plan_candidate",
      layer: "workflow",
      subject: "g1e_merge_plan",
      summary: "G1-E merge plan candidate detected.",
      confidence: 0.83,
      payload: {
        joinKeys: listOf("g1eJoinKeys"),
        outputColumns: listOf("g1eOutputColumns"),
        aggregateRules: listOf("g1eAggregateRules"),
      },
    });
  }

  if (hasAny("responsePaths")) {
    const responsePaths = listOf("responsePaths");
    record({
      evidenceType: "response_path_candidate",
      layer: "business",
      subject: responsePaths[0],
      summary: `Response paths detected: ${responsePaths.join(", ")}`,
      confidence: 0.84,
      payload: { responsePaths },
    });
  }

  if (hasAny("filterExpressions") || hasAny("exportMethods")) {
    record({
      evidenceType: "normalize_rules_candidate",
      layer: "business",
      subject: "normalize_rules",
      summary: `Post-process signals detected: filters=${listOf("filterExpressions").length}, exports=${listOf("exportMethods").length}`,
      confidence: 0.76,
      payload: {
        filterExpressions: listOf("filterExpressions"),
        exportMethods: listOf("exportMethods"),
      },
    });
  }

  if (hasAny("paginationVars") || hasAny("secondaryRequestMethods")) {
    record({
      evidenceType: "workflow_candidate",
      layer: "workflow",
      subject: workflowArchetype,
      summary: `Workflow signals: pagination=${joinedOrNone("paginationVars")} secondary=${joinedOrNone("secondaryRequestMethods")}`,
      confidence: 0.82,
      payload: {
        paginationVars: listOf("paginationVars"),
        secondaryRequestMethods: listOf("secondaryRequestMethods"),
        exportMethods: listOf("exportMethods"),
      },
    });
  }

  // Local helpers and export services are both reported as host-runtime dependencies.
  for (const endpoint of [...localDependencies, ...exportServices]) {
    record({
      evidenceType: "localhost_dependency_candidate",
      layer: "host_runtime",
      subject: endpoint.name || endpoint.url,
      summary: `Host runtime dependency detected: ${endpoint.url}`,
      confidence: 0.88,
      payload: endpoint,
    });
  }

  if (hasAny("exportMethods") || exportServices.length > 0) {
    // Export method names win; service URLs are only listed when no method name exists.
    const exportSummary =
      listOf("exportMethods").join(", ") || exportServices.map((item) => item.url).join(", ");
    record({
      evidenceType: "export_candidate",
      layer: "output",
      subject: signals.exportMethods?.[0] || exportServices[0]?.name || "export",
      summary: `Export signals detected: ${exportSummary}`,
      confidence: 0.78,
      payload: {
        exportMethods: listOf("exportMethods"),
        exportEndpoints: exportServices,
      },
    });
  }

  // The classification record is unconditional and always last.
  record({
    kind: "classification",
    evidenceType: "workflow_candidate",
    layer: "classification",
    subject: workflowArchetype,
    summary: `Workflow archetype classified as ${workflowArchetype}.`,
    confidence: 0.72,
    payload: { workflowArchetype },
  });

  return collected;
}
/**
 * Pick the bootstrap target for a scene.
 *
 * Roles are tried in BOOTSTRAP_ROLE_PRIORITY order; for each role the first
 * candidate that is valid for bootstrap and carries a target URL wins. When no
 * role yields a candidate, DEFAULT_BOOTSTRAP is returned with the page title
 * keywords from the signals filled in.
 *
 * @param {object} signals Signal bag; reads `bootstrapCandidates` and `pageTitleKeywords`.
 * @returns {object} Bootstrap descriptor (expectedDomain, targetUrl, requiresTargetPage, pageTitleKeywords, source).
 */
function deriveBootstrap(signals) {
  const pageTitleKeywords = signals.pageTitleKeywords || [];
  const candidates = signals.bootstrapCandidates || [];

  for (const role of BOOTSTRAP_ROLE_PRIORITY) {
    const match = candidates.find(
      (item) => item.role === role && item.validForBootstrap && item.targetUrl
    );
    if (!match) {
      continue;
    }
    return {
      expectedDomain: match.expectedDomain || "",
      targetUrl: match.targetUrl || "",
      requiresTargetPage: true,
      pageTitleKeywords,
      source: match.source || "deterministic",
    };
  }

  return {
    ...DEFAULT_BOOTSTRAP,
    pageTitleKeywords,
  };
}
/**
 * Return the endpoints whose role is "business_api", "gateway_api" or
 * "business_entry", preserving their original order.
 *
 * @param {object} signals Signal bag; `endpoints` may be absent.
 * @returns {object[]} Matching endpoint objects (same references as the input).
 */
function getBusinessEndpoints(signals) {
  const businessRoles = new Set(["business_api", "gateway_api", "business_entry"]);
  const allEndpoints = signals.endpoints || [];
  return allEndpoints.filter(({ role }) => businessRoles.has(role));
}
/**
 * Build one mode descriptor per detected mode value (at most the first four).
 *
 * Each mode is conditioned on the first branch field matching
 * /mode|period|tab|type/i (or "period_mode" when none matches) being equal to
 * the mode value. The endpoint is picked positionally from `signals.endpoints`,
 * falling back to the first endpoint and then to null.
 *
 * NOTE(review): the positional lookup uses all endpoints, not only business
 * endpoints — confirm that this is intended.
 *
 * @param {object} signals Signal bag.
 * @returns {object[]} Mode descriptors; empty when no mode values exist.
 */
function buildModes(signals) {
  const modeValues = (signals.modeValues || []).slice(0, 4);
  if (modeValues.length === 0) {
    return [];
  }

  // These lookups do not depend on the individual mode, so resolve them once.
  const conditionField =
    signals.branchFields?.find((field) => /mode|period|tab|type/i.test(field)) || "period_mode";
  const responsePath = signals.responsePaths?.[0] || "";
  const endpoints = signals.endpoints;

  const modes = [];
  for (const [index, value] of modeValues.entries()) {
    modes.push({
      name: value,
      label: value,
      condition: {
        field: conditionField,
        operator: "equals",
        value,
      },
      apiEndpoint: endpoints?.[index] || endpoints?.[0] || null,
      columnDefs: [],
      requestTemplate: {},
      // Each mode gets its own rules object.
      normalizeRules: buildNormalizeRules(signals),
      responsePath,
    });
  }
  return modes;
}
/**
 * Produce the ordered workflow steps for a given archetype.
 *
 * - "single_request_enrichment": request, one enrichment_request per G1-E
 *   enrichment endpoint, then transform.
 * - "multi_mode_request": request, transform.
 * - "paginated_enrichment": paginate, then optional secondary_request, filter
 *   and export steps depending on the available signals.
 * - "page_state_eval": a single page_state step.
 * - anything else: request, transform.
 *
 * @param {object} signals Signal bag.
 * @param {string} workflowArchetype Archetype label.
 * @returns {object[]} Workflow step descriptors.
 */
function buildWorkflowSteps(signals, workflowArchetype) {
  const [primary, secondary] = getBusinessEndpoints(signals);
  const primaryEndpoint = primary?.name || null;
  const secondaryEndpoint = secondary?.name || null;
  const firstEntry = signals.entryMethods?.[0] || null;

  switch (workflowArchetype) {
    case "single_request_enrichment": {
      const steps = [
        {
          type: "request",
          entry: firstEntry,
          // Prefer the resolved G1-E main request over the first business endpoint.
          endpoint: signals.g1eMainRequest?.name || primaryEndpoint,
          description: "Query the main list for G1-E workflow.",
        },
      ];
      for (const endpoint of signals.g1eEnrichmentRequests || []) {
        steps.push({
          type: "enrichment_request",
          endpoint: endpoint.name,
          description: "Fetch lightweight enrichment payload.",
        });
      }
      steps.push({
        type: "transform",
        description: "Merge enrichment payloads into aggregate output.",
      });
      return steps;
    }

    case "multi_mode_request":
      return [
        {
          type: "request",
          entry: firstEntry,
          endpoint: primaryEndpoint,
          description: "Select mode and query the matching endpoint.",
        },
        {
          type: "transform",
          description: "Normalize mode-specific rows into a shared artifact.",
        },
      ];

    case "paginated_enrichment": {
      const steps = [
        {
          type: "paginate",
          entry: firstEntry,
          endpoint: primaryEndpoint,
          description: "Iterate primary list pages.",
        },
      ];
      const hasSecondaryMethod = (signals.secondaryRequestMethods || []).length > 0;
      if (hasSecondaryMethod || secondaryEndpoint) {
        steps.push({
          type: "secondary_request",
          entry: signals.secondaryRequestMethods?.[0] || signals.entryMethods?.[1] || null,
          endpoint: secondaryEndpoint,
          description: "Fetch per-row or batched detail data.",
        });
      }
      const firstFilter = signals.filterExpressions?.[0];
      if (firstFilter) {
        steps.push({
          type: "filter",
          expr: firstFilter,
          description: "Apply business-side filtering.",
        });
      }
      const firstExport = signals.exportMethods?.[0];
      if (firstExport) {
        steps.push({
          type: "export",
          entry: firstExport,
          description: "Prepare export payload or trigger download logic.",
        });
      }
      return steps;
    }

    case "page_state_eval":
      return [
        {
          type: "page_state",
          entry: firstEntry,
          description: "Evaluate page state and derive readiness outcome.",
        },
      ];

    default:
      return [
        {
          type: "request",
          entry: firstEntry,
          endpoint: primaryEndpoint,
          description: "Issue the primary scene request.",
        },
        {
          type: "transform",
          description: "Normalize the primary response.",
        },
      ];
  }
}
/**
 * Build the default normalize-rules object for a scene.
 *
 * The rule type, required fields and null filtering are fixed; only the
 * response hints come from the signals.
 *
 * @param {object} signals Signal bag; `responsePaths` may be absent.
 * @returns {{type: string, requiredFields: Array, filterNull: boolean, responseHints: Array}}
 */
function buildNormalizeRules(signals) {
  const responseHints = signals.responsePaths || [];
  return {
    type: "validate_required",
    requiredFields: [],
    filterNull: true,
    responseHints,
  };
}
/**
 * Summarize workflow-related signals as de-duplicated name lists.
 *
 * @param {object} signals Signal bag.
 * @returns {{requestEntries: Array, paginationFields: Array, secondaryRequestEntries: Array, postProcessSteps: Array}}
 *   requestEntries: up to three entry methods plus up to three business endpoint names;
 *   secondaryRequestEntries: secondary request methods plus the 2nd and 3rd business endpoint names;
 *   postProcessSteps: "filter" and/or "export" depending on which signals are present.
 */
function buildWorkflowEvidence(signals) {
  const businessEndpointNames = getBusinessEndpoints(signals).map((endpoint) => endpoint.name);
  const entryMethods = signals.entryMethods || [];
  const secondaryMethods = signals.secondaryRequestMethods || [];
  const filterMarkers = (signals.filterExpressions || []).map(() => "filter");
  const exportMarkers = (signals.exportMethods || []).map(() => "export");

  const requestEntries = uniqueStringValues(
    entryMethods.slice(0, 3).concat(businessEndpointNames.slice(0, 3))
  );
  const paginationFields = uniqueStringValues(signals.paginationVars || []);
  // Endpoints after the first one are treated as secondary request candidates.
  const secondaryRequestEntries = uniqueStringValues(
    secondaryMethods.concat(businessEndpointNames.slice(1, 3))
  );
  const postProcessSteps = uniqueStringValues(filterMarkers.concat(exportMarkers));

  return {
    requestEntries,
    paginationFields,
    secondaryRequestEntries,
    postProcessSteps,
  };
}
/**
 * Infer the runtime parameters a scene needs from its signals.
 *
 * - "org" is required when any static param key matches /org/i or the
 *   archetype is "multi_mode_request".
 * - "period" is required when any branch field matches /period|mode/i or any
 *   static param key matches /period/i.
 * - "page_size" is optional and added when any pagination variable matches
 *   /pageSize|rows|limit/i.
 *
 * @param {object} signals Signal bag.
 * @param {string} workflowArchetype Archetype label.
 * @returns {object[]} Parameter descriptors in the order org, period, page_size.
 */
function buildParams(signals, workflowArchetype) {
  const staticKeys = Object.keys(signals.staticParams || {});
  const branchFields = signals.branchFields || [];
  const paginationVars = signals.paginationVars || [];
  const anyMatches = (values, pattern) => values.some((value) => pattern.test(value));

  const needsOrg = anyMatches(staticKeys, /org/i) || workflowArchetype === "multi_mode_request";
  const needsPeriod = anyMatches(branchFields, /period|mode/i) || anyMatches(staticKeys, /period/i);
  const needsPageSize = anyMatches(paginationVars, /pageSize|rows|limit/i);

  const candidates = [
    {
      enabled: needsOrg,
      param: {
        name: "org",
        resolver: "dictionary_entity",
        required: true,
        promptMissing: "Organization parameter is missing.",
        promptAmbiguous: "Organization parameter is ambiguous.",
        resolverConfig: {},
      },
    },
    {
      enabled: needsPeriod,
      param: {
        name: "period",
        resolver: "month_week_period",
        required: true,
        promptMissing: "Period parameter is missing.",
        promptAmbiguous: "Period parameter is ambiguous.",
        resolverConfig: {},
      },
    },
    {
      enabled: needsPageSize,
      param: {
        name: "page_size",
        resolver: "literal_passthrough",
        required: false,
        promptMissing: "",
        promptAmbiguous: "",
        resolverConfig: {},
      },
    },
  ];

  return candidates.filter(({ enabled }) => enabled).map(({ param }) => param);
}
/**
 * List human-readable uncertainties about the analysis, in a fixed order.
 *
 * @param {object} signals Signal bag.
 * @param {string} workflowArchetype Archetype label; some checks only apply to
 *   "paginated_enrichment" or "single_request_enrichment".
 * @returns {string[]} Messages for every check that applies.
 */
function buildUncertainties(signals, workflowArchetype) {
  const isPaginated = workflowArchetype === "paginated_enrichment";
  const isEnrichment = workflowArchetype === "single_request_enrichment";
  const isEmpty = (list) => !(list || []).length;

  // Each check is a thunk so archetype-specific lookups only run when the
  // archetype actually matches.
  const checks = [
    [
      () => isEmpty(signals.endpoints),
      "No API endpoint was detected deterministically.",
    ],
    [
      () => !signals.bootstrapCandidates?.some((candidate) => candidate.validForBootstrap),
      "Bootstrap target URL is still inferred weakly.",
    ],
    [
      () => isPaginated && getBusinessEndpoints(signals).length < 2,
      "Secondary enrichment request is not strongly confirmed.",
    ],
    [
      () => isPaginated && isEmpty(signals.filterExpressions) && isEmpty(signals.exportMethods),
      "Paginated enrichment is missing post-process evidence.",
    ],
    [
      () => isEnrichment && isEmpty(signals.g1eOutputColumns),
      "G1-E output columns are still weakly inferred.",
    ],
  ];

  const issues = [];
  for (const [applies, message] of checks) {
    if (applies()) {
      issues.push(message);
    }
  }
  return issues;
}
/**
 * Compute a heuristic confidence score for the analysis.
 *
 * Starts at 0.3 and adds a fixed weight for each satisfied condition; the
 * result is rounded to two decimals and capped at 0.95.
 *
 * @param {object} signals Signal bag.
 * @param {string} workflowArchetype Archetype label; the last three bonuses are archetype-specific.
 * @returns {number} Confidence score.
 */
function scoreConfidence(signals, workflowArchetype) {
  const hasAny = (key) => (signals[key] || []).length > 0;

  // [condition, weight] pairs, applied in this order so the floating-point
  // accumulation sequence stays fixed.
  const bonuses = [
    [getBusinessEndpoints(signals).length > 0, 0.18],
    [(signals.bootstrapCandidates || []).some((candidate) => candidate.validForBootstrap), 0.12],
    [hasAny("responsePaths"), 0.08],
    [hasAny("entryMethods"), 0.06],
    [workflowArchetype === "multi_mode_request" && (signals.modeValues || []).length >= 2, 0.14],
    [
      workflowArchetype === "paginated_enrichment" &&
        hasAny("paginationVars") &&
        hasAny("secondaryRequestMethods"),
      0.14,
    ],
    [
      workflowArchetype === "single_request_enrichment" &&
        signals.g1eMainRequest &&
        hasAny("g1eEnrichmentRequests"),
      0.14,
    ],
  ];

  let total = 0.3;
  for (const [earned, weight] of bonuses) {
    if (earned) {
      total += weight;
    }
  }
  return Math.min(0.95, Number(total.toFixed(2)));
}
/**
 * Grade how ready a generated scene is for trial execution.
 *
 * Collects missing pieces and risks from direct checks, from the three
 * contract evaluators (request / response / workflow) and from
 * archetype-specific checks, then derives a level:
 *   - "C": two or more missing pieces,
 *   - "B": exactly one missing piece, or no missing pieces but more than one
 *          risk or confidence below 0.7,
 *   - "A": otherwise.
 *
 * `bootstrap`, `params`, `workflowSteps` and `enrichmentRequests` may be
 * null/undefined; they are treated as empty, matching how the contract
 * evaluators already treat their inputs, so incomplete input is graded
 * instead of throwing.
 *
 * NOTE(review): a contract evaluator and the archetype-specific checks can
 * push the same reason (e.g. "paginate_step"), so `missingPieces` may contain
 * duplicates that count toward level "C" — confirm whether that is intended.
 *
 * @param {object} [input]
 * @param {object} [input.sceneIdDiagnostics] Reads `valid` and `invalidReason`.
 * @param {string} [input.workflowArchetype]
 * @param {object} [input.bootstrap] Reads `targetUrl` and `expectedDomain`.
 * @param {object[]} [input.apiEndpoints]
 * @param {object[]} [input.params]
 * @param {object[]} [input.workflowSteps]
 * @param {object} [input.mainRequest]
 * @param {object[]} [input.enrichmentRequests]
 * @param {object} [input.mergePlan]
 * @param {number} [input.confidence]
 * @returns {{level: string, confidence: number, gates: object[], risks: string[], missingPieces: string[], notes: string[]}}
 */
function buildReadiness({
  sceneIdDiagnostics,
  workflowArchetype,
  bootstrap,
  apiEndpoints,
  params,
  workflowSteps,
  mainRequest,
  enrichmentRequests,
  mergePlan,
  confidence,
} = {}) {
  const businessRoles = ["business_api", "gateway_api", "business_entry"];
  const supportedResolvers = ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"];

  // Normalize optional inputs once so later checks cannot throw on null/undefined.
  const bootstrapInfo = bootstrap || {};
  const paramList = params || [];
  const steps = workflowSteps || [];
  const enrichmentList = enrichmentRequests || [];

  const risks = [];
  const missingPieces = [];
  const notes = [];
  const businessApiEndpoints = (apiEndpoints || []).filter((endpoint) =>
    businessRoles.includes(endpoint.role)
  );
  const hasStepOfType = (...types) => steps.some((step) => types.includes(step.type));

  if (!sceneIdDiagnostics?.valid) {
    missingPieces.push("invalid_scene_id");
    risks.push(`Scene id is invalid${sceneIdDiagnostics?.invalidReason ? `: ${sceneIdDiagnostics.invalidReason}` : "."}`);
  }

  const hasBootstrapTarget = Boolean(bootstrapInfo.targetUrl || bootstrapInfo.expectedDomain);
  if (!hasBootstrapTarget) {
    missingPieces.push("bootstrap_target");
    risks.push("Business bootstrap target is not confirmed.");
  } else if (!bootstrapInfo.expectedDomain) {
    risks.push("Expected domain is missing; host validation may be weak.");
  }

  if (!businessApiEndpoints.length) {
    missingPieces.push("api_endpoint");
    risks.push("No request endpoint detected.");
  }
  if (!steps.length) {
    missingPieces.push("workflow_steps");
    risks.push("Workflow steps are incomplete.");
  }

  const requestContract = evaluateRequestContract({
    workflowArchetype,
    apiEndpoints,
    params: paramList,
    workflowSteps: steps,
  });
  if (!requestContract.passed) {
    missingPieces.push(requestContract.reason || "request_contract");
    risks.push(requestContract.message);
  }

  const responseContract = evaluateResponseContract({
    workflowArchetype,
    workflowSteps: steps,
    apiEndpoints,
  });
  if (!responseContract.passed) {
    missingPieces.push(responseContract.reason || "response_contract");
    risks.push(responseContract.message);
  }

  const workflowContract = evaluateWorkflowContract({
    workflowArchetype,
    workflowSteps: steps,
    businessApiEndpoints,
  });
  if (!workflowContract.passed) {
    missingPieces.push(workflowContract.reason || "workflow_contract");
    risks.push(workflowContract.message);
  }

  if (workflowArchetype === "paginated_enrichment") {
    if (!hasStepOfType("paginate")) {
      missingPieces.push("paginate_step");
      risks.push("Paginated enrichment lacks pagination evidence.");
    }
    if (!hasStepOfType("secondary_request") || businessApiEndpoints.length < 2) {
      missingPieces.push("secondary_request");
      risks.push("Paginated enrichment lacks a strong secondary request signal.");
    }
    if (!hasStepOfType("filter", "transform", "export")) {
      missingPieces.push("post_process");
      risks.push("Paginated enrichment lacks filter/transform/export evidence.");
    }
  }

  const isEnrichment = workflowArchetype === "single_request_enrichment";
  if (isEnrichment) {
    if (!mainRequest) {
      missingPieces.push("main_request");
      risks.push("G1-E workflow is missing a resolved main request.");
    }
    if (!enrichmentList.length) {
      missingPieces.push("enrichment_requests");
      risks.push("G1-E workflow is missing enrichment request contracts.");
    }
    if (!mergePlan) {
      missingPieces.push("merge_plan");
      risks.push("G1-E workflow is missing merge plan evidence.");
    }
  }

  if (workflowArchetype === "multi_mode_request" && !paramList.some((param) => param.name === "period")) {
    risks.push("Mode-aware workflow is missing a resolved period parameter.");
  }
  if (confidence < 0.55) {
    risks.push("Overall analysis confidence is low.");
  }

  let level = "A";
  if (missingPieces.length > 0) {
    level = missingPieces.length >= 2 ? "C" : "B";
  } else if (risks.length > 1 || confidence < 0.7) {
    level = "B";
  }

  if (level === "A") {
    notes.push("Structure looks complete enough for direct trial.");
  } else if (level === "B") {
    notes.push("Generation should be reviewed before internal-network execution.");
  } else {
    notes.push("Manual completion is required before trial.");
  }

  // Computed once; both the gate result and its reason depend on it.
  const resolversSupported = paramList.every((param) => supportedResolvers.includes(param.resolver));
  // The G1-E gates pass trivially for every other archetype.
  const enrichmentGate = (name, satisfied, reason) => ({
    name,
    passed: !isEnrichment || Boolean(satisfied),
    reason: !isEnrichment || satisfied ? null : reason,
  });
  const contractGate = (name, contract) => ({
    name,
    passed: contract.passed,
    reason: contract.passed ? null : contract.reason,
  });

  const gates = [
    {
      name: "scene_id_valid",
      // Only an explicit `valid === false` fails this gate.
      passed: sceneIdDiagnostics?.valid !== false,
      reason: sceneIdDiagnostics?.valid === false ? sceneIdDiagnostics.invalidReason || "invalid_scene_id" : null,
    },
    {
      name: "bootstrap_resolved",
      passed: hasBootstrapTarget,
      reason: hasBootstrapTarget ? null : "bootstrap_target",
    },
    contractGate("request_contract_complete", requestContract),
    contractGate("response_contract_complete", responseContract),
    contractGate("workflow_contract_complete", workflowContract),
    contractGate("workflow_complete_for_archetype", workflowContract),
    {
      name: "runtime_contract_compatible",
      passed: resolversSupported,
      reason: resolversSupported ? null : "runtime_contract_incompatible",
    },
    enrichmentGate("main_request_resolved", mainRequest, "main_request"),
    enrichmentGate("enrichment_requests_resolved", enrichmentList.length, "enrichment_requests"),
    enrichmentGate("merge_plan_resolved", mergePlan, "merge_plan"),
    enrichmentGate("g1e_scope_compatible", mainRequest && enrichmentList.length && mergePlan, "g1e_scope"),
  ];

  return {
    level,
    confidence,
    gates,
    risks,
    missingPieces,
    notes,
  };
}
/**
 * Check whether the request side of a scene is sufficiently specified for its archetype.
 *
 * Only endpoints with role "business_api", "gateway_api" or "business_entry"
 * are counted. A "request step" is any workflow step of type "request",
 * "paginate" or "secondary_request".
 *
 * @param {object} input
 * @param {string} input.workflowArchetype
 * @param {object[]} [input.apiEndpoints]
 * @param {object[]} [input.params]
 * @param {object[]} [input.workflowSteps]
 * @returns {{passed: boolean, reason?: string, message?: string}}
 */
function evaluateRequestContract({ workflowArchetype, apiEndpoints, params, workflowSteps }) {
  const pass = () => ({ passed: true });
  const fail = (reason, message) => ({ passed: false, reason, message });

  const businessRoles = ["business_api", "gateway_api", "business_entry"];
  const requestStepTypes = ["request", "paginate", "secondary_request"];
  const runtimeParams = params || [];

  const endpointCount = (apiEndpoints || []).filter((endpoint) => businessRoles.includes(endpoint.role)).length;
  const hasRequestStep = (workflowSteps || []).some((step) => requestStepTypes.includes(step.type));
  const hasEndpoint = endpointCount > 0;
  const hasEndpointPair = endpointCount >= 2;

  switch (workflowArchetype) {
    case "multi_mode_request": {
      const hasPeriodParam = runtimeParams.some((param) => param.name === "period");
      if (!hasEndpoint) {
        return fail("request_endpoint", "Request contract is missing a business endpoint.");
      }
      return hasPeriodParam
        ? pass()
        : fail("request_mode_param", "Multi-mode request is missing a resolved mode/period contract.");
    }

    case "single_request_enrichment":
      if (!hasEndpointPair) {
        return fail("request_endpoint", "G1-E workflow requires both main and enrichment business endpoints.");
      }
      return hasRequestStep
        ? pass()
        : fail("main_request", "G1-E workflow is missing a resolved main request.");

    case "paginated_enrichment":
      if (!hasEndpointPair) {
        return fail("request_endpoint", "Paginated enrichment requires both primary and secondary request endpoints.");
      }
      return hasRequestStep
        ? pass()
        : fail("request_workflow", "Paginated enrichment is missing request-side workflow signals.");

    case "page_state_eval":
      return hasRequestStep || hasEndpoint
        ? pass()
        : fail("request_workflow", "Page-state workflow is missing request or state-evaluation entry signals.");

    default:
      // Any other archetype passes with either an endpoint or at least one runtime param.
      return hasEndpoint || runtimeParams.length > 0
        ? pass()
        : fail("request_endpoint", "Request contract is missing a business endpoint or runtime input.");
  }
}
/**
 * Check whether the response side of a scene is sufficiently specified for its archetype.
 *
 * Only endpoints with role "business_api", "gateway_api" or "business_entry"
 * are counted. Transform evidence is any workflow step of type "transform",
 * "filter" or "export".
 *
 * @param {object} input
 * @param {string} input.workflowArchetype
 * @param {object[]} [input.workflowSteps]
 * @param {object[]} [input.apiEndpoints]
 * @returns {{passed: boolean, reason?: string, message?: string}}
 */
function evaluateResponseContract({ workflowArchetype, workflowSteps, apiEndpoints }) {
  const businessRoles = ["business_api", "gateway_api", "business_entry"];
  const transformTypes = ["transform", "filter", "export"];
  const rejected = (reason, message) => ({ passed: false, reason, message });

  const endpointCount = (apiEndpoints || []).filter((endpoint) => businessRoles.includes(endpoint.role)).length;
  const hasTransform = (workflowSteps || []).some((step) => transformTypes.includes(step.type));

  if (workflowArchetype === "page_state_eval") {
    // Page-state evaluation has no response requirements.
    return { passed: true };
  }

  if (workflowArchetype === "single_request_enrichment") {
    if (endpointCount < 2) {
      return rejected("response_path", "G1-E workflow lacks enough response-side endpoint evidence.");
    }
    if (!hasTransform) {
      return rejected("merge_plan", "G1-E workflow is missing merge/transform evidence.");
    }
    return { passed: true };
  }

  if (workflowArchetype === "paginated_enrichment") {
    if (endpointCount < 2) {
      return rejected(
        "response_path",
        "Paginated enrichment lacks enough response-side endpoints to confirm extraction."
      );
    }
    return { passed: true };
  }

  if (endpointCount === 0 && !hasTransform) {
    return rejected("response_path", "Response contract is missing extraction or transform evidence.");
  }
  return { passed: true };
}
/**
 * Check whether the workflow steps form a complete chain for the archetype.
 *
 * An empty or missing step list always fails. Beyond that:
 * - "paginated_enrichment" needs a paginate step, a secondary_request step
 *   backed by at least two business endpoints, and a filter/transform/export step;
 * - "single_request_enrichment" needs request, enrichment_request and transform steps;
 * - "multi_mode_request" needs request and transform steps;
 * - other archetypes pass with any non-empty step list.
 *
 * @param {object} input
 * @param {string} input.workflowArchetype
 * @param {object[]} [input.workflowSteps]
 * @param {object[]} [input.businessApiEndpoints]
 * @returns {{passed: boolean, reason?: string, message?: string}}
 */
function evaluateWorkflowContract({ workflowArchetype, workflowSteps, businessApiEndpoints }) {
  const failure = (reason, message) => ({ passed: false, reason, message });

  if (!((workflowSteps || []).length > 0)) {
    return failure("workflow_steps", "Workflow contract is missing executable steps.");
  }

  const hasStep = (...types) => workflowSteps.some((step) => types.includes(step.type));

  switch (workflowArchetype) {
    case "paginated_enrichment": {
      const hasPaginate = hasStep("paginate");
      const hasSecondary = hasStep("secondary_request");
      const hasPostProcess = hasStep("filter", "transform", "export");
      const endpointCount = (businessApiEndpoints || []).length;
      // Failures are reported in pipeline order: paginate, secondary, post-process.
      if (!hasPaginate) {
        return failure("paginate_step", "Paginated enrichment lacks pagination evidence.");
      }
      if (!hasSecondary || endpointCount < 2) {
        return failure("secondary_request", "Paginated enrichment lacks a strong secondary request signal.");
      }
      if (!hasPostProcess) {
        return failure("post_process", "Paginated enrichment lacks filter/transform/export evidence.");
      }
      break;
    }

    case "single_request_enrichment": {
      const hasRequest = hasStep("request");
      const hasEnrichment = hasStep("enrichment_request");
      const hasTransform = hasStep("transform");
      if (!(hasRequest && hasEnrichment && hasTransform)) {
        let reason = "workflow_transform";
        if (!hasRequest) {
          reason = "workflow_request";
        } else if (!hasEnrichment) {
          reason = "enrichment_requests";
        }
        return failure(reason, "G1-E workflow lacks a complete main/enrichment/transform chain.");
      }
      break;
    }

    case "multi_mode_request": {
      const hasRequest = hasStep("request");
      const hasTransform = hasStep("transform");
      if (!(hasRequest && hasTransform)) {
        return failure(
          hasRequest ? "workflow_transform" : "workflow_request",
          "Multi-mode request lacks a complete request/transform workflow."
        );
      }
      break;
    }

    default:
      break;
  }

  return { passed: true };
}
/**
 * Convert a value into a lowercase, dash-separated ASCII slug.
 *
 * A dash is inserted at lower/digit-to-upper camelCase boundaries, every run
 * of characters outside [a-z0-9] becomes a single dash, and leading/trailing
 * dashes are dropped. Falsy input yields an empty string.
 *
 * @param {*} value Value to slugify (coerced with String()).
 * @returns {string} The slug, possibly empty.
 */
function slugifyAscii(value) {
  const text = String(value || "");
  const withCamelBoundaries = text.replace(/([a-z0-9])([A-Z])/g, "$1-$2");
  const lowered = withCamelBoundaries.trim().toLowerCase();
  // Splitting on non-alphanumeric runs and dropping empty tokens is the same
  // as collapsing those runs to "-" and trimming dashes at both ends.
  const tokens = lowered.split(/[^a-z0-9]+/).filter((token) => token.length > 0);
  return tokens.join("-");
}
/**
 * Turn an identifier-like value into a display name.
 *
 * Runs of dashes/underscores become single spaces, the result is trimmed, and
 * the first word character after every word boundary is upper-cased. Input
 * that is empty after cleanup yields "Generated Scene".
 *
 * @param {*} value Value to humanize (coerced with String()).
 * @returns {string} Display name.
 */
function humanizeSceneName(value) {
  const spaced = String(value || "")
    .replace(/[-_]+/g, " ")
    .trim();
  if (spaced.length === 0) {
    return "Generated Scene";
  }
  const upperCaseInitial = (initial) => initial.toUpperCase();
  return spaced.replace(/\b\w/g, upperCaseInitial);
}
/**
 * Drop falsy entries and duplicates from a list, keeping first-seen order.
 *
 * @param {Array} [list] Input list; null/undefined is treated as empty.
 * @returns {Array} New array of the distinct truthy entries.
 */
function uniqueStringValues(list) {
  const truthyEntries = (list || []).filter((entry) => Boolean(entry));
  return [...new Set(truthyEntries)];
}
// Public surface of this module (CommonJS). Only the functions listed here
// are exported; every other function in the file is module-private.
module.exports = {
buildAnalysisContext,
buildDeterministicSceneIr,
deriveSceneIdDiagnostics,
readDirectory,
runGenerator,
validateSceneIdCandidate,
};