const fs = require("fs"); const path = require("path"); const { spawn } = require("child_process"); const TEXT_FILE_EXTENSIONS = new Set([ ".html", ".js", ".jsx", ".json", ".md", ".mjs", ".toml", ".ts", ".tsx", ".txt", ".vue", ]); const DEFAULT_BOOTSTRAP = { expectedDomain: "", targetUrl: "", requiresTargetPage: true, pageTitleKeywords: [], source: "deterministic", }; const BOOTSTRAP_ROLE_PRIORITY = ["business_entry", "business_api", "gateway_api"]; const GENERIC_SCENE_IDS = new Set([ "scene", "report", "generated", "generated-scene", "skill", "default-scene", ]); const GENERIC_SCENE_ID_TOKENS = new Set([ "api", "app", "data", "default", "export", "generator", "page", "report", "request", "scene", "skill", "system", "table", "temp", "test", "tmp", ]); const SCENE_ID_ALIAS_RULES = [ { pattern: /营销\s*2(?:\.|。)?0/gi, replacement: "marketing" }, { pattern: /零度户报表数据生成/gi, replacement: "zero-consumer-report" }, { pattern: /零度户报表/gi, replacement: "zero-consumer-report" }, { pattern: /零度户/gi, replacement: "zero-consumer" }, { pattern: /台区线损率统计分析/gi, replacement: "tq-lineloss-analysis" }, { pattern: /台区线损/gi, replacement: "tq-lineloss" }, { pattern: /线损率统计分析/gi, replacement: "lineloss-analysis" }, { pattern: /线损率/gi, replacement: "lineloss-rate" }, { pattern: /线损/gi, replacement: "lineloss" }, { pattern: /台区/gi, replacement: "tq" }, { pattern: /月[_-]?周累计/gi, replacement: "monthly-weekly-cumulative" }, { pattern: /月累计/gi, replacement: "monthly-cumulative" }, { pattern: /周累计/gi, replacement: "weekly-cumulative" }, { pattern: /统计分析/gi, replacement: "analysis" }, { pattern: /报表数据生成/gi, replacement: "report" }, { pattern: /报表/gi, replacement: "report" }, { pattern: /数据生成/gi, replacement: "generator" }, ]; function normalizePath(inputPath) { return inputPath.replace(/\\/g, "/"); } function runGenerator(params, sseWriter, projectRoot) { const { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson, sceneIrJson, completionMeta, } = params; const args = [ "run", "--bin", "sg_scene_generate", "--", "--source-dir", normalizePath(sourceDir), "--scene-id", sceneId, "--scene-name", sceneName, ]; if (sceneKind) { args.push("--scene-kind", sceneKind); } if (targetUrl) { args.push("--target-url", targetUrl); } args.push("--output-root", normalizePath(outputRoot)); if (lessons) { args.push("--lessons", normalizePath(lessons)); } if (sceneInfoJson) { args.push("--scene-info-json", sceneInfoJson); } if (sceneIrJson) { args.push("--scene-ir-json", sceneIrJson); } return new Promise((resolve, reject) => { sseWriter.write( `event: status\ndata: ${JSON.stringify({ message: "Starting skill generation...", })}\n\n` ); sseWriter.write( `event: status\ndata: ${JSON.stringify({ message: `Running: cargo ${args.join(" ")}`, })}\n\n` ); const child = spawn("cargo", args, { cwd: projectRoot, stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, RUST_BACKTRACE: "1" }, }); let stdout = ""; let stderr = ""; let timedOut = false; const timeout = setTimeout(() => { timedOut = true; child.kill("SIGTERM"); sseWriter.write( `event: error\ndata: ${JSON.stringify({ message: "Generation timed out after 5 minutes.", })}\n\n` ); resolve({ success: false, error: "timeout" }); }, 5 * 60 * 1000); child.stdout.on("data", (data) => { const text = data.toString(); stdout += text; sseWriter.write( `event: log\ndata: ${JSON.stringify({ message: text.trim() })}\n\n` ); }); child.stderr.on("data", (data) => { const text = data.toString(); stderr += text; sseWriter.write( `event: log\ndata: ${JSON.stringify({ message: text.trim() })}\n\n` ); }); child.on("close", (code) => { clearTimeout(timeout); if (timedOut) return; if (code === 0) { const match = stdout.match(/generated scene package:\s*(.+)/); const skillRoot = match ? match[1] : null; sseWriter.write( `event: status\ndata: ${JSON.stringify({ message: "Skill generation completed.", })}\n\n` ); sseWriter.write( `event: complete\ndata: ${JSON.stringify({ success: true, skillRoot, readiness: completionMeta?.readiness || null, workflowArchetype: completionMeta?.workflowArchetype || null, confidence: completionMeta?.confidence || 0, })}\n\n` ); resolve({ success: true, skillRoot }); } else { sseWriter.write( `event: error\ndata: ${JSON.stringify({ message: `Generation failed (exit code ${code})`, })}\n\n` ); if (stderr.trim()) { sseWriter.write( `event: error\ndata: ${JSON.stringify({ message: stderr.substring(0, 500), })}\n\n` ); } resolve({ success: false, code, stderr }); } }); child.on("error", (err) => { clearTimeout(timeout); sseWriter.write( `event: error\ndata: ${JSON.stringify({ message: `Failed to start cargo: ${err.message}`, })}\n\n` ); reject(err); }); }); } function readDirectory(sourceDir) { if (!fs.existsSync(sourceDir)) { throw new Error(`Directory not found: ${sourceDir}`); } const stat = fs.statSync(sourceDir); if (!stat.isDirectory()) { throw new Error(`Not a directory: ${sourceDir}`); } const files = []; const treeLines = []; walkDirectory(sourceDir, "", files, treeLines); const result = { tree: treeLines.join("\n"), files, }; for (const file of files) { const baseName = path.basename(file.path); if (baseName === "scene.toml") result["scene.toml"] = file.content; if (baseName === "SKILL.toml") result["SKILL.toml"] = file.content; if (baseName === "SKILL.md") result["SKILL.md"] = file.content; if (baseName === "index.html" && !result.indexHtml) result.indexHtml = file.content; } const scripts = {}; for (const file of files) { if (file.path.endsWith(".js")) { scripts[file.path] = file.content; } } if (Object.keys(scripts).length > 0) { result.scripts = scripts; } result.analysisContext = buildAnalysisContext(sourceDir, result); result.deterministic = buildDeterministicSceneIr(result.analysisContext, sourceDir); return result; } function walkDirectory(rootDir, relativeDir, files, treeLines) { const absoluteDir = relativeDir ? path.join(rootDir, relativeDir) : rootDir; const entries = fs .readdirSync(absoluteDir, { withFileTypes: true }) .sort((a, b) => a.name.localeCompare(b.name, "en")); for (const entry of entries) { const relativePath = relativeDir ? path.posix.join(normalizePath(relativeDir), entry.name) : entry.name; treeLines.push(`${entry.isDirectory() ? "[D]" : "[F]"} ${relativePath}`); if (entry.isDirectory()) { walkDirectory(rootDir, relativePath, files, treeLines); continue; } const ext = path.extname(entry.name).toLowerCase(); if (!TEXT_FILE_EXTENSIONS.has(ext)) { continue; } const absolutePath = path.join(absoluteDir, entry.name); const stat = fs.statSync(absolutePath); if (stat.size > 1024 * 1024) { continue; } const content = fs.readFileSync(absolutePath, "utf-8"); files.push({ path: normalizePath(relativePath), content, }); } } function buildAnalysisContext(sourceDir, dirContents) { const files = Array.isArray(dirContents.files) ? dirContents.files : []; const indexHtml = dirContents.indexHtml || ""; const directorySummary = { sourceDir: normalizePath(sourceDir), tree: dirContents.tree || "(empty)", files: files.map((file) => ({ path: file.path, length: file.content.length, })), }; const THIRD_PARTY_JS_PATTERNS = [ "vue.js", "vue.min.js", "element-ui", "elementui", "axios", "jquery", "jquery.min.js", "echarts", "echarts.min.js", ]; const dirFiles = dirContents.files || []; const businessJsFragments = []; for (const file of dirFiles) { if (!file.path.startsWith("js/") || !file.path.endsWith(".js")) continue; const baseName = path.basename(file.path).toLowerCase(); if (THIRD_PARTY_JS_PATTERNS.some((pattern) => baseName.includes(pattern))) continue; businessJsFragments.push({ path: file.path, snippet: file.content.length > 600 ? file.content.slice(0, 600) : file.content, }); } return { directorySummary, indexHtmlChunks: chunkFile("index.html", indexHtml, 3000, 2), urlFragments: collectFragments(files, isUrlFragment, 10), requestFragments: collectFragments(files, isRequestFragment, 10), branchingFragments: collectFragments(files, isBranchFragment, 8), responseFragments: collectFragments(files, isResponseFragment, 8), exportFragments: collectFragments(files, isExportFragment, 6), bootstrapHints: collectBootstrapHints(files, indexHtml), deterministicSignals: collectDeterministicSignals(files, indexHtml), businessJsFragments, }; } function chunkFile(filePath, content, chunkSize, maxChunks) { if (!content) return []; const chunks = []; let index = 0; let offset = 0; while (offset < content.length && chunks.length < maxChunks) { chunks.push({ path: filePath, index, start: offset, end: Math.min(offset + chunkSize, content.length), content: content.slice(offset, offset + chunkSize), }); index += 1; offset += chunkSize; } return chunks; } function collectFragments(files, predicate, limit) { const fragments = []; for (const file of files) { const fileFragments = extractLineFragments(file.path, file.content, predicate, limit - fragments.length); fragments.push(...fileFragments); if (fragments.length >= limit) { break; } } return fragments; } function extractLineFragments(filePath, content, predicate, remainingLimit) { if (!content || remainingLimit <= 0) return []; const fragments = []; const lines = content.split(/\r?\n/); const seen = new Set(); for (let i = 0; i < lines.length && fragments.length < remainingLimit; i += 1) { if (!predicate(lines[i])) continue; const start = Math.max(0, i - 3); const end = Math.min(lines.length, i + 4); const snippet = lines.slice(start, end).join("\n").trim(); if (!snippet || seen.has(snippet)) continue; seen.add(snippet); fragments.push({ path: filePath, lineStart: start + 1, lineEnd: end, snippet: snippet.length > 1200 ? snippet.slice(0, 1200) : snippet, }); } return fragments; } function isUrlFragment(line) { return /(https?:\/\/|['"`]\/[^'"`\s]+|url\s*:|fetch\s*\(|axios\.(get|post|request)|\$\.(ajax|get|post))/i.test(line); } function isRequestFragment(line) { return /(contentType|JSON\.stringify|requestBody|requestData|payload|params|data\s*:|body\s*:|FormData|\bpage(Size|No)?\b|\brows\b)/i.test(line); } function isBranchFragment(line) { return /\b(if|else if|switch|case)\b/.test(line) && /(mode|period|reportType|tab|status|scene|type)/i.test(line); } function isResponseFragment(line) { return /(response|res|result)\.(data|content|rows|list|records|items)|\.map\(|\.filter\(|columnDefs|columns|normalize|transform/i.test(line); } function isExportFragment(line) { return /(export|download|xlsx|csv|blob|saveAs|excel)/i.test(line); } function collectBootstrapHints(files, indexHtml) { const hints = []; const seen = new Set(); for (const match of indexHtml.matchAll(/<(a|form|iframe)[^>]+(?:href|action|src)=["']([^"']+)["']/gi)) { const url = match[2]; if (url && !isStaticAssetUrl(url) && !seen.has(url)) { seen.add(url); hints.push({ type: match[1], url }); } } for (const file of files) { const namedUrlMatches = file.content.matchAll( /\b(sourceUrl|sourceURL|baseUrl|baseURL|targetUrl|requestUrl|apiUrl|gatewayUrl|loginPath|mainPath)\b\s*[:=]\s*(['"`])(https?:\/\/[^'"`\s]+)\2/gi ); for (const match of namedUrlMatches) { const url = match[3]; const type = String(match[1] || "").toLowerCase(); if (url && !seen.has(url)) { seen.add(url); // loginPath/mainPath are bootstrap hints — the domain is expected_domain if (type === "loginpath" || type === "mainpath") { const domain = new URL(url).hostname; hints.push({ type: "expected_domain", value: domain, path: file.path }); hints.push({ type: "target_url", value: url, path: file.path }); } else { hints.push({ type, url, path: file.path }); } } } const matches = file.content.matchAll(/window\.open\((['"`])([^'"`]+)\1|location\.(?:href|assign|replace)\((['"`])([^'"`]+)\3/gi); for (const match of matches) { const url = match[2] || match[4]; if (url && !seen.has(url)) { seen.add(url); hints.push({ type: "navigation", url, path: file.path }); } } } return hints.slice(0, 12); } function collectDeterministicSignals(files, indexHtml) { const urls = new Map(); const methods = new Map(); const responsePaths = new Set(); const branchFields = new Set(); const modeValues = new Set(); const paginationVars = new Set(); const filterExpressions = new Set(); const entryMethods = new Set(); const exportMethods = new Set(); const secondaryRequestMethods = new Set(); const pageTitleKeywords = new Set(); const staticParams = {}; for (const file of files) { const content = file.content; for (const endpoint of extractEndpoints(content)) { const key = `${endpoint.method}|${endpoint.url}`; if (!urls.has(key)) { urls.set(key, endpoint); } } for (const match of content.matchAll(/\b(type|method)\s*:\s*['"`](GET|POST|PUT|DELETE|PATCH)['"`]/gi)) { methods.set(match[2].toUpperCase(), true); } for (const match of content.matchAll(/\b(?:response|res|result)\.(data|content|rows|list|records|items)\b/g)) { responsePaths.add(match[1]); } for (const match of content.matchAll(/\b(?:if|switch|case)\b[\s\S]{0,120}?(period_mode|reportType|mode|tab|sceneType|status|type)\b/gi)) { branchFields.add(match[1]); } for (const match of content.matchAll(/\b(period_mode|reportType|mode|tab)\b[\s\S]{0,80}?['"`](month|week|day|detail|summary|list|chart)['"`]/gi)) { modeValues.add(match[2]); } for (const match of content.matchAll(/\b(pageSize|pageNo|pageNum|page|rows|limit|offset)\b/g)) { paginationVars.add(match[1]); } for (const match of content.matchAll(/\.filter\(\s*(.+?)\s*\)|if\s*\(([^)]*(?:!==|!=|===|==|>|<)[^)]*)\)/g)) { const expr = (match[1] || match[2] || "").trim(); if (expr && expr.length <= 160) { filterExpressions.add(expr); } } for (const match of content.matchAll(/(?:function\s+|const\s+|let\s+|var\s+)([A-Za-z_$][\w$]*)\s*(?:=\s*(?:async\s*)?\(|\()/g)) { const name = match[1]; if (/(query|search|load|fetch|init|mounted|created|getData)/i.test(name)) { entryMethods.add(name); } if (/(export|download|excel|csv)/i.test(name)) { exportMethods.add(name); } if (/(detail|charge|charges|info|details)/i.test(name)) { secondaryRequestMethods.add(name); } } for (const match of content.matchAll(/(?:title|document\.title)\s*[:=]\s*['"`]([^'"`]{2,40})['"`]/gi)) { pageTitleKeywords.add(match[1]); } const staticParamMatches = content.matchAll(/\b(orgNo|orgCode|orgId|period_mode|reportType|pageSize|rows)\b\s*:\s*['"`]([^'"`\n]+)['"`]/gi); for (const match of staticParamMatches) { if (!(match[1] in staticParams)) { staticParams[match[1]] = match[2]; } } } for (const match of indexHtml.matchAll(/([^<]{2,40})<\/title>/gi)) { pageTitleKeywords.add(match[1].trim()); } const bootstrapCandidates = collectBootstrapCandidates(files, indexHtml, Array.from(urls.values())); return { endpoints: Array.from(urls.values()), methods: Array.from(methods.keys()), responsePaths: Array.from(responsePaths), branchFields: Array.from(branchFields), modeValues: Array.from(modeValues), paginationVars: Array.from(paginationVars), filterExpressions: Array.from(filterExpressions).slice(0, 8), entryMethods: Array.from(entryMethods).slice(0, 10), exportMethods: Array.from(exportMethods).slice(0, 10), secondaryRequestMethods: Array.from(secondaryRequestMethods).slice(0, 10), pageTitleKeywords: Array.from(pageTitleKeywords).slice(0, 10), staticParams, bootstrapCandidates, }; } function extractEndpoints(content) { const endpoints = []; const seen = new Set(); const lines = content.split(/\r?\n/); for (let index = 0; index < lines.length; index += 1) { const line = lines[index]; if (!isUrlFragment(line)) continue; const block = lines.slice(Math.max(0, index - 2), Math.min(lines.length, index + 5)).join("\n"); const urlMatch = block.match(/\burl\s*:\s*(['"`])([^'"`]+)\1/i) || block.match(/fetch\s*\(\s*(['"`])([^'"`]+)\1/i) || block.match(/axios\.(?:get|post|request)\s*\(\s*(['"`])([^'"`]+)\1/i); if (!urlMatch) continue; const url = sanitizeUrl(urlMatch[2]); if (!url) continue; const methodMatch = block.match(/\b(?:type|method)\s*:\s*(['"`])([A-Z]+)\1/i) || block.match(/\baxios\.post\s*\(/i) || block.match(/\baxios\.get\s*\(/i); const method = methodMatch ? String(methodMatch[2] || (methodMatch[0].includes(".post") ? "POST" : "GET")).toUpperCase() : "GET"; const contentTypeMatch = block.match(/\bcontentType\s*:\s*(['"`])([^'"`]+)\1/i); const name = inferEndpointName(block, url, endpoints.length); const role = classifyRequestRole(url); const key = `${method}|${url}`; if (seen.has(key)) continue; seen.add(key); endpoints.push({ name, url, role, method, contentType: contentTypeMatch ? contentTypeMatch[2] : null, description: `Detected from source snippet around line ${index + 1}`, }); } return endpoints.slice(0, 12); } function sanitizeUrl(rawUrl) { if (!rawUrl) return ""; const value = rawUrl.trim(); if (!value) return ""; if (isStaticAssetUrl(value)) return ""; if (isTemplateNoiseUrl(value)) return ""; if (/^(javascript:|data:|#)/i.test(value)) return ""; if (/\.js(\?|$)|\.css(\?|$)|\.png(\?|$)|\.svg(\?|$)/i.test(value)) return ""; return value; } function classifyRequestRole(rawUrl) { const value = String(rawUrl || "").toLowerCase(); if (!value || isTemplateNoiseUrl(value)) return "template_noise"; if (value.includes("localhost") || value.includes("127.0.0.1")) { return /(surfaceservices|reportservices|export)/i.test(value) ? "export_service" : "local_helper"; } if (value.includes("gateway")) return "gateway_api"; return "business_api"; } function inferEndpointName(block, url, index) { const functionMatch = block.match(/(?:function|const|let|var)\s+([A-Za-z_$][\w$]*)/); if (functionMatch) return functionMatch[1]; const pathParts = url.split(/[/?#]/).filter(Boolean); return pathParts[pathParts.length - 1] || `endpoint_${index + 1}`; } function collectBootstrapCandidates(files, indexHtml, endpoints) { const candidates = []; const seen = new Set(); for (const endpoint of endpoints) { const candidate = buildBootstrapCandidate(endpoint.url, "api_endpoint"); const key = candidate ? `${candidate.role}|${candidate.targetUrl}` : ""; if (candidate && !seen.has(key)) { seen.add(key); candidates.push(candidate); } } for (const hint of collectBootstrapHints(files, indexHtml)) { const candidate = buildBootstrapCandidate(hint.url, hint.type); const key = candidate ? `${candidate.role}|${candidate.targetUrl}` : ""; if (candidate && !seen.has(key)) { seen.add(key); candidates.push(candidate); } } return candidates.slice(0, 8); } function buildBootstrapCandidate(rawUrl, source) { if (!rawUrl) return null; try { const url = new URL(rawUrl, "http://placeholder.local"); const role = classifyBootstrapRole(rawUrl, source, url); if (!role) { return null; } const isAbsolute = /^https?:\/\//i.test(rawUrl); const targetUrl = isAbsolute ? role === "business_api" || role === "gateway_api" ? url.origin : `${url.origin}${url.pathname}` : rawUrl; return { expectedDomain: isAbsolute ? url.hostname : "", targetUrl, source, role, validForBootstrap: BOOTSTRAP_ROLE_PRIORITY.includes(role), }; } catch (_) { return null; } } function classifyBootstrapRole(rawUrl, source, parsedUrl) { const value = String(rawUrl || "").trim(); if (!value || isTemplateNoiseUrl(value)) return "template_noise"; if (isStaticAssetUrl(value)) return "static_asset"; if (/^(javascript:|data:|#)/i.test(value)) return "template_noise"; const isAbsolute = /^https?:\/\//i.test(value); if (!isAbsolute) { return value.startsWith("/") ? "business_entry" : "template_noise"; } const hostname = String(parsedUrl?.hostname || "").toLowerCase(); const pathname = String(parsedUrl?.pathname || ""); if (hostname === "localhost" || hostname === "127.0.0.1") { if (/(SurfaceServices|ReportServices|export)/i.test(pathname)) { return "export_service"; } return "local_helper"; } if (/(gateway)/i.test(hostname) || /(gateway)/i.test(value)) { return "gateway_api"; } if (/(sourceurl|targeturl|navigation|form|iframe|a)/i.test(source)) { return "business_entry"; } if (/(apiurl|requesturl|baseurl|api_endpoint)/i.test(source) || /\/api\//i.test(pathname)) { return "business_api"; } return "business_entry"; } function isTemplateNoiseUrl(rawUrl) { return /\$\{[^}]+\}|%s|placeholder|not a valid/i.test(rawUrl); } function isStaticAssetUrl(rawUrl) { return /(?:cdn|static|assets?)|(?:\.js|\.css|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.ico)(?:\?|$)/i.test(rawUrl); } function buildDeterministicSceneIr(context, sourceDir) { const signals = context.deterministicSignals || {}; const sceneName = humanizeSceneName(path.basename(sourceDir)); const sceneIdDiagnostics = deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }); const sceneId = sceneIdDiagnostics.selected || ""; const workflowArchetype = classifyWorkflowArchetype(signals); const evidence = buildEvidence(signals, workflowArchetype); const bootstrap = deriveBootstrap(signals); const modes = buildModes(signals); const workflowSteps = buildWorkflowSteps(signals, workflowArchetype); const workflowEvidence = buildWorkflowEvidence(signals); const responsePath = signals.responsePaths?.[0] || ""; const normalizeRules = buildNormalizeRules(signals); const params = buildParams(signals, workflowArchetype); const confidence = scoreConfidence(signals, workflowArchetype); const readiness = buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiEndpoints: signals.endpoints || [], params, workflowSteps, confidence, }); return { sceneId, sceneIdDiagnostics, sceneName, sceneKind: "report_collection", workflowArchetype, bootstrap, params, modes, defaultMode: modes[0] ? modes[0].name : null, modeSwitchField: signals.branchFields?.find((field) => /mode|period/i.test(field)) || null, workflowSteps, workflowEvidence, requestTemplate: {}, responsePath, normalizeRules, artifactContract: { type: "report-artifact", successStatus: ["ok", "partial", "empty"], failureStatus: ["blocked", "error"], }, validationHints: { requiresTargetPage: true, runtimeCompatible: params.every((param) => ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver) ), manualCompletionRequired: readiness.level !== "A", missingPieces: readiness.missingPieces.slice(), }, evidence, readiness, apiEndpoints: signals.endpoints || [], staticParams: signals.staticParams || {}, columnDefs: [], confidence, uncertainties: buildUncertainties(signals, workflowArchetype), deterministicSignals: signals, }; } function deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }) { const baseName = path.basename(sourceDir || ""); const candidates = []; const seen = new Set(); function pushCandidate(value, source) { const normalized = slugifyAscii(value); if (!normalized || seen.has(normalized)) return; seen.add(normalized); const validation = validateSceneIdCandidate(normalized, { sceneName, sourceDir, signals }); candidates.push({ value: normalized, source, valid: validation.valid, reason: validation.valid ? null : validation.reason, }); } pushCandidate(buildKeywordSceneId(baseName, sceneName, signals), "deterministic_keywords"); pushCandidate(buildAliasSceneId(baseName), "controlled_alias"); pushCandidate(slugifyAscii(baseName), "directory_slug"); const selectedCandidate = candidates.find((candidate) => candidate.valid) || candidates[0] || null; return { selected: selectedCandidate?.value || "", candidateSource: selectedCandidate?.source || "", valid: Boolean(selectedCandidate?.valid), invalidReason: selectedCandidate && !selectedCandidate.valid ? selectedCandidate.reason : null, candidates, }; } function buildKeywordSceneId(baseName, sceneName, signals) { const aliasedCandidates = Array.from( new Set( [baseName, sceneName] .filter(Boolean) .map((value) => slugifyAscii(applySceneIdAliases(value))) .filter(Boolean) ) ).sort((left, right) => right.length - left.length); const aliased = aliasedCandidates[0] || ""; if (aliased && aliased.split("-").length >= 2) { return aliased; } const tokens = extractBusinessTokens( [baseName, sceneName] .filter(Boolean) .join(" "), signals ); if (!tokens.length) return ""; return tokens.slice(0, 5).join("-"); } function buildAliasSceneId(baseName) { if (!baseName) return ""; let value = String(baseName || ""); for (const rule of SCENE_ID_ALIAS_RULES) { value = value.replace(rule.pattern, ` ${rule.replacement} `); } return slugifyAscii(value); } function extractBusinessTokens(rawText, signals = {}) { const candidates = []; const pushTokens = (value) => { const slug = slugifyAscii(value); if (!slug) return; for (const token of slug.split("-")) { if (!token || token.length < 2 || GENERIC_SCENE_ID_TOKENS.has(token)) continue; candidates.push(token); } }; pushTokens(applySceneIdAliases(rawText)); for (const keyword of signals.pageTitleKeywords || []) { pushTokens(applySceneIdAliases(keyword)); } for (const endpoint of signals.endpoints || []) { pushTokens(endpoint?.name); const urlValue = String(endpoint?.url || ""); const segments = urlValue .split(/[/?#=&._-]+/) .filter(Boolean) .slice(-4); for (const segment of segments) { pushTokens(segment); } } return Array.from(new Set(candidates)); } function applySceneIdAliases(value) { let text = String(value || ""); for (const rule of SCENE_ID_ALIAS_RULES) { text = text.replace(rule.pattern, ` ${rule.replacement} `); } return text; } function validateSceneIdCandidate(sceneId, { sceneName = "", sourceDir = "", signals = {} } = {}) { const normalized = slugifyAscii(sceneId); if (!normalized) { return { valid: false, reason: "empty_scene_id" }; } if (GENERIC_SCENE_IDS.has(normalized)) { return { valid: false, reason: "generic_scene_id" }; } const letters = (normalized.match(/[a-z]/g) || []).length; const digits = (normalized.match(/\d/g) || []).length; if (!letters) { return { valid: false, reason: "numeric_only_scene_id" }; } if ((normalized.length < 5 || letters < 3) && !normalized.includes("-")) { return { valid: false, reason: "scene_id_too_short" }; } if (digits > letters && letters < 4) { return { valid: false, reason: "numeric_dominant_scene_id" }; } const expectedTokens = extractBusinessTokens( [sceneName, path.basename(sourceDir || "")] .filter(Boolean) .join(" "), signals ).filter((token) => token.length >= 3); if (expectedTokens.length) { const matched = expectedTokens.some((token) => normalized.includes(token)); if (!matched) { return { valid: false, reason: "scene_id_semantic_detached" }; } } return { valid: true, reason: null }; } function classifyWorkflowArchetype(signals) { const businessEndpoints = getBusinessEndpoints(signals); const hasPagination = (signals.paginationVars || []).length > 0; const hasSecondaryRequest = (signals.secondaryRequestMethods || []).length > 0 || businessEndpoints.length >= 2; const hasPostProcess = (signals.filterExpressions || []).length > 0 || (signals.exportMethods || []).length > 0; if (hasPagination && hasSecondaryRequest && hasPostProcess) { return "paginated_enrichment"; } const hasModeBranch = (signals.branchFields || []).some((field) => /period_mode|reportType|tjzq|mode/i.test(field)); const hasModeValues = (signals.modeValues || []).length >= 2; const hasModeDivergence = hasModeValues && businessEndpoints.length >= 2; if (hasModeBranch && hasModeDivergence) { return "multi_mode_request"; } const pageStateSignals = [...(signals.entryMethods || []), ...(signals.filterExpressions || [])].join(" "); if (/(state|status|ready|available|enabled)/i.test(pageStateSignals) && businessEndpoints.length <= 1) { return "page_state_eval"; } return "single_request_table"; } function buildEvidence(signals, workflowArchetype) { const evidence = []; const businessEndpoints = getBusinessEndpoints(signals); if (businessEndpoints.length > 0) { evidence.push({ kind: "deterministic", summary: `Detected ${businessEndpoints.length} business API endpoint(s).`, source: "runner", confidence: 0.9, }); } if ((signals.branchFields || []).length > 0) { evidence.push({ kind: "deterministic", summary: `Branch fields: ${signals.branchFields.join(", ")}`, source: "runner", confidence: 0.86, }); } if ((signals.paginationVars || []).length > 0) { evidence.push({ kind: "deterministic", summary: `Pagination vars: ${signals.paginationVars.join(", ")}`, source: "runner", confidence: 0.84, }); } if ((signals.secondaryRequestMethods || []).length > 0) { evidence.push({ kind: "deterministic", summary: `Secondary request methods: ${signals.secondaryRequestMethods.join(", ")}`, source: "runner", confidence: 0.82, }); } if ((signals.exportMethods || []).length > 0) { evidence.push({ kind: "deterministic", summary: `Export methods: ${signals.exportMethods.join(", ")}`, source: "runner", confidence: 0.78, }); } evidence.push({ kind: "classification", summary: `Workflow archetype classified as ${workflowArchetype}.`, source: "runner", confidence: 0.72, }); return evidence; } function deriveBootstrap(signals) { const candidate = BOOTSTRAP_ROLE_PRIORITY .map((role) => (signals.bootstrapCandidates || []).find( (item) => item.role === role && item.validForBootstrap && item.targetUrl ) ) .find(Boolean); if (candidate) { return { expectedDomain: candidate.expectedDomain || "", targetUrl: candidate.targetUrl || "", requiresTargetPage: true, pageTitleKeywords: signals.pageTitleKeywords || [], source: candidate.source || "deterministic", }; } return { ...DEFAULT_BOOTSTRAP, pageTitleKeywords: signals.pageTitleKeywords || [], }; } function getBusinessEndpoints(signals) { return (signals.endpoints || []).filter((endpoint) => ["business_api", "gateway_api", "business_entry"].includes(endpoint.role) ); } function buildModes(signals) { const values = (signals.modeValues || []).slice(0, 4); if (!values.length) return []; return values.map((value, index) => ({ name: value, label: value, condition: { field: signals.branchFields?.find((field) => /mode|period|tab|type/i.test(field)) || "period_mode", operator: "equals", value, }, apiEndpoint: signals.endpoints?.[index] || signals.endpoints?.[0] || null, columnDefs: [], requestTemplate: {}, normalizeRules: buildNormalizeRules(signals), responsePath: signals.responsePaths?.[0] || "", })); } function buildWorkflowSteps(signals, workflowArchetype) { const steps = []; const businessEndpoints = getBusinessEndpoints(signals); const primaryEndpoint = businessEndpoints[0]?.name || null; const secondaryEndpoint = businessEndpoints[1]?.name || null; if (workflowArchetype === "multi_mode_request") { steps.push({ type: "request", entry: signals.entryMethods?.[0] || null, endpoint: primaryEndpoint, description: "Select mode and query the matching endpoint.", }); steps.push({ type: "transform", description: "Normalize mode-specific rows into a shared artifact.", }); return steps; } if (workflowArchetype === "paginated_enrichment") { steps.push({ type: "paginate", entry: signals.entryMethods?.[0] || null, endpoint: primaryEndpoint, description: "Iterate primary list pages.", }); if ((signals.secondaryRequestMethods || []).length > 0 || secondaryEndpoint) { steps.push({ type: "secondary_request", entry: signals.secondaryRequestMethods?.[0] || signals.entryMethods?.[1] || null, endpoint: secondaryEndpoint, description: "Fetch per-row or batched detail data.", }); } if (signals.filterExpressions?.[0]) { steps.push({ type: "filter", expr: signals.filterExpressions[0], description: "Apply business-side filtering.", }); } if (signals.exportMethods?.[0]) { steps.push({ type: "export", entry: signals.exportMethods[0], description: "Prepare export payload or trigger download logic.", }); } return steps; } if (workflowArchetype === "page_state_eval") { steps.push({ type: "page_state", entry: signals.entryMethods?.[0] || null, description: "Evaluate page state and derive readiness outcome.", }); return steps; } steps.push({ type: "request", entry: signals.entryMethods?.[0] || null, endpoint: primaryEndpoint, description: "Issue the primary scene request.", }); steps.push({ type: "transform", description: "Normalize the primary response.", }); return steps; } function buildNormalizeRules(signals) { return { type: "validate_required", requiredFields: [], filterNull: true, responseHints: signals.responsePaths || [], }; } function buildWorkflowEvidence(signals) { return { requestEntries: uniqueStringValues([ ...(signals.entryMethods || []).slice(0, 3), ...getBusinessEndpoints(signals).map((endpoint) => endpoint.name).slice(0, 3), ]), paginationFields: uniqueStringValues(signals.paginationVars || []), secondaryRequestEntries: uniqueStringValues([ ...(signals.secondaryRequestMethods || []), ...getBusinessEndpoints(signals).slice(1, 3).map((endpoint) => endpoint.name), ]), postProcessSteps: uniqueStringValues([ ...(signals.filterExpressions || []).map(() => "filter"), ...(signals.exportMethods || []).map(() => "export"), ]), }; } function buildParams(signals, workflowArchetype) { const params = []; const staticKeys = Object.keys(signals.staticParams || {}); if (staticKeys.some((key) => /org/i.test(key)) || workflowArchetype === "multi_mode_request") { params.push({ name: "org", resolver: "dictionary_entity", required: true, promptMissing: "Organization parameter is missing.", promptAmbiguous: "Organization parameter is ambiguous.", resolverConfig: {}, }); } if ( (signals.branchFields || []).some((field) => /period|mode/i.test(field)) || staticKeys.some((key) => /period/i.test(key)) ) { params.push({ name: "period", resolver: "month_week_period", required: true, promptMissing: "Period parameter is missing.", promptAmbiguous: "Period parameter is ambiguous.", resolverConfig: {}, }); } if ((signals.paginationVars || []).some((value) => /pageSize|rows|limit/i.test(value))) { params.push({ name: "page_size", resolver: "literal_passthrough", required: false, promptMissing: "", promptAmbiguous: "", resolverConfig: {}, }); } return params; } function buildUncertainties(signals, workflowArchetype) { const issues = []; if (!(signals.endpoints || []).length) { issues.push("No API endpoint was detected deterministically."); } if (!signals.bootstrapCandidates?.some((candidate) => candidate.validForBootstrap)) { issues.push("Bootstrap target URL is still inferred weakly."); } if (workflowArchetype === "paginated_enrichment" && getBusinessEndpoints(signals).length < 2) { issues.push("Secondary enrichment request is not strongly confirmed."); } if (workflowArchetype === "paginated_enrichment" && !(signals.filterExpressions || []).length && !(signals.exportMethods || []).length) { issues.push("Paginated enrichment is missing post-process evidence."); } return issues; } function scoreConfidence(signals, workflowArchetype) { let score = 0.3; if (getBusinessEndpoints(signals).length > 0) score += 0.18; if ((signals.bootstrapCandidates || []).some((candidate) => candidate.validForBootstrap)) score += 0.12; if ((signals.responsePaths || []).length > 0) score += 0.08; if ((signals.entryMethods || []).length > 0) score += 0.06; if (workflowArchetype === "multi_mode_request" && (signals.modeValues || []).length >= 2) score += 0.14; if ( workflowArchetype === "paginated_enrichment" && (signals.paginationVars || []).length > 0 && (signals.secondaryRequestMethods || []).length > 0 ) { score += 0.14; } return Math.min(0.95, Number(score.toFixed(2))); } function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiEndpoints, params, workflowSteps, confidence }) { const risks = []; const missingPieces = []; const notes = []; const businessApiEndpoints = (apiEndpoints || []).filter((endpoint) => ["business_api", "gateway_api", "business_entry"].includes(endpoint.role) ); if (!sceneIdDiagnostics?.valid) { missingPieces.push("invalid_scene_id"); risks.push(`Scene id is invalid${sceneIdDiagnostics?.invalidReason ? `: ${sceneIdDiagnostics.invalidReason}` : "."}`); } if (!bootstrap.targetUrl && !bootstrap.expectedDomain) { missingPieces.push("bootstrap_target"); risks.push("Business bootstrap target is not confirmed."); } else if (!bootstrap.expectedDomain) { risks.push("Expected domain is missing; host validation may be weak."); } if (!businessApiEndpoints.length) { missingPieces.push("api_endpoint"); risks.push("No request endpoint detected."); } if (!workflowSteps.length) { missingPieces.push("workflow_steps"); risks.push("Workflow steps are incomplete."); } if (workflowArchetype === "paginated_enrichment") { const hasPaginate = workflowSteps.some((step) => step.type === "paginate"); const hasSecondary = workflowSteps.some((step) => step.type === "secondary_request"); const hasPostProcess = workflowSteps.some((step) => ["filter", "transform", "export"].includes(step.type)); if (!hasPaginate) { missingPieces.push("paginate_step"); risks.push("Paginated enrichment lacks pagination evidence."); } if (!hasSecondary || businessApiEndpoints.length < 2) { missingPieces.push("secondary_request"); risks.push("Paginated enrichment lacks a strong secondary request signal."); } if (!hasPostProcess) { missingPieces.push("post_process"); risks.push("Paginated enrichment lacks filter/transform/export evidence."); } } if (workflowArchetype === "multi_mode_request" && !params.some((param) => param.name === "period")) { risks.push("Mode-aware workflow is missing a resolved period parameter."); } if (confidence < 0.55) { risks.push("Overall analysis confidence is low."); } let level = "A"; if (missingPieces.length > 0) { level = missingPieces.length >= 2 ? "C" : "B"; } else if (risks.length > 1 || confidence < 0.7) { level = "B"; } if (level === "A") { notes.push("Structure looks complete enough for direct trial."); } else if (level === "B") { notes.push("Generation should be reviewed before internal-network execution."); } else { notes.push("Manual completion is required before trial."); } const gates = [ { name: "scene_id_valid", passed: sceneIdDiagnostics?.valid !== false, reason: sceneIdDiagnostics?.valid === false ? sceneIdDiagnostics.invalidReason || "invalid_scene_id" : null, }, { name: "bootstrap_resolved", passed: Boolean(bootstrap.targetUrl || bootstrap.expectedDomain), reason: bootstrap.targetUrl || bootstrap.expectedDomain ? null : "bootstrap_target", }, { name: "workflow_complete_for_archetype", passed: !missingPieces.some((item) => ["workflow_steps", "paginate_step", "secondary_request", "post_process"].includes(item) ), reason: missingPieces.find((item) => ["workflow_steps", "paginate_step", "secondary_request", "post_process"].includes(item) ) || null, }, { name: "runtime_contract_compatible", passed: params.every((param) => ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver) ), reason: params.every((param) => ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver) ) ? null : "runtime_contract_incompatible", }, ]; return { level, confidence, gates, risks, missingPieces, notes, }; } function slugifyAscii(value) { return String(value || "") .replace(/([a-z0-9])([A-Z])/g, "$1-$2") .trim() .toLowerCase() .replace(/[^a-z0-9]+/g, "-") .replace(/^-+|-+$/g, ""); } function humanizeSceneName(value) { const raw = String(value || "").replace(/[-_]+/g, " ").trim(); if (!raw) return "Generated Scene"; return raw.replace(/\b\w/g, (char) => char.toUpperCase()); } function uniqueStringValues(list) { return Array.from(new Set((list || []).filter(Boolean))); } module.exports = { buildAnalysisContext, buildDeterministicSceneIr, deriveSceneIdDiagnostics, readDirectory, runGenerator, validateSceneIdCandidate, };