From 0fcdfb1787b6932dabfe547015c51380185fce78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=A8=E7=82=8E?= <635735027@qq.com> Date: Fri, 17 Apr 2026 18:43:04 +0800 Subject: [PATCH] feat(scene-generator): extract business JS files for LLM analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Identify and push js/ directory business logic files (mca.js, sgApi.js, etc.) to the LLM prompt. Exclude third-party libraries. Capped at 4 fragments to stay within MAX_DEEP_PROMPT_CHARS budget. 🤖 Generated with [Qoder][https://qoder.com] --- frontend/scene-generator/generator-runner.js | 1294 +++++++++++++++++- frontend/scene-generator/llm-client.js | 1 + 2 files changed, 1247 insertions(+), 48 deletions(-) diff --git a/frontend/scene-generator/generator-runner.js b/frontend/scene-generator/generator-runner.js index 016fdfb..bdc9bc6 100644 --- a/frontend/scene-generator/generator-runner.js +++ b/frontend/scene-generator/generator-runner.js @@ -1,10 +1,96 @@ -const { spawn } = require("child_process"); +const fs = require("fs"); const path = require("path"); +const { spawn } = require("child_process"); + +const TEXT_FILE_EXTENSIONS = new Set([ + ".html", + ".js", + ".jsx", + ".json", + ".md", + ".mjs", + ".toml", + ".ts", + ".tsx", + ".txt", + ".vue", +]); + +const DEFAULT_BOOTSTRAP = { + expectedDomain: "", + targetUrl: "", + requiresTargetPage: true, + pageTitleKeywords: [], + source: "deterministic", +}; + +const BOOTSTRAP_ROLE_PRIORITY = ["business_entry", "business_api", "gateway_api"]; + +const GENERIC_SCENE_IDS = new Set([ + "scene", + "report", + "generated", + "generated-scene", + "skill", + "default-scene", +]); + +const GENERIC_SCENE_ID_TOKENS = new Set([ + "api", + "app", + "data", + "default", + "export", + "generator", + "page", + "report", + "request", + "scene", + "skill", + "system", + "table", + "temp", + "test", + "tmp", +]); + +const SCENE_ID_ALIAS_RULES = [ + { pattern: /营销\s*2(?:\.|。)?0/gi, replacement: "marketing" }, + { pattern: /零度户报表数据生成/gi, replacement: "zero-consumer-report" }, + { pattern: /零度户报表/gi, replacement: "zero-consumer-report" }, + { pattern: /零度户/gi, replacement: "zero-consumer" }, + { pattern: /台区线损率统计分析/gi, replacement: "tq-lineloss-analysis" }, + { pattern: /台区线损/gi, replacement: "tq-lineloss" }, + { pattern: /线损率统计分析/gi, replacement: "lineloss-analysis" }, + { pattern: /线损率/gi, replacement: "lineloss-rate" }, + { pattern: /线损/gi, replacement: "lineloss" }, + { pattern: /台区/gi, replacement: "tq" }, + { pattern: /月[_-]?周累计/gi, replacement: "monthly-weekly-cumulative" }, + { pattern: /月累计/gi, replacement: "monthly-cumulative" }, + { pattern: /周累计/gi, replacement: "weekly-cumulative" }, + { pattern: /统计分析/gi, replacement: "analysis" }, + { pattern: /报表数据生成/gi, replacement: "report" }, + { pattern: /报表/gi, replacement: "report" }, + { pattern: /数据生成/gi, replacement: "generator" }, +]; + +function normalizePath(inputPath) { + return inputPath.replace(/\\/g, "/"); +} function runGenerator(params, sseWriter, projectRoot) { - const { sourceDir, sceneId, sceneName, sceneKind, targetUrl, outputRoot, lessons, sceneInfoJson } = params; - - const normalize = (p) => p.replace(/\\/g, "/"); + const { + sourceDir, + sceneId, + sceneName, + sceneKind, + targetUrl, + outputRoot, + lessons, + sceneInfoJson, + sceneIrJson, + completionMeta, + } = params; const args = [ "run", @@ -12,43 +98,44 @@ function runGenerator(params, sseWriter, projectRoot) { "sg_scene_generate", "--", "--source-dir", - normalize(sourceDir), + normalizePath(sourceDir), "--scene-id", sceneId, "--scene-name", sceneName, ]; - // 只有明确指定 sceneKind 时才添加参数(否则使用默认值 report_collection) if (sceneKind) { args.push("--scene-kind", sceneKind); } - // 如果提供了 targetUrl,添加参数 if (targetUrl) { args.push("--target-url", targetUrl); } - args.push("--output-root", normalize(outputRoot)); + args.push("--output-root", normalizePath(outputRoot)); if (lessons) { - args.push("--lessons", normalize(lessons)); + args.push("--lessons", normalizePath(lessons)); } - // Pass scene info JSON if available if (sceneInfoJson) { args.push("--scene-info-json", sceneInfoJson); } + if (sceneIrJson) { + args.push("--scene-ir-json", sceneIrJson); + } + return new Promise((resolve, reject) => { sseWriter.write( `event: status\ndata: ${JSON.stringify({ - message: "开始生成 skill 包...", + message: "Starting skill generation...", })}\n\n` ); sseWriter.write( `event: status\ndata: ${JSON.stringify({ - message: `执行: cargo ${args.join(" ")}`, + message: `Running: cargo ${args.join(" ")}`, })}\n\n` ); @@ -67,7 +154,7 @@ function runGenerator(params, sseWriter, projectRoot) { child.kill("SIGTERM"); sseWriter.write( `event: error\ndata: ${JSON.stringify({ - message: "生成超时(5分钟)", + message: "Generation timed out after 5 minutes.", })}\n\n` ); resolve({ success: false, error: "timeout" }); @@ -98,20 +185,23 @@ function runGenerator(params, sseWriter, projectRoot) { const skillRoot = match ? match[1] : null; sseWriter.write( `event: status\ndata: ${JSON.stringify({ - message: "✅ 生成成功", + message: "Skill generation completed.", })}\n\n` ); sseWriter.write( `event: complete\ndata: ${JSON.stringify({ success: true, skillRoot, + readiness: completionMeta?.readiness || null, + workflowArchetype: completionMeta?.workflowArchetype || null, + confidence: completionMeta?.confidence || 0, })}\n\n` ); resolve({ success: true, skillRoot }); } else { sseWriter.write( `event: error\ndata: ${JSON.stringify({ - message: `生成失败 (exit code ${code})`, + message: `Generation failed (exit code ${code})`, })}\n\n` ); if (stderr.trim()) { @@ -129,7 +219,7 @@ function runGenerator(params, sseWriter, projectRoot) { clearTimeout(timeout); sseWriter.write( `event: error\ndata: ${JSON.stringify({ - message: `无法启动 cargo: ${err.message}`, + message: `Failed to start cargo: ${err.message}`, })}\n\n` ); reject(err); @@ -138,9 +228,6 @@ function runGenerator(params, sseWriter, projectRoot) { } function readDirectory(sourceDir) { - const fs = require("fs"); - const p = require("path"); - if (!fs.existsSync(sourceDir)) { throw new Error(`Directory not found: ${sourceDir}`); } @@ -150,48 +237,1159 @@ function readDirectory(sourceDir) { throw new Error(`Not a directory: ${sourceDir}`); } - const result = {}; - const entries = fs.readdirSync(sourceDir, { withFileTypes: true }); - + const files = []; const treeLines = []; - for (const entry of entries) { - treeLines.push(`├── ${entry.name}`); - } - result.tree = treeLines.join("\n"); - const sceneTomlPath = p.join(sourceDir, "scene.toml"); - if (fs.existsSync(sceneTomlPath)) { - result["scene.toml"] = fs.readFileSync(sceneTomlPath, "utf-8"); - } + walkDirectory(sourceDir, "", files, treeLines); - const skillTomlPath = p.join(sourceDir, "SKILL.toml"); - if (fs.existsSync(skillTomlPath)) { - result["SKILL.toml"] = fs.readFileSync(skillTomlPath, "utf-8"); - } + const result = { + tree: treeLines.join("\n"), + files, + }; - const skillMdPath = p.join(sourceDir, "SKILL.md"); - if (fs.existsSync(skillMdPath)) { - result["SKILL.md"] = fs.readFileSync(skillMdPath, "utf-8"); - } - - // Read index.html - const indexHtmlPath = p.join(sourceDir, "index.html"); - if (fs.existsSync(indexHtmlPath)) { - result.indexHtml = fs.readFileSync(indexHtmlPath, "utf-8"); + for (const file of files) { + const baseName = path.basename(file.path); + if (baseName === "scene.toml") result["scene.toml"] = file.content; + if (baseName === "SKILL.toml") result["SKILL.toml"] = file.content; + if (baseName === "SKILL.md") result["SKILL.md"] = file.content; + if (baseName === "index.html" && !result.indexHtml) result.indexHtml = file.content; } const scripts = {}; - for (const entry of entries) { - if (entry.isFile() && entry.name.endsWith(".js")) { - const scriptPath = p.join(sourceDir, entry.name); - scripts[entry.name] = fs.readFileSync(scriptPath, "utf-8"); + for (const file of files) { + if (file.path.endsWith(".js")) { + scripts[file.path] = file.content; } } if (Object.keys(scripts).length > 0) { result.scripts = scripts; } + result.analysisContext = buildAnalysisContext(sourceDir, result); + result.deterministic = buildDeterministicSceneIr(result.analysisContext, sourceDir); return result; } -module.exports = { runGenerator, readDirectory }; +function walkDirectory(rootDir, relativeDir, files, treeLines) { + const absoluteDir = relativeDir ? path.join(rootDir, relativeDir) : rootDir; + const entries = fs + .readdirSync(absoluteDir, { withFileTypes: true }) + .sort((a, b) => a.name.localeCompare(b.name, "en")); + + for (const entry of entries) { + const relativePath = relativeDir + ? path.posix.join(normalizePath(relativeDir), entry.name) + : entry.name; + treeLines.push(`${entry.isDirectory() ? "[D]" : "[F]"} ${relativePath}`); + + if (entry.isDirectory()) { + walkDirectory(rootDir, relativePath, files, treeLines); + continue; + } + + const ext = path.extname(entry.name).toLowerCase(); + if (!TEXT_FILE_EXTENSIONS.has(ext)) { + continue; + } + + const absolutePath = path.join(absoluteDir, entry.name); + const stat = fs.statSync(absolutePath); + if (stat.size > 1024 * 1024) { + continue; + } + + const content = fs.readFileSync(absolutePath, "utf-8"); + files.push({ + path: normalizePath(relativePath), + content, + }); + } +} + +function buildAnalysisContext(sourceDir, dirContents) { + const files = Array.isArray(dirContents.files) ? dirContents.files : []; + const indexHtml = dirContents.indexHtml || ""; + const directorySummary = { + sourceDir: normalizePath(sourceDir), + tree: dirContents.tree || "(empty)", + files: files.map((file) => ({ + path: file.path, + length: file.content.length, + })), + }; + + const THIRD_PARTY_JS_PATTERNS = [ + "vue.js", + "vue.min.js", + "element-ui", + "elementui", + "axios", + "jquery", + "jquery.min.js", + "echarts", + "echarts.min.js", + ]; + + const dirFiles = dirContents.files || []; + const businessJsFragments = []; + for (const file of dirFiles) { + if (!file.path.startsWith("js/") || !file.path.endsWith(".js")) continue; + const baseName = path.basename(file.path).toLowerCase(); + if (THIRD_PARTY_JS_PATTERNS.some((pattern) => baseName.includes(pattern))) continue; + businessJsFragments.push({ + path: file.path, + snippet: file.content.length > 600 ? file.content.slice(0, 600) : file.content, + }); + } + + return { + directorySummary, + indexHtmlChunks: chunkFile("index.html", indexHtml, 3000, 2), + urlFragments: collectFragments(files, isUrlFragment, 10), + requestFragments: collectFragments(files, isRequestFragment, 10), + branchingFragments: collectFragments(files, isBranchFragment, 8), + responseFragments: collectFragments(files, isResponseFragment, 8), + exportFragments: collectFragments(files, isExportFragment, 6), + bootstrapHints: collectBootstrapHints(files, indexHtml), + deterministicSignals: collectDeterministicSignals(files, indexHtml), + businessJsFragments, + }; +} + +function chunkFile(filePath, content, chunkSize, maxChunks) { + if (!content) return []; + const chunks = []; + let index = 0; + let offset = 0; + while (offset < content.length && chunks.length < maxChunks) { + chunks.push({ + path: filePath, + index, + start: offset, + end: Math.min(offset + chunkSize, content.length), + content: content.slice(offset, offset + chunkSize), + }); + index += 1; + offset += chunkSize; + } + return chunks; +} + +function collectFragments(files, predicate, limit) { + const fragments = []; + for (const file of files) { + const fileFragments = extractLineFragments(file.path, file.content, predicate, limit - fragments.length); + fragments.push(...fileFragments); + if (fragments.length >= limit) { + break; + } + } + return fragments; +} + +function extractLineFragments(filePath, content, predicate, remainingLimit) { + if (!content || remainingLimit <= 0) return []; + const fragments = []; + const lines = content.split(/\r?\n/); + const seen = new Set(); + + for (let i = 0; i < lines.length && fragments.length < remainingLimit; i += 1) { + if (!predicate(lines[i])) continue; + const start = Math.max(0, i - 3); + const end = Math.min(lines.length, i + 4); + const snippet = lines.slice(start, end).join("\n").trim(); + if (!snippet || seen.has(snippet)) continue; + seen.add(snippet); + fragments.push({ + path: filePath, + lineStart: start + 1, + lineEnd: end, + snippet: snippet.length > 1200 ? snippet.slice(0, 1200) : snippet, + }); + } + + return fragments; +} + +function isUrlFragment(line) { + return /(https?:\/\/|['"`]\/[^'"`\s]+|url\s*:|fetch\s*\(|axios\.(get|post|request)|\$\.(ajax|get|post))/i.test(line); +} + +function isRequestFragment(line) { + return /(contentType|JSON\.stringify|requestBody|requestData|payload|params|data\s*:|body\s*:|FormData|\bpage(Size|No)?\b|\brows\b)/i.test(line); +} + +function isBranchFragment(line) { + return /\b(if|else if|switch|case)\b/.test(line) && /(mode|period|reportType|tab|status|scene|type)/i.test(line); +} + +function isResponseFragment(line) { + return /(response|res|result)\.(data|content|rows|list|records|items)|\.map\(|\.filter\(|columnDefs|columns|normalize|transform/i.test(line); +} + +function isExportFragment(line) { + return /(export|download|xlsx|csv|blob|saveAs|excel)/i.test(line); +} + +function collectBootstrapHints(files, indexHtml) { + const hints = []; + const seen = new Set(); + + for (const match of indexHtml.matchAll(/<(a|form|iframe)[^>]+(?:href|action|src)=["']([^"']+)["']/gi)) { + const url = match[2]; + if (url && !isStaticAssetUrl(url) && !seen.has(url)) { + seen.add(url); + hints.push({ type: match[1], url }); + } + } + + for (const file of files) { + const namedUrlMatches = file.content.matchAll( + /\b(sourceUrl|sourceURL|baseUrl|baseURL|targetUrl|requestUrl|apiUrl|gatewayUrl)\b\s*[:=]\s*(['"`])(https?:\/\/[^'"`\s]+)\2/gi + ); + for (const match of namedUrlMatches) { + const url = match[3]; + const type = String(match[1] || "").toLowerCase(); + if (url && !seen.has(url)) { + seen.add(url); + hints.push({ type, url, path: file.path }); + } + } + + const matches = file.content.matchAll(/window\.open\((['"`])([^'"`]+)\1|location\.(?:href|assign|replace)\((['"`])([^'"`]+)\3/gi); + for (const match of matches) { + const url = match[2] || match[4]; + if (url && !seen.has(url)) { + seen.add(url); + hints.push({ type: "navigation", url, path: file.path }); + } + } + } + + return hints.slice(0, 12); +} + +function collectDeterministicSignals(files, indexHtml) { + const urls = new Map(); + const methods = new Map(); + const responsePaths = new Set(); + const branchFields = new Set(); + const modeValues = new Set(); + const paginationVars = new Set(); + const filterExpressions = new Set(); + const entryMethods = new Set(); + const exportMethods = new Set(); + const secondaryRequestMethods = new Set(); + const pageTitleKeywords = new Set(); + const staticParams = {}; + + for (const file of files) { + const content = file.content; + + for (const endpoint of extractEndpoints(content)) { + const key = `${endpoint.method}|${endpoint.url}`; + if (!urls.has(key)) { + urls.set(key, endpoint); + } + } + + for (const match of content.matchAll(/\b(type|method)\s*:\s*['"`](GET|POST|PUT|DELETE|PATCH)['"`]/gi)) { + methods.set(match[2].toUpperCase(), true); + } + + for (const match of content.matchAll(/\b(?:response|res|result)\.(data|content|rows|list|records|items)\b/g)) { + responsePaths.add(match[1]); + } + + for (const match of content.matchAll(/\b(?:if|switch|case)\b[\s\S]{0,120}?(period_mode|reportType|mode|tab|sceneType|status|type)\b/gi)) { + branchFields.add(match[1]); + } + + for (const match of content.matchAll(/\b(period_mode|reportType|mode|tab)\b[\s\S]{0,80}?['"`](month|week|day|detail|summary|list|chart)['"`]/gi)) { + modeValues.add(match[2]); + } + + for (const match of content.matchAll(/\b(pageSize|pageNo|pageNum|page|rows|limit|offset)\b/g)) { + paginationVars.add(match[1]); + } + + for (const match of content.matchAll(/\.filter\(\s*(.+?)\s*\)|if\s*\(([^)]*(?:!==|!=|===|==|>|<)[^)]*)\)/g)) { + const expr = (match[1] || match[2] || "").trim(); + if (expr && expr.length <= 160) { + filterExpressions.add(expr); + } + } + + for (const match of content.matchAll(/(?:function\s+|const\s+|let\s+|var\s+)([A-Za-z_$][\w$]*)\s*(?:=\s*(?:async\s*)?\(|\()/g)) { + const name = match[1]; + if (/(query|search|load|fetch|init|mounted|created|getData)/i.test(name)) { + entryMethods.add(name); + } + if (/(export|download|excel|csv)/i.test(name)) { + exportMethods.add(name); + } + if (/(detail|charge|charges|info|details)/i.test(name)) { + secondaryRequestMethods.add(name); + } + } + + for (const match of content.matchAll(/(?:title|document\.title)\s*[:=]\s*['"`]([^'"`]{2,40})['"`]/gi)) { + pageTitleKeywords.add(match[1]); + } + + const staticParamMatches = content.matchAll(/\b(orgNo|orgCode|orgId|period_mode|reportType|pageSize|rows)\b\s*:\s*['"`]([^'"`\n]+)['"`]/gi); + for (const match of staticParamMatches) { + if (!(match[1] in staticParams)) { + staticParams[match[1]] = match[2]; + } + } + } + + for (const match of indexHtml.matchAll(/([^<]{2,40})<\/title>/gi)) { + pageTitleKeywords.add(match[1].trim()); + } + + const bootstrapCandidates = collectBootstrapCandidates(files, indexHtml, Array.from(urls.values())); + return { + endpoints: Array.from(urls.values()), + methods: Array.from(methods.keys()), + responsePaths: Array.from(responsePaths), + branchFields: Array.from(branchFields), + modeValues: Array.from(modeValues), + paginationVars: Array.from(paginationVars), + filterExpressions: Array.from(filterExpressions).slice(0, 8), + entryMethods: Array.from(entryMethods).slice(0, 10), + exportMethods: Array.from(exportMethods).slice(0, 10), + secondaryRequestMethods: Array.from(secondaryRequestMethods).slice(0, 10), + pageTitleKeywords: Array.from(pageTitleKeywords).slice(0, 10), + staticParams, + bootstrapCandidates, + }; +} + +function extractEndpoints(content) { + const endpoints = []; + const seen = new Set(); + const lines = content.split(/\r?\n/); + + for (let index = 0; index < lines.length; index += 1) { + const line = lines[index]; + if (!isUrlFragment(line)) continue; + + const block = lines.slice(Math.max(0, index - 2), Math.min(lines.length, index + 5)).join("\n"); + const urlMatch = + block.match(/\burl\s*:\s*(['"`])([^'"`]+)\1/i) || + block.match(/fetch\s*\(\s*(['"`])([^'"`]+)\1/i) || + block.match(/axios\.(?:get|post|request)\s*\(\s*(['"`])([^'"`]+)\1/i); + + if (!urlMatch) continue; + + const url = sanitizeUrl(urlMatch[2]); + if (!url) continue; + + const methodMatch = + block.match(/\b(?:type|method)\s*:\s*(['"`])([A-Z]+)\1/i) || + block.match(/\baxios\.post\s*\(/i) || + block.match(/\baxios\.get\s*\(/i); + const method = methodMatch + ? String(methodMatch[2] || (methodMatch[0].includes(".post") ? "POST" : "GET")).toUpperCase() + : "GET"; + const contentTypeMatch = block.match(/\bcontentType\s*:\s*(['"`])([^'"`]+)\1/i); + const name = inferEndpointName(block, url, endpoints.length); + const role = classifyRequestRole(url); + const key = `${method}|${url}`; + if (seen.has(key)) continue; + seen.add(key); + endpoints.push({ + name, + url, + role, + method, + contentType: contentTypeMatch ? contentTypeMatch[2] : null, + description: `Detected from source snippet around line ${index + 1}`, + }); + } + + return endpoints.slice(0, 12); +} + +function sanitizeUrl(rawUrl) { + if (!rawUrl) return ""; + const value = rawUrl.trim(); + if (!value) return ""; + if (isStaticAssetUrl(value)) return ""; + if (isTemplateNoiseUrl(value)) return ""; + if (/^(javascript:|data:|#)/i.test(value)) return ""; + if (/\.js(\?|$)|\.css(\?|$)|\.png(\?|$)|\.svg(\?|$)/i.test(value)) return ""; + return value; +} + +function classifyRequestRole(rawUrl) { + const value = String(rawUrl || "").toLowerCase(); + if (!value || isTemplateNoiseUrl(value)) return "template_noise"; + if (value.includes("localhost") || value.includes("127.0.0.1")) { + return /(surfaceservices|reportservices|export)/i.test(value) ? "export_service" : "local_helper"; + } + if (value.includes("gateway")) return "gateway_api"; + return "business_api"; +} + +function inferEndpointName(block, url, index) { + const functionMatch = block.match(/(?:function|const|let|var)\s+([A-Za-z_$][\w$]*)/); + if (functionMatch) return functionMatch[1]; + const pathParts = url.split(/[/?#]/).filter(Boolean); + return pathParts[pathParts.length - 1] || `endpoint_${index + 1}`; +} + +function collectBootstrapCandidates(files, indexHtml, endpoints) { + const candidates = []; + const seen = new Set(); + + for (const endpoint of endpoints) { + const candidate = buildBootstrapCandidate(endpoint.url, "api_endpoint"); + const key = candidate ? `${candidate.role}|${candidate.targetUrl}` : ""; + if (candidate && !seen.has(key)) { + seen.add(key); + candidates.push(candidate); + } + } + + for (const hint of collectBootstrapHints(files, indexHtml)) { + const candidate = buildBootstrapCandidate(hint.url, hint.type); + const key = candidate ? `${candidate.role}|${candidate.targetUrl}` : ""; + if (candidate && !seen.has(key)) { + seen.add(key); + candidates.push(candidate); + } + } + + return candidates.slice(0, 8); +} + +function buildBootstrapCandidate(rawUrl, source) { + if (!rawUrl) return null; + try { + const url = new URL(rawUrl, "http://placeholder.local"); + const role = classifyBootstrapRole(rawUrl, source, url); + if (!role) { + return null; + } + const isAbsolute = /^https?:\/\//i.test(rawUrl); + const targetUrl = isAbsolute + ? role === "business_api" || role === "gateway_api" + ? url.origin + : `${url.origin}${url.pathname}` + : rawUrl; + return { + expectedDomain: isAbsolute ? url.hostname : "", + targetUrl, + source, + role, + validForBootstrap: BOOTSTRAP_ROLE_PRIORITY.includes(role), + }; + } catch (_) { + return null; + } +} + +function classifyBootstrapRole(rawUrl, source, parsedUrl) { + const value = String(rawUrl || "").trim(); + if (!value || isTemplateNoiseUrl(value)) return "template_noise"; + if (isStaticAssetUrl(value)) return "static_asset"; + if (/^(javascript:|data:|#)/i.test(value)) return "template_noise"; + + const isAbsolute = /^https?:\/\//i.test(value); + if (!isAbsolute) { + return value.startsWith("/") ? "business_entry" : "template_noise"; + } + + const hostname = String(parsedUrl?.hostname || "").toLowerCase(); + const pathname = String(parsedUrl?.pathname || ""); + if (hostname === "localhost" || hostname === "127.0.0.1") { + if (/(SurfaceServices|ReportServices|export)/i.test(pathname)) { + return "export_service"; + } + return "local_helper"; + } + if (/(gateway)/i.test(hostname) || /(gateway)/i.test(value)) { + return "gateway_api"; + } + if (/(sourceurl|targeturl|navigation|form|iframe|a)/i.test(source)) { + return "business_entry"; + } + if (/(apiurl|requesturl|baseurl|api_endpoint)/i.test(source) || /\/api\//i.test(pathname)) { + return "business_api"; + } + return "business_entry"; +} + +function isTemplateNoiseUrl(rawUrl) { + return /\$\{[^}]+\}|%s|placeholder|not a valid/i.test(rawUrl); +} + +function isStaticAssetUrl(rawUrl) { + return /(?:cdn|static|assets?)|(?:\.js|\.css|\.png|\.jpg|\.jpeg|\.gif|\.svg|\.ico)(?:\?|$)/i.test(rawUrl); +} + +function buildDeterministicSceneIr(context, sourceDir) { + const signals = context.deterministicSignals || {}; + const sceneName = humanizeSceneName(path.basename(sourceDir)); + const sceneIdDiagnostics = deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }); + const sceneId = sceneIdDiagnostics.selected || ""; + const workflowArchetype = classifyWorkflowArchetype(signals); + const evidence = buildEvidence(signals, workflowArchetype); + const bootstrap = deriveBootstrap(signals); + const modes = buildModes(signals); + const workflowSteps = buildWorkflowSteps(signals, workflowArchetype); + const workflowEvidence = buildWorkflowEvidence(signals); + const responsePath = signals.responsePaths?.[0] || ""; + const normalizeRules = buildNormalizeRules(signals); + const params = buildParams(signals, workflowArchetype); + const confidence = scoreConfidence(signals, workflowArchetype); + const readiness = buildReadiness({ + sceneIdDiagnostics, + workflowArchetype, + bootstrap, + apiEndpoints: signals.endpoints || [], + params, + workflowSteps, + confidence, + }); + + return { + sceneId, + sceneIdDiagnostics, + sceneName, + sceneKind: "report_collection", + workflowArchetype, + bootstrap, + params, + modes, + defaultMode: modes[0] ? modes[0].name : null, + modeSwitchField: signals.branchFields?.find((field) => /mode|period/i.test(field)) || null, + workflowSteps, + workflowEvidence, + requestTemplate: {}, + responsePath, + normalizeRules, + artifactContract: { + type: "report-artifact", + successStatus: ["ok", "partial", "empty"], + failureStatus: ["blocked", "error"], + }, + validationHints: { + requiresTargetPage: true, + runtimeCompatible: params.every((param) => + ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver) + ), + manualCompletionRequired: readiness.level !== "A", + missingPieces: readiness.missingPieces.slice(), + }, + evidence, + readiness, + apiEndpoints: signals.endpoints || [], + staticParams: signals.staticParams || {}, + columnDefs: [], + confidence, + uncertainties: buildUncertainties(signals, workflowArchetype), + deterministicSignals: signals, + }; +} + +function deriveSceneIdDiagnostics({ sourceDir, sceneName, signals }) { + const baseName = path.basename(sourceDir || ""); + const candidates = []; + const seen = new Set(); + + function pushCandidate(value, source) { + const normalized = slugifyAscii(value); + if (!normalized || seen.has(normalized)) return; + seen.add(normalized); + const validation = validateSceneIdCandidate(normalized, { sceneName, sourceDir, signals }); + candidates.push({ + value: normalized, + source, + valid: validation.valid, + reason: validation.valid ? null : validation.reason, + }); + } + + pushCandidate(buildKeywordSceneId(baseName, sceneName, signals), "deterministic_keywords"); + pushCandidate(buildAliasSceneId(baseName), "controlled_alias"); + pushCandidate(slugifyAscii(baseName), "directory_slug"); + + const selectedCandidate = candidates.find((candidate) => candidate.valid) || candidates[0] || null; + return { + selected: selectedCandidate?.value || "", + candidateSource: selectedCandidate?.source || "", + valid: Boolean(selectedCandidate?.valid), + invalidReason: selectedCandidate && !selectedCandidate.valid ? selectedCandidate.reason : null, + candidates, + }; +} + +function buildKeywordSceneId(baseName, sceneName, signals) { + const aliasedCandidates = Array.from( + new Set( + [baseName, sceneName] + .filter(Boolean) + .map((value) => slugifyAscii(applySceneIdAliases(value))) + .filter(Boolean) + ) + ).sort((left, right) => right.length - left.length); + const aliased = aliasedCandidates[0] || ""; + if (aliased && aliased.split("-").length >= 2) { + return aliased; + } + const tokens = extractBusinessTokens( + [baseName, sceneName] + .filter(Boolean) + .join(" "), + signals + ); + if (!tokens.length) return ""; + return tokens.slice(0, 5).join("-"); +} + +function buildAliasSceneId(baseName) { + if (!baseName) return ""; + let value = String(baseName || ""); + for (const rule of SCENE_ID_ALIAS_RULES) { + value = value.replace(rule.pattern, ` ${rule.replacement} `); + } + return slugifyAscii(value); +} + +function extractBusinessTokens(rawText, signals = {}) { + const candidates = []; + const pushTokens = (value) => { + const slug = slugifyAscii(value); + if (!slug) return; + for (const token of slug.split("-")) { + if (!token || token.length < 2 || GENERIC_SCENE_ID_TOKENS.has(token)) continue; + candidates.push(token); + } + }; + + pushTokens(applySceneIdAliases(rawText)); + + for (const keyword of signals.pageTitleKeywords || []) { + pushTokens(applySceneIdAliases(keyword)); + } + + for (const endpoint of signals.endpoints || []) { + pushTokens(endpoint?.name); + const urlValue = String(endpoint?.url || ""); + const segments = urlValue + .split(/[/?#=&._-]+/) + .filter(Boolean) + .slice(-4); + for (const segment of segments) { + pushTokens(segment); + } + } + + return Array.from(new Set(candidates)); +} + +function applySceneIdAliases(value) { + let text = String(value || ""); + for (const rule of SCENE_ID_ALIAS_RULES) { + text = text.replace(rule.pattern, ` ${rule.replacement} `); + } + return text; +} + +function validateSceneIdCandidate(sceneId, { sceneName = "", sourceDir = "", signals = {} } = {}) { + const normalized = slugifyAscii(sceneId); + if (!normalized) { + return { valid: false, reason: "empty_scene_id" }; + } + if (GENERIC_SCENE_IDS.has(normalized)) { + return { valid: false, reason: "generic_scene_id" }; + } + + const letters = (normalized.match(/[a-z]/g) || []).length; + const digits = (normalized.match(/\d/g) || []).length; + if (!letters) { + return { valid: false, reason: "numeric_only_scene_id" }; + } + if ((normalized.length < 5 || letters < 3) && !normalized.includes("-")) { + return { valid: false, reason: "scene_id_too_short" }; + } + if (digits > letters && letters < 4) { + return { valid: false, reason: "numeric_dominant_scene_id" }; + } + + const expectedTokens = extractBusinessTokens( + [sceneName, path.basename(sourceDir || "")] + .filter(Boolean) + .join(" "), + signals + ).filter((token) => token.length >= 3); + if (expectedTokens.length) { + const matched = expectedTokens.some((token) => normalized.includes(token)); + if (!matched) { + return { valid: false, reason: "scene_id_semantic_detached" }; + } + } + + return { valid: true, reason: null }; +} + +function classifyWorkflowArchetype(signals) { + const businessEndpoints = getBusinessEndpoints(signals); + const hasPagination = (signals.paginationVars || []).length > 0; + const hasSecondaryRequest = + (signals.secondaryRequestMethods || []).length > 0 || businessEndpoints.length >= 2; + const hasPostProcess = + (signals.filterExpressions || []).length > 0 || (signals.exportMethods || []).length > 0; + if (hasPagination && hasSecondaryRequest && hasPostProcess) { + return "paginated_enrichment"; + } + + const hasModeBranch = (signals.branchFields || []).some((field) => /period_mode|reportType|tjzq|mode/i.test(field)); + const hasModeValues = (signals.modeValues || []).length >= 2; + const hasModeDivergence = hasModeValues && businessEndpoints.length >= 2; + if (hasModeBranch && hasModeDivergence) { + return "multi_mode_request"; + } + + const pageStateSignals = [...(signals.entryMethods || []), ...(signals.filterExpressions || [])].join(" "); + if (/(state|status|ready|available|enabled)/i.test(pageStateSignals) && businessEndpoints.length <= 1) { + return "page_state_eval"; + } + + return "single_request_table"; +} + +function buildEvidence(signals, workflowArchetype) { + const evidence = []; + const businessEndpoints = getBusinessEndpoints(signals); + + if (businessEndpoints.length > 0) { + evidence.push({ + kind: "deterministic", + summary: `Detected ${businessEndpoints.length} business API endpoint(s).`, + source: "runner", + confidence: 0.9, + }); + } + + if ((signals.branchFields || []).length > 0) { + evidence.push({ + kind: "deterministic", + summary: `Branch fields: ${signals.branchFields.join(", ")}`, + source: "runner", + confidence: 0.86, + }); + } + + if ((signals.paginationVars || []).length > 0) { + evidence.push({ + kind: "deterministic", + summary: `Pagination vars: ${signals.paginationVars.join(", ")}`, + source: "runner", + confidence: 0.84, + }); + } + + if ((signals.secondaryRequestMethods || []).length > 0) { + evidence.push({ + kind: "deterministic", + summary: `Secondary request methods: ${signals.secondaryRequestMethods.join(", ")}`, + source: "runner", + confidence: 0.82, + }); + } + + if ((signals.exportMethods || []).length > 0) { + evidence.push({ + kind: "deterministic", + summary: `Export methods: ${signals.exportMethods.join(", ")}`, + source: "runner", + confidence: 0.78, + }); + } + + evidence.push({ + kind: "classification", + summary: `Workflow archetype classified as ${workflowArchetype}.`, + source: "runner", + confidence: 0.72, + }); + + return evidence; +} + +function deriveBootstrap(signals) { + const candidate = BOOTSTRAP_ROLE_PRIORITY + .map((role) => + (signals.bootstrapCandidates || []).find( + (item) => item.role === role && item.validForBootstrap && item.targetUrl + ) + ) + .find(Boolean); + + if (candidate) { + return { + expectedDomain: candidate.expectedDomain || "", + targetUrl: candidate.targetUrl || "", + requiresTargetPage: true, + pageTitleKeywords: signals.pageTitleKeywords || [], + source: candidate.source || "deterministic", + }; + } + + return { + ...DEFAULT_BOOTSTRAP, + pageTitleKeywords: signals.pageTitleKeywords || [], + }; +} + +function getBusinessEndpoints(signals) { + return (signals.endpoints || []).filter((endpoint) => + ["business_api", "gateway_api", "business_entry"].includes(endpoint.role) + ); +} + +function buildModes(signals) { + const values = (signals.modeValues || []).slice(0, 4); + if (!values.length) return []; + + return values.map((value, index) => ({ + name: value, + label: value, + condition: { + field: signals.branchFields?.find((field) => /mode|period|tab|type/i.test(field)) || "period_mode", + operator: "equals", + value, + }, + apiEndpoint: signals.endpoints?.[index] || signals.endpoints?.[0] || null, + columnDefs: [], + requestTemplate: {}, + normalizeRules: buildNormalizeRules(signals), + responsePath: signals.responsePaths?.[0] || "", + })); +} + +function buildWorkflowSteps(signals, workflowArchetype) { + const steps = []; + const businessEndpoints = getBusinessEndpoints(signals); + const primaryEndpoint = businessEndpoints[0]?.name || null; + const secondaryEndpoint = businessEndpoints[1]?.name || null; + + if (workflowArchetype === "multi_mode_request") { + steps.push({ + type: "request", + entry: signals.entryMethods?.[0] || null, + endpoint: primaryEndpoint, + description: "Select mode and query the matching endpoint.", + }); + steps.push({ + type: "transform", + description: "Normalize mode-specific rows into a shared artifact.", + }); + return steps; + } + + if (workflowArchetype === "paginated_enrichment") { + steps.push({ + type: "paginate", + entry: signals.entryMethods?.[0] || null, + endpoint: primaryEndpoint, + description: "Iterate primary list pages.", + }); + if ((signals.secondaryRequestMethods || []).length > 0 || secondaryEndpoint) { + steps.push({ + type: "secondary_request", + entry: signals.secondaryRequestMethods?.[0] || signals.entryMethods?.[1] || null, + endpoint: secondaryEndpoint, + description: "Fetch per-row or batched detail data.", + }); + } + if (signals.filterExpressions?.[0]) { + steps.push({ + type: "filter", + expr: signals.filterExpressions[0], + description: "Apply business-side filtering.", + }); + } + if (signals.exportMethods?.[0]) { + steps.push({ + type: "export", + entry: signals.exportMethods[0], + description: "Prepare export payload or trigger download logic.", + }); + } + return steps; + } + + if (workflowArchetype === "page_state_eval") { + steps.push({ + type: "page_state", + entry: signals.entryMethods?.[0] || null, + description: "Evaluate page state and derive readiness outcome.", + }); + return steps; + } + + steps.push({ + type: "request", + entry: signals.entryMethods?.[0] || null, + endpoint: primaryEndpoint, + description: "Issue the primary scene request.", + }); + steps.push({ + type: "transform", + description: "Normalize the primary response.", + }); + return steps; +} + +function buildNormalizeRules(signals) { + return { + type: "validate_required", + requiredFields: [], + filterNull: true, + responseHints: signals.responsePaths || [], + }; +} + +function buildWorkflowEvidence(signals) { + return { + requestEntries: uniqueStringValues([ + ...(signals.entryMethods || []).slice(0, 3), + ...getBusinessEndpoints(signals).map((endpoint) => endpoint.name).slice(0, 3), + ]), + paginationFields: uniqueStringValues(signals.paginationVars || []), + secondaryRequestEntries: uniqueStringValues([ + ...(signals.secondaryRequestMethods || []), + ...getBusinessEndpoints(signals).slice(1, 3).map((endpoint) => endpoint.name), + ]), + postProcessSteps: uniqueStringValues([ + ...(signals.filterExpressions || []).map(() => "filter"), + ...(signals.exportMethods || []).map(() => "export"), + ]), + }; +} + +function buildParams(signals, workflowArchetype) { + const params = []; + const staticKeys = Object.keys(signals.staticParams || {}); + + if (staticKeys.some((key) => /org/i.test(key)) || workflowArchetype === "multi_mode_request") { + params.push({ + name: "org", + resolver: "dictionary_entity", + required: true, + promptMissing: "Organization parameter is missing.", + promptAmbiguous: "Organization parameter is ambiguous.", + resolverConfig: {}, + }); + } + + if ( + (signals.branchFields || []).some((field) => /period|mode/i.test(field)) || + staticKeys.some((key) => /period/i.test(key)) + ) { + params.push({ + name: "period", + resolver: "month_week_period", + required: true, + promptMissing: "Period parameter is missing.", + promptAmbiguous: "Period parameter is ambiguous.", + resolverConfig: {}, + }); + } + + if ((signals.paginationVars || []).some((value) => /pageSize|rows|limit/i.test(value))) { + params.push({ + name: "page_size", + resolver: "literal_passthrough", + required: false, + promptMissing: "", + promptAmbiguous: "", + resolverConfig: {}, + }); + } + + return params; +} + +function buildUncertainties(signals, workflowArchetype) { + const issues = []; + if (!(signals.endpoints || []).length) { + issues.push("No API endpoint was detected deterministically."); + } + if (!signals.bootstrapCandidates?.some((candidate) => candidate.validForBootstrap)) { + issues.push("Bootstrap target URL is still inferred weakly."); + } + if (workflowArchetype === "paginated_enrichment" && getBusinessEndpoints(signals).length < 2) { + issues.push("Secondary enrichment request is not strongly confirmed."); + } + if (workflowArchetype === "paginated_enrichment" && !(signals.filterExpressions || []).length && !(signals.exportMethods || []).length) { + issues.push("Paginated enrichment is missing post-process evidence."); + } + return issues; +} + +function scoreConfidence(signals, workflowArchetype) { + let score = 0.3; + if (getBusinessEndpoints(signals).length > 0) score += 0.18; + if ((signals.bootstrapCandidates || []).some((candidate) => candidate.validForBootstrap)) score += 0.12; + if ((signals.responsePaths || []).length > 0) score += 0.08; + if ((signals.entryMethods || []).length > 0) score += 0.06; + if (workflowArchetype === "multi_mode_request" && (signals.modeValues || []).length >= 2) score += 0.14; + if ( + workflowArchetype === "paginated_enrichment" && + (signals.paginationVars || []).length > 0 && + (signals.secondaryRequestMethods || []).length > 0 + ) { + score += 0.14; + } + return Math.min(0.95, Number(score.toFixed(2))); +} + +function buildReadiness({ sceneIdDiagnostics, workflowArchetype, bootstrap, apiEndpoints, params, workflowSteps, confidence }) { + const risks = []; + const missingPieces = []; + const notes = []; + const businessApiEndpoints = (apiEndpoints || []).filter((endpoint) => + ["business_api", "gateway_api", "business_entry"].includes(endpoint.role) + ); + + if (!sceneIdDiagnostics?.valid) { + missingPieces.push("invalid_scene_id"); + risks.push(`Scene id is invalid${sceneIdDiagnostics?.invalidReason ? `: ${sceneIdDiagnostics.invalidReason}` : "."}`); + } + + if (!bootstrap.targetUrl && !bootstrap.expectedDomain) { + missingPieces.push("bootstrap_target"); + risks.push("Business bootstrap target is not confirmed."); + } else if (!bootstrap.expectedDomain) { + risks.push("Expected domain is missing; host validation may be weak."); + } + + if (!businessApiEndpoints.length) { + missingPieces.push("api_endpoint"); + risks.push("No request endpoint detected."); + } + + if (!workflowSteps.length) { + missingPieces.push("workflow_steps"); + risks.push("Workflow steps are incomplete."); + } + + if (workflowArchetype === "paginated_enrichment") { + const hasPaginate = workflowSteps.some((step) => step.type === "paginate"); + const hasSecondary = workflowSteps.some((step) => step.type === "secondary_request"); + const hasPostProcess = workflowSteps.some((step) => ["filter", "transform", "export"].includes(step.type)); + if (!hasPaginate) { + missingPieces.push("paginate_step"); + risks.push("Paginated enrichment lacks pagination evidence."); + } + if (!hasSecondary || businessApiEndpoints.length < 2) { + missingPieces.push("secondary_request"); + risks.push("Paginated enrichment lacks a strong secondary request signal."); + } + if (!hasPostProcess) { + missingPieces.push("post_process"); + risks.push("Paginated enrichment lacks filter/transform/export evidence."); + } + } + + if (workflowArchetype === "multi_mode_request" && !params.some((param) => param.name === "period")) { + risks.push("Mode-aware workflow is missing a resolved period parameter."); + } + + if (confidence < 0.55) { + risks.push("Overall analysis confidence is low."); + } + + let level = "A"; + if (missingPieces.length > 0) { + level = missingPieces.length >= 2 ? "C" : "B"; + } else if (risks.length > 1 || confidence < 0.7) { + level = "B"; + } + + if (level === "A") { + notes.push("Structure looks complete enough for direct trial."); + } else if (level === "B") { + notes.push("Generation should be reviewed before internal-network execution."); + } else { + notes.push("Manual completion is required before trial."); + } + + const gates = [ + { + name: "scene_id_valid", + passed: sceneIdDiagnostics?.valid !== false, + reason: sceneIdDiagnostics?.valid === false ? sceneIdDiagnostics.invalidReason || "invalid_scene_id" : null, + }, + { + name: "bootstrap_resolved", + passed: Boolean(bootstrap.targetUrl || bootstrap.expectedDomain), + reason: bootstrap.targetUrl || bootstrap.expectedDomain ? null : "bootstrap_target", + }, + { + name: "workflow_complete_for_archetype", + passed: !missingPieces.some((item) => + ["workflow_steps", "paginate_step", "secondary_request", "post_process"].includes(item) + ), + reason: + missingPieces.find((item) => + ["workflow_steps", "paginate_step", "secondary_request", "post_process"].includes(item) + ) || null, + }, + { + name: "runtime_contract_compatible", + passed: params.every((param) => + ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver) + ), + reason: params.every((param) => + ["dictionary_entity", "month_week_period", "literal_passthrough", "fixed_enum"].includes(param.resolver) + ) + ? null + : "runtime_contract_incompatible", + }, + ]; + + return { + level, + confidence, + gates, + risks, + missingPieces, + notes, + }; +} + +function slugifyAscii(value) { + return String(value || "") + .replace(/([a-z0-9])([A-Z])/g, "$1-$2") + .trim() + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, ""); +} + +function humanizeSceneName(value) { + const raw = String(value || "").replace(/[-_]+/g, " ").trim(); + if (!raw) return "Generated Scene"; + return raw.replace(/\b\w/g, (char) => char.toUpperCase()); +} + +function uniqueStringValues(list) { + return Array.from(new Set((list || []).filter(Boolean))); +} + +module.exports = { + buildAnalysisContext, + buildDeterministicSceneIr, + deriveSceneIdDiagnostics, + readDirectory, + runGenerator, + validateSceneIdCandidate, +}; diff --git a/frontend/scene-generator/llm-client.js b/frontend/scene-generator/llm-client.js index 2136074..9018001 100644 --- a/frontend/scene-generator/llm-client.js +++ b/frontend/scene-generator/llm-client.js @@ -163,6 +163,7 @@ function buildDeepAnalyzePrompt(sourceDir, dirContents) { pushFragments(parts, "Branching fragments", context.branchingFragments, 6); pushFragments(parts, "Response/normalization fragments", context.responseFragments, 6); pushFragments(parts, "Export fragments", context.exportFragments, 4); + pushFragments(parts, "business JS files", context.businessJsFragments, 4); parts.push(` Instructions: