From 517ac6bf39659c9339c65705259ba79711d642ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=A8=E7=82=8E?= <635735027@qq.com> Date: Fri, 17 Apr 2026 10:13:29 +0800 Subject: [PATCH] feat(llm-client): add deep extraction with apiEndpoints, staticParams, columnDefs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add analyzeSceneDeep() function that extracts complete SceneInfo from index.html content including API endpoints, static parameters, column definitions, and business logic. Also adds the https module import so analyzeSceneDeep() can choose http or https based on the endpoint protocol. 🤖 Generated with [Qoder](https://qoder.com) --- frontend/scene-generator/llm-client.js | 184 ++++++++++++++++++++++++- 1 file changed, 182 insertions(+), 2 deletions(-) diff --git a/frontend/scene-generator/llm-client.js b/frontend/scene-generator/llm-client.js index c87dcd3..aaf7a27 100644 --- a/frontend/scene-generator/llm-client.js +++ b/frontend/scene-generator/llm-client.js @@ -1,4 +1,5 @@ const http = require("http"); +const https = require("https"); const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容,提取 scene-id 和 scene-name。 @@ -12,6 +13,38 @@ scene-name 规则: 请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`; +const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码,提取关键业务信息。 + +## 分析目标 + +1. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, 用途) +2. **静态参数**: 识别硬编码的业务参数 (key-value pairs) +3. **列定义**: 识别数据表格/导出的列配置 ([field, label] pairs) +4. **业务逻辑**: 理解数据获取和转换流程 +5. 
**场景类型**: 判断是 report_collection 还是 monitoring + +## 输出格式 + +请以 JSON 格式返回: +{ + "sceneId": "string - 场景标识 (英文短横线)", + "sceneName": "string - 场景中文名", + "sceneKind": "report_collection | monitoring", + "sourceSystem": "string - 来源系统名 (可选)", + "expectedDomain": "string - 目标域名 (可选)", + "targetUrl": "string | null - 目标页面URL", + "apiEndpoints": [ + {"name": "string", "url": "string", "method": "GET|POST", "description": "string"} + ], + "staticParams": {"key": "value"}, + "columnDefs": [["fieldName", "中文列名"]], + "entryMethod": "string - 入口方法名", + "businessLogic": { + "dataFetch": "string - 数据获取逻辑描述", + "dataTransform": "string - 数据转换逻辑描述" + } +}`; + function buildAnalyzePrompt(sourceDir, dirContents) { const parts = []; @@ -44,6 +77,45 @@ function buildAnalyzePrompt(sourceDir, dirContents) { return `以下是场景目录 "${sourceDir}" 的内容:\n\n${parts.join("\n")}\n\n请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`; } +function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) { + const parts = []; + + parts.push(`=== 目录结构 ===`); + parts.push(dirContents.tree || "(empty)"); + + if (dirContents["scene.toml"]) { + parts.push(`\n=== scene.toml ===`); + parts.push(dirContents["scene.toml"]); + } + + if (dirContents["SKILL.toml"]) { + parts.push(`\n=== SKILL.toml ===`); + parts.push(dirContents["SKILL.toml"]); + } + + if (dirContents["SKILL.md"]) { + parts.push(`\n=== SKILL.md ===`); + parts.push(dirContents["SKILL.md"]); + } + + // Include index.html content (key addition) + if (indexHtmlContent) { + parts.push(`\n=== index.html ===`); + // Limit to first 15000 chars to avoid token limits + parts.push(indexHtmlContent.substring(0, 15000)); + } + + if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) { + parts.push(`\n=== 脚本文件 ===`); + for (const [name, content] of Object.entries(dirContents.scripts)) { + parts.push(`\n--- ${name} ---`); + parts.push(content.substring(0, 3000)); + } + } + + return `以下是场景目录 "${sourceDir}" 
的内容:\n\n${parts.join("\n")}\n\n请分析以上代码,提取完整的场景信息。`; +} + function extractJsonFromResponse(text) { const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/); if (codeBlockMatch) return JSON.parse(codeBlockMatch[1]); @@ -56,6 +128,35 @@ function extractJsonFromResponse(text) { return JSON.parse(text); } +function extractSceneInfo(text) { + // Try code block first + const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/); + if (codeBlockMatch) { + try { + return JSON.parse(codeBlockMatch[1]); + } catch (e) { + // fall through + } + } + + // Try to find JSON object with sceneId + const jsonMatch = text.match(/\{[\s\S]*"sceneId"[\s\S]*\}/); + if (jsonMatch) { + try { + return JSON.parse(jsonMatch[0]); + } catch (e) { + // fall through + } + } + + // Last resort: parse entire text + try { + return JSON.parse(text); + } catch (e) { + throw new Error("Failed to extract valid SceneInfo JSON from LLM response"); + } +} + function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) { const userPrompt = buildAnalyzePrompt(sourceDir, dirContents); @@ -83,7 +184,7 @@ function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) { }, }; - const req = http.request(options, (res) => { + const req = https.request(options, (res) => { let data = ""; res.on("data", (chunk) => (data += chunk)); res.on("end", () => { @@ -118,4 +219,83 @@ function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) { }); } -module.exports = { buildAnalyzePrompt, extractJsonFromResponse, analyzeScene }; +function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) { + const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent); + + const requestBody = JSON.stringify({ + model, + messages: [ + { role: "system", content: DEEP_SYSTEM_PROMPT }, + { role: "user", content: userPrompt }, + ], + temperature: 0.1, + max_tokens: 2048, // Increased for detailed response + }); + + return new 
Promise((resolve, reject) => { + const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions"); + const options = { + hostname: url.hostname, + port: url.port || (url.protocol === "https:" ? 443 : 80), + path: url.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + "Content-Length": Buffer.byteLength(requestBody), + }, + }; + + const httpModule = url.protocol === "https:" ? https : http; + const req = httpModule.request(options, (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => { + if (res.statusCode !== 200) { + return reject(new Error(`LLM API error ${res.statusCode}: ${data}`)); + } + + try { + const parsed = JSON.parse(data); + const content = parsed.choices?.[0]?.message?.content; + if (!content) return reject(new Error("LLM returned empty response")); + const result = extractSceneInfo(content); + + // Validate required fields + if (!result.sceneId || !result.sceneName) { + return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`)); + } + + // Set defaults for optional fields + result.sceneKind = result.sceneKind || "report_collection"; + result.apiEndpoints = result.apiEndpoints || []; + result.staticParams = result.staticParams || {}; + result.columnDefs = result.columnDefs || []; + result.businessLogic = result.businessLogic || {}; + + resolve(result); + } catch (err) { + reject(new Error(`Failed to parse LLM response: ${err.message}`)); + } + }); + }); + + req.on("error", reject); + req.setTimeout(60000, () => { + req.destroy(new Error("LLM API request timed out")); + }); + + req.write(requestBody); + req.end(); + }); +} + +module.exports = { + buildAnalyzePrompt, + extractJsonFromResponse, + analyzeScene, + // New exports + buildDeepAnalyzePrompt, + extractSceneInfo, + analyzeSceneDeep, +};