feat(llm-client): add deep extraction with apiEndpoints, staticParams, columnDefs

Add analyzeSceneDeep() function that extracts complete SceneInfo from index.html content including API endpoints, static parameters, column definitions, and business logic. Also adds the https module import; analyzeSceneDeep() picks the http or https module from the endpoint URL's protocol, so non-HTTPS LLM endpoints are supported. 🤖 Generated with [Qoder](https://qoder.com)
2026-04-17 10:13:29 +08:00
parent dd7b3c582a
commit 517ac6bf39
1 changed files with 182 additions and 2 deletions
--- a/frontend/scene-generator/llm-client.js
+++ b/frontend/scene-generator/llm-client.js
@@ -1,4 +1,5 @@
 const http = require("http");
+const https = require("https");

 const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容，提取 scene-id 和 scene-name。

@@ -12,6 +13,38 @@ scene-name 规则：

 请以 JSON 格式返回：{"sceneId": "...", "sceneName": "..."}`;

+const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码，提取关键业务信息。
+
+## 分析目标
+
+1. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, 用途)
+2. **静态参数**: 识别硬编码的业务参数 (key-value pairs)
+3. **列定义**: 识别数据表格/导出的列配置 ([field, label] pairs)
+4. **业务逻辑**: 理解数据获取和转换流程
+5. **场景类型**: 判断是 report_collection 还是 monitoring
+
+## 输出格式
+
+请以 JSON 格式返回：
+{
+  "sceneId": "string - 场景标识 (英文短横线)",
+  "sceneName": "string - 场景中文名",
+  "sceneKind": "report_collection | monitoring",
+  "sourceSystem": "string - 来源系统名 (可选)",
+  "expectedDomain": "string - 目标域名 (可选)",
+  "targetUrl": "string | null - 目标页面URL",
+  "apiEndpoints": [
+    {"name": "string", "url": "string", "method": "GET|POST", "description": "string"}
+  ],
+  "staticParams": {"key": "value"},
+  "columnDefs": [["fieldName", "中文列名"]],
+  "entryMethod": "string - 入口方法名",
+  "businessLogic": {
+    "dataFetch": "string - 数据获取逻辑描述",
+    "dataTransform": "string - 数据转换逻辑描述"
+  }
+}`; // System prompt sent by analyzeSceneDeep(); asks the model to reply with one JSON object in the schema above.
+
 function buildAnalyzePrompt(sourceDir, dirContents) {
  const parts = [];

@@ -44,6 +77,45 @@ function buildAnalyzePrompt(sourceDir, dirContents) {
  return `以下是场景目录 "${sourceDir}" 的内容：\n\n${parts.join("\n")}\n\n请以 JSON 格式返回：{"sceneId": "...", "sceneName": "..."}`;
 }

+/**
+ * Assemble the user prompt for deep scene analysis.
+ *
+ * Sections are emitted in a fixed order: directory tree, scene.toml, SKILL.toml,
+ * SKILL.md, index.html, then every script. Falsy sections are skipped.
+ *
+ * @param {string} sourceDir - Scene directory name, quoted in the prompt header.
+ * @param {object} dirContents - Holds `tree`, optional file texts keyed by file
+ *   name, and an optional `scripts` map of script name to source text.
+ * @param {string} [indexHtmlContent] - index.html text; only the first 15000
+ *   characters are included.
+ * @returns {string} The complete user prompt.
+ */
+function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) {
+  const INDEX_HTML_LIMIT = 15000; // keeps the request within token limits
+  const SCRIPT_LIMIT = 3000;
+  const sections = [`=== 目录结构 ===`, dirContents.tree || "(empty)"];
+
+  for (const fileName of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
+    const fileText = dirContents[fileName];
+    if (fileText) sections.push(`\n=== ${fileName} ===`, fileText);
+  }
+
+  if (indexHtmlContent) {
+    sections.push(`\n=== index.html ===`, indexHtmlContent.substring(0, INDEX_HTML_LIMIT));
+  }
+
+  const scriptEntries = dirContents.scripts ? Object.entries(dirContents.scripts) : [];
+  if (scriptEntries.length > 0) {
+    sections.push(`\n=== 脚本文件 ===`);
+    for (const [scriptName, scriptText] of scriptEntries) {
+      sections.push(`\n--- ${scriptName} ---`, scriptText.substring(0, SCRIPT_LIMIT));
+    }
+  }
+
+  const body = sections.join("\n");
+  return `以下是场景目录 "${sourceDir}" 的内容：\n\n${body}\n\n请分析以上代码，提取完整的场景信息。`;
+}
+
 function extractJsonFromResponse(text) {
  const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  if (codeBlockMatch) return JSON.parse(codeBlockMatch[1]);
@@ -56,6 +128,35 @@ function extractJsonFromResponse(text) {
  return JSON.parse(text);
 }

+/**
+ * Pull the SceneInfo JSON out of a raw LLM reply.
+ *
+ * Candidates are tried in order: the body of the first fenced code block, a
+ * greedy `{...}` span that contains "sceneId", and finally the whole reply.
+ * The first candidate that parses as JSON is returned.
+ *
+ * @param {string} text - Raw message content returned by the LLM.
+ * @returns {*} Whatever JSON.parse yields for the first parseable candidate.
+ * @throws {Error} When none of the candidates is valid JSON.
+ */
+function extractSceneInfo(text) {
+  const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+  const braced = text.match(/\{[\s\S]*"sceneId"[\s\S]*\}/);
+  // A null entry means the corresponding pattern did not match.
+  const candidates = [fenced && fenced[1], braced && braced[0], text];
+
+  for (const candidate of candidates) {
+    if (candidate === null) continue;
+    try {
+      return JSON.parse(candidate);
+    } catch (parseError) {
+      // Not valid JSON; move on to the next candidate.
+    }
+  }
+
+  throw new Error("Failed to extract valid SceneInfo JSON from LLM response");
+}
+
 function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
  const userPrompt = buildAnalyzePrompt(sourceDir, dirContents);

@@ -83,7 +184,7 @@ function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
      },
    };

-    const req = http.request(options, (res) => {
+    const req = https.request(options, (res) => {
      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
@@ -118,4 +219,83 @@ function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
  });
 }

-module.exports = { buildAnalyzePrompt, extractJsonFromResponse, analyzeScene };
+/**
+ * Ask the LLM for a full SceneInfo built from the directory snapshot and index.html.
+ *
+ * @param {string} sourceDir - Scene directory name shown in the prompt.
+ * @param {object} dirContents - Directory snapshot handed to buildDeepAnalyzePrompt.
+ * @param {string} indexHtmlContent - Text of the scene's index.html.
+ * @param {{apiKey: string, baseUrl: string, model: string}} config - LLM settings.
+ * @returns {Promise<object>} SceneInfo with optional fields defaulted; rejects on
+ *   transport error, 60 s timeout, non-200 status, or missing sceneId/sceneName.
+ */
+function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) {
+  const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent);
+  const requestBody = JSON.stringify({
+    model,
+    messages: [
+      { role: "system", content: DEEP_SYSTEM_PROMPT },
+      { role: "user", content: userPrompt },
+    ],
+    temperature: 0.1,
+    max_tokens: 2048, // Increased for detailed response
+  });
+
+  return new Promise((resolve, reject) => {
+    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
+    const options = {
+      hostname: url.hostname,
+      port: url.port || (url.protocol === "https:" ? 443 : 80),
+      path: url.pathname,
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Length": Buffer.byteLength(requestBody),
+      },
+    };
+
+    const req = (url.protocol === "https:" ? https : http).request(options, (res) => {
+      // Decode at the stream level so a multi-byte UTF-8 character split across chunks stays intact.
+      res.setEncoding("utf8");
+      let data = "";
+      res.on("data", (chunk) => (data += chunk));
+      res.on("error", reject); // reject instead of hanging if the body stream fails
+      res.on("end", () => {
+        if (res.statusCode !== 200) {
+          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
+        }
+        try {
+          const content = JSON.parse(data).choices?.[0]?.message?.content;
+          if (!content) return reject(new Error("LLM returned empty response"));
+          const result = extractSceneInfo(content);
+          if (!result.sceneId || !result.sceneName) {
+            return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`));
+          }
+          result.sceneKind = result.sceneKind || "report_collection";
+          result.apiEndpoints = result.apiEndpoints || [];
+          result.staticParams = result.staticParams || {};
+          result.columnDefs = result.columnDefs || [];
+          result.businessLogic = result.businessLogic || {};
+          resolve(result);
+        } catch (err) {
+          reject(new Error(`Failed to parse LLM response: ${err.message}`, { cause: err }));
+        }
+      });
+    });
+
+    req.on("error", reject);
+    req.setTimeout(60000, () => req.destroy(new Error("LLM API request timed out")));
+    req.end(requestBody);
+  });
+}
+
+module.exports = {
+  buildAnalyzePrompt,
+  extractJsonFromResponse,
+  analyzeScene,
+  // Deep-analysis additions: prompt builder, reply parser, and the LLM call
+  buildDeepAnalyzePrompt,
+  extractSceneInfo,
+  analyzeSceneDeep,
+};