From 517ac6bf39659c9339c65705259ba79711d642ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9C=A8=E7=82=8E?= <635735027@qq.com>
Date: Fri, 17 Apr 2026 10:13:29 +0800
Subject: [PATCH] feat(llm-client): add deep extraction with apiEndpoints,
 staticParams, columnDefs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

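Add analyzeSceneDeep(), which sends the scene directory contents together
with the index.html content to the LLM and extracts a complete SceneInfo,
including API endpoints, static parameters, column definitions, and
business logic. Also add the https module import so that HTTPS LLM
endpoints are supported: analyzeSceneDeep() picks http or https based on
the baseUrl protocol, and analyzeScene() now sends its request via https.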

🤖 Generated with [Qoder](https://qoder.com)
---
 frontend/scene-generator/llm-client.js | 184 ++++++++++++++++++++++++-
 1 file changed, 182 insertions(+), 2 deletions(-)

diff --git a/frontend/scene-generator/llm-client.js b/frontend/scene-generator/llm-client.js
index c87dcd3..aaf7a27 100644
--- a/frontend/scene-generator/llm-client.js
+++ b/frontend/scene-generator/llm-client.js
@@ -1,4 +1,5 @@
 const http = require("http");
+const https = require("https");
 
 const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容，提取 scene-id 和 scene-name。
 
@@ -12,6 +13,38 @@ scene-name 规则：
 
 请以 JSON 格式返回：{"sceneId": "...", "sceneName": "..."}`;
 
+const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码，提取关键业务信息。
+
+## 分析目标
+
+1. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, 用途)
+2. **静态参数**: 识别硬编码的业务参数 (key-value pairs)
+3. **列定义**: 识别数据表格/导出的列配置 ([field, label] pairs)
+4. **业务逻辑**: 理解数据获取和转换流程
+5. **场景类型**: 判断是 report_collection 还是 monitoring
+
+## 输出格式
+
+请以 JSON 格式返回：
+{
+  "sceneId": "string - 场景标识 (英文短横线)",
+  "sceneName": "string - 场景中文名",
+  "sceneKind": "report_collection | monitoring",
+  "sourceSystem": "string - 来源系统名 (可选)",
+  "expectedDomain": "string - 目标域名 (可选)",
+  "targetUrl": "string | null - 目标页面URL",
+  "apiEndpoints": [
+    {"name": "string", "url": "string", "method": "GET|POST", "description": "string"}
+  ],
+  "staticParams": {"key": "value"},
+  "columnDefs": [["fieldName", "中文列名"]],
+  "entryMethod": "string - 入口方法名",
+  "businessLogic": {
+    "dataFetch": "string - 数据获取逻辑描述",
+    "dataTransform": "string - 数据转换逻辑描述"
+  }
+}`;
+
 function buildAnalyzePrompt(sourceDir, dirContents) {
   const parts = [];
 
@@ -44,6 +77,45 @@ function buildAnalyzePrompt(sourceDir, dirContents) {
   return `以下是场景目录 "${sourceDir}" 的内容：\n\n${parts.join("\n")}\n\n请以 JSON 格式返回：{"sceneId": "...", "sceneName": "..."}`;
 }
 
+function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) {
+  const parts = [];
+
+  parts.push(`=== 目录结构 ===`);
+  parts.push(dirContents.tree || "(empty)");
+
+  if (dirContents["scene.toml"]) {
+    parts.push(`\n=== scene.toml ===`);
+    parts.push(dirContents["scene.toml"]);
+  }
+
+  if (dirContents["SKILL.toml"]) {
+    parts.push(`\n=== SKILL.toml ===`);
+    parts.push(dirContents["SKILL.toml"]);
+  }
+
+  if (dirContents["SKILL.md"]) {
+    parts.push(`\n=== SKILL.md ===`);
+    parts.push(dirContents["SKILL.md"]);
+  }
+
+  // Include index.html content (key addition)
+  if (indexHtmlContent) {
+    parts.push(`\n=== index.html ===`);
+    // Limit to first 15000 chars to avoid token limits
+    parts.push(indexHtmlContent.substring(0, 15000));
+  }
+
+  if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) {
+    parts.push(`\n=== 脚本文件 ===`);
+    for (const [name, content] of Object.entries(dirContents.scripts)) {
+      parts.push(`\n--- ${name} ---`);
+      parts.push(content.substring(0, 3000));
+    }
+  }
+
+  return `以下是场景目录 "${sourceDir}" 的内容：\n\n${parts.join("\n")}\n\n请分析以上代码，提取完整的场景信息。`;
+}
+
 function extractJsonFromResponse(text) {
   const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
   if (codeBlockMatch) return JSON.parse(codeBlockMatch[1]);
@@ -56,6 +128,35 @@ function extractJsonFromResponse(text) {
   return JSON.parse(text);
 }
 
+function extractSceneInfo(text) {
+  // Try code block first
+  const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+  if (codeBlockMatch) {
+    try {
+      return JSON.parse(codeBlockMatch[1]);
+    } catch (e) {
+      // fall through
+    }
+  }
+
+  // Try to find JSON object with sceneId
+  const jsonMatch = text.match(/\{[\s\S]*"sceneId"[\s\S]*\}/);
+  if (jsonMatch) {
+    try {
+      return JSON.parse(jsonMatch[0]);
+    } catch (e) {
+      // fall through
+    }
+  }
+
+  // Last resort: parse entire text
+  try {
+    return JSON.parse(text);
+  } catch (e) {
+    throw new Error("Failed to extract valid SceneInfo JSON from LLM response");
+  }
+}
+
 function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
   const userPrompt = buildAnalyzePrompt(sourceDir, dirContents);
 
@@ -83,7 +184,7 @@ function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
       },
     };
 
-    const req = http.request(options, (res) => {
+    const req = https.request(options, (res) => {
       let data = "";
       res.on("data", (chunk) => (data += chunk));
       res.on("end", () => {
@@ -118,4 +219,83 @@ function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
   });
 }
 
-module.exports = { buildAnalyzePrompt, extractJsonFromResponse, analyzeScene };
+function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) {
+  const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent);
+
+  const requestBody = JSON.stringify({
+    model,
+    messages: [
+      { role: "system", content: DEEP_SYSTEM_PROMPT },
+      { role: "user", content: userPrompt },
+    ],
+    temperature: 0.1,
+    max_tokens: 2048, // Increased for detailed response
+  });
+
+  return new Promise((resolve, reject) => {
+    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
+    const options = {
+      hostname: url.hostname,
+      port: url.port || (url.protocol === "https:" ? 443 : 80),
+      path: url.pathname,
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Length": Buffer.byteLength(requestBody),
+      },
+    };
+
+    const httpModule = url.protocol === "https:" ? https : http;
+    const req = httpModule.request(options, (res) => {
+      let data = "";
+      res.on("data", (chunk) => (data += chunk));
+      res.on("end", () => {
+        if (res.statusCode !== 200) {
+          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
+        }
+
+        try {
+          const parsed = JSON.parse(data);
+          const content = parsed.choices?.[0]?.message?.content;
+          if (!content) return reject(new Error("LLM returned empty response"));
+          const result = extractSceneInfo(content);
+
+          // Validate required fields
+          if (!result.sceneId || !result.sceneName) {
+            return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`));
+          }
+
+          // Set defaults for optional fields
+          result.sceneKind = result.sceneKind || "report_collection";
+          result.apiEndpoints = result.apiEndpoints || [];
+          result.staticParams = result.staticParams || {};
+          result.columnDefs = result.columnDefs || [];
+          result.businessLogic = result.businessLogic || {};
+
+          resolve(result);
+        } catch (err) {
+          reject(new Error(`Failed to parse LLM response: ${err.message}`));
+        }
+      });
+    });
+
+    req.on("error", reject);
+    req.setTimeout(60000, () => {
+      req.destroy(new Error("LLM API request timed out"));
+    });
+
+    req.write(requestBody);
+    req.end();
+  });
+}
+
+module.exports = {
+  buildAnalyzePrompt,
+  extractJsonFromResponse,
+  analyzeScene,
+  // New exports
+  buildDeepAnalyzePrompt,
+  extractSceneInfo,
+  analyzeSceneDeep,
+};