Files
claw/frontend/scene-generator/llm-client.js
木炎 517ac6bf39 feat(llm-client): add deep extraction with apiEndpoints, staticParams, columnDefs
Add analyzeSceneDeep() function that extracts complete SceneInfo from
index.html content including API endpoints, static parameters, column
definitions, and business logic. Also adds http module import to support
non-HTTPS LLM endpoints.

🤖 Generated with [Qoder](https://qoder.com)
2026-04-17 10:13:29 +08:00

302 lines
9.1 KiB
JavaScript

const http = require("http");
const https = require("https");
const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容,提取 scene-id 和 scene-name。
scene-id 规则:
- 使用英文短横线连接,如 tq-lineloss-report
- 全小写,有业务含义
scene-name 规则:
- 使用中文,简短描述性名称
- 如 "台区线损报表"、"知乎热榜导出"
请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`;
const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码,提取关键业务信息。
## 分析目标
1. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, 用途)
2. **静态参数**: 识别硬编码的业务参数 (key-value pairs)
3. **列定义**: 识别数据表格/导出的列配置 ([field, label] pairs)
4. **业务逻辑**: 理解数据获取和转换流程
5. **场景类型**: 判断是 report_collection 还是 monitoring
## 输出格式
请以 JSON 格式返回:
{
"sceneId": "string - 场景标识 (英文短横线)",
"sceneName": "string - 场景中文名",
"sceneKind": "report_collection | monitoring",
"sourceSystem": "string - 来源系统名 (可选)",
"expectedDomain": "string - 目标域名 (可选)",
"targetUrl": "string | null - 目标页面URL",
"apiEndpoints": [
{"name": "string", "url": "string", "method": "GET|POST", "description": "string"}
],
"staticParams": {"key": "value"},
"columnDefs": [["fieldName", "中文列名"]],
"entryMethod": "string - 入口方法名",
"businessLogic": {
"dataFetch": "string - 数据获取逻辑描述",
"dataTransform": "string - 数据转换逻辑描述"
}
}`;
/**
 * Builds the user prompt for the basic analysis pass (sceneId/sceneName only).
 *
 * @param {string} sourceDir - Scene directory path, echoed into the prompt.
 * @param {object} dirContents - Map with optional `tree` (string),
 *   "scene.toml" / "SKILL.toml" / "SKILL.md" contents, and a `scripts`
 *   object of {fileName: content}.
 * @returns {string} Prompt text for the user message.
 */
function buildAnalyzePrompt(sourceDir, dirContents) {
  const sections = [`=== 目录结构 ===`, dirContents.tree || "(empty)"];
  // Well-known metadata files, emitted in a fixed order when present.
  for (const metaFile of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
    const body = dirContents[metaFile];
    if (body) {
      sections.push(`\n=== ${metaFile} ===`, body);
    }
  }
  const scripts = dirContents.scripts || {};
  const scriptNames = Object.keys(scripts);
  if (scriptNames.length > 0) {
    sections.push(`\n=== 脚本文件 ===`);
    for (const scriptName of scriptNames) {
      // Cap each script excerpt at 2000 chars to stay within token limits.
      sections.push(`\n--- ${scriptName} ---`, scripts[scriptName].substring(0, 2000));
    }
  }
  return `以下是场景目录 "${sourceDir}" 的内容:\n\n${sections.join("\n")}\n\n请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`;
}
/**
 * Builds the user prompt for the deep analysis pass. Compared with
 * buildAnalyzePrompt, this also embeds the scene's index.html (truncated to
 * 15000 chars) and larger script excerpts (3000 chars each) so the model can
 * extract endpoints, static params and column definitions from the source.
 *
 * @param {string} sourceDir - Scene directory path, echoed into the prompt.
 * @param {object} dirContents - Map with optional `tree`, metadata files and `scripts`.
 * @param {string|null|undefined} indexHtmlContent - Raw index.html text, if available.
 * @returns {string} Prompt text for the user message.
 */
function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) {
  const sections = [`=== 目录结构 ===`, dirContents.tree || "(empty)"];
  // Well-known metadata files, emitted in a fixed order when present.
  for (const metaFile of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
    const body = dirContents[metaFile];
    if (body) {
      sections.push(`\n=== ${metaFile} ===`, body);
    }
  }
  if (indexHtmlContent) {
    // index.html is the primary analysis target; cap it to control token use.
    sections.push(`\n=== index.html ===`, indexHtmlContent.substring(0, 15000));
  }
  const scripts = dirContents.scripts || {};
  const scriptNames = Object.keys(scripts);
  if (scriptNames.length > 0) {
    sections.push(`\n=== 脚本文件 ===`);
    for (const scriptName of scriptNames) {
      sections.push(`\n--- ${scriptName} ---`, scripts[scriptName].substring(0, 3000));
    }
  }
  return `以下是场景目录 "${sourceDir}" 的内容:\n\n${sections.join("\n")}\n\n请分析以上代码,提取完整的场景信息。`;
}
/**
 * Extracts a JSON object from an LLM text response.
 *
 * Tries, in order: a fenced ```json code block, the first brace-delimited
 * span containing "sceneId"/"sceneName", and finally the raw text.
 *
 * Fix: each stage that fails to parse now falls through to the next stage
 * (previously a malformed fenced block threw immediately, aborting
 * extraction even when valid JSON followed it) — consistent with the
 * sibling extractSceneInfo().
 *
 * @param {string} text - Raw LLM completion text.
 * @returns {object} Parsed JSON value.
 * @throws {SyntaxError} If no strategy yields valid JSON.
 */
function extractJsonFromResponse(text) {
  const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  if (codeBlockMatch) {
    try {
      return JSON.parse(codeBlockMatch[1]);
    } catch (e) {
      // Malformed fenced block — fall through to the next strategy.
    }
  }
  const jsonMatch = text.match(
    /\{[\s\S]*"sceneId"[\s\S]*"sceneName"[\s\S]*\}/
  );
  if (jsonMatch) {
    try {
      return JSON.parse(jsonMatch[0]);
    } catch (e) {
      // Looked like JSON but wasn't — fall through.
    }
  }
  // Last resort: the whole response; throws SyntaxError if invalid.
  return JSON.parse(text);
}
/**
 * Extracts a SceneInfo JSON object from an LLM text response, trying three
 * strategies in order and falling through on parse failure:
 *   1. JSON inside a fenced ``` / ```json code block
 *   2. the first brace-delimited span mentioning "sceneId"
 *   3. the entire response text
 *
 * @param {string} text - Raw LLM completion text.
 * @returns {object} Parsed JSON value.
 * @throws {Error} If none of the strategies yields valid JSON.
 */
function extractSceneInfo(text) {
  const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  if (fenced) {
    try {
      return JSON.parse(fenced[1]);
    } catch (ignored) {
      // Malformed fenced block — try the next strategy.
    }
  }
  const braceSpan = text.match(/\{[\s\S]*"sceneId"[\s\S]*\}/);
  if (braceSpan) {
    try {
      return JSON.parse(braceSpan[0]);
    } catch (ignored) {
      // Not valid JSON despite the braces — try the next strategy.
    }
  }
  try {
    return JSON.parse(text);
  } catch (ignored) {
    throw new Error("Failed to extract valid SceneInfo JSON from LLM response");
  }
}
/**
 * Calls the configured OpenAI-compatible chat-completions endpoint to extract
 * basic scene info ({sceneId, sceneName}) from a scene directory's contents.
 *
 * @param {string} sourceDir - Scene directory path (embedded in the prompt).
 * @param {object} dirContents - Directory content map (see buildAnalyzePrompt).
 * @param {object} cfg - LLM connection settings.
 * @param {string} cfg.apiKey - Bearer token for the API.
 * @param {string} cfg.baseUrl - Endpoint base URL; a trailing "/v1" is normalized away.
 * @param {string} cfg.model - Model identifier.
 * @returns {Promise<{sceneId: string, sceneName: string}>} Rejects on HTTP
 *   errors, a 30s timeout, empty responses, or unparseable/incomplete JSON.
 */
function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
  const userPrompt = buildAnalyzePrompt(sourceDir, dirContents);
  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1, // near-deterministic extraction
    max_tokens: 256, // only a tiny JSON object is expected back
  });
  return new Promise((resolve, reject) => {
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };
    // Fix: pick the transport by protocol. This previously always used
    // `https.request`, so plain-HTTP endpoints (whose port-80 default was
    // already computed above) could never connect. Consistent with
    // analyzeSceneDeep.
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }
        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractJsonFromResponse(content);
          if (!result.sceneId || !result.sceneName) {
            return reject(
              new Error(`LLM response missing sceneId/sceneName: ${content}`)
            );
          }
          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });
    req.on("error", reject);
    req.setTimeout(30000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });
    req.write(requestBody);
    req.end();
  });
}
/**
 * Calls the chat-completions endpoint for a deep scene analysis: full
 * SceneInfo including apiEndpoints, staticParams, columnDefs and
 * businessLogic, extracted from the directory contents plus index.html.
 * Selects http/https transport by the baseUrl protocol.
 *
 * @param {string} sourceDir - Scene directory path (embedded in the prompt).
 * @param {object} dirContents - Directory content map (see buildDeepAnalyzePrompt).
 * @param {string|null} indexHtmlContent - Raw index.html text, if available.
 * @param {object} cfg - LLM connection settings.
 * @param {string} cfg.apiKey - Bearer token for the API.
 * @param {string} cfg.baseUrl - Endpoint base URL; a trailing "/v1" is normalized away.
 * @param {string} cfg.model - Model identifier.
 * @returns {Promise<object>} SceneInfo with defaults filled for optional
 *   fields. Rejects on HTTP errors, a 60s timeout, empty responses, or
 *   missing sceneId/sceneName.
 */
function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) {
  const prompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent);
  const payload = JSON.stringify({
    model,
    messages: [
      { role: "system", content: DEEP_SYSTEM_PROMPT },
      { role: "user", content: prompt },
    ],
    temperature: 0.1,
    max_tokens: 2048, // deep extraction needs room for endpoints/columns/logic
  });
  return new Promise((resolve, reject) => {
    const endpoint = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const transport = endpoint.protocol === "https:" ? https : http;
    const req = transport.request(
      {
        hostname: endpoint.hostname,
        port: endpoint.port || (endpoint.protocol === "https:" ? 443 : 80),
        path: endpoint.pathname,
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${apiKey}`,
          "Content-Length": Buffer.byteLength(payload),
        },
      },
      (res) => {
        let raw = "";
        res.on("data", (chunk) => (raw += chunk));
        res.on("end", () => {
          if (res.statusCode !== 200) {
            reject(new Error(`LLM API error ${res.statusCode}: ${raw}`));
            return;
          }
          try {
            const body = JSON.parse(raw);
            const content = body.choices?.[0]?.message?.content;
            if (!content) {
              reject(new Error("LLM returned empty response"));
              return;
            }
            const info = extractSceneInfo(content);
            // sceneId/sceneName are mandatory; everything else gets defaults.
            if (!info.sceneId || !info.sceneName) {
              reject(new Error(`LLM response missing sceneId/sceneName: ${content}`));
              return;
            }
            info.sceneKind = info.sceneKind || "report_collection";
            info.apiEndpoints = info.apiEndpoints || [];
            info.staticParams = info.staticParams || {};
            info.columnDefs = info.columnDefs || [];
            info.businessLogic = info.businessLogic || {};
            resolve(info);
          } catch (err) {
            reject(new Error(`Failed to parse LLM response: ${err.message}`));
          }
        });
      }
    );
    req.on("error", reject);
    req.setTimeout(60000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });
    req.write(payload);
    req.end();
  });
}
module.exports = {
buildAnalyzePrompt,
extractJsonFromResponse,
analyzeScene,
// New exports
buildDeepAnalyzePrompt,
extractSceneInfo,
analyzeSceneDeep,
};