Files
claw/frontend/scene-generator/llm-client.js
2026-04-17 13:07:10 +08:00

349 lines
11 KiB
JavaScript

const http = require("http");
const https = require("https");
// System prompt for the quick extraction pass: instructs the LLM to derive a
// lowercase kebab-case scene-id and a short Chinese scene-name from a scene
// directory listing, and to answer as a bare JSON object. (Prompt text is in
// Chinese and is part of runtime behavior — do not reformat.)
const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容,提取 scene-id 和 scene-name。
scene-id 规则:
- 使用英文短横线连接,如 tq-lineloss-report
- 全小写,有业务含义
scene-name 规则:
- 使用中文,简短描述性名称
- 如 "台区线损报表"、"知乎热榜导出"
请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`;
// System prompt for the deep analysis pass: asks the LLM to mine scene source
// code for API endpoints, request templates, column definitions and — most
// importantly — multi-mode branches keyed on fields like period_mode. The two
// JSON shapes documented inside (single-mode vs. modes[] array) are what
// extractSceneInfo/analyzeSceneDeep expect back. `\$` escapes keep literal
// "${...}" text out of template interpolation. (Runtime string — keep verbatim.)
const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码,提取关键业务信息。
## 分析目标
1. **多模式识别** (关键):
- 查找条件分支逻辑 (if/switch) 中基于 period_mode、reportType 等字段的分支
- 识别不同分支对应的 API 端点、列定义、请求格式
- 如果发现多模式,使用 modes 数组格式输出
2. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, contentType, 用途)
- 从 \$.ajax/fetch 调用中提取 contentType
- 检测请求格式: application/json 或 application/x-www-form-urlencoded
3. **请求模板**: 识别请求参数结构
- 提取硬编码的分页参数 (rows, page, sidx, sord)
- 识别模板变量如 \${args.org_code}
4. **数据归一化**: 识别数据处理规则
- 查找数据渲染/表格填充逻辑
- 检测数据验证条件 (哪些字段不能为空)
5. **响应路径**: 识别数据在响应中的位置
- 如 response.content 或 response.data
## 输出格式
### 单模式场景 (无 modes 数组):
{
"sceneId": "string",
"sceneName": "string",
"sceneKind": "report_collection | monitoring",
"expectedDomain": "string",
"targetUrl": "string",
"apiEndpoints": [{"name": "", "url": "", "method": "POST"}],
"staticParams": {"key": "value"},
"columnDefs": [["fieldName", "中文列名"]]
}
### 多模式场景 (有 modes 数组):
{
"sceneId": "tq-lineloss-report",
"sceneName": "台区线损报表",
"sceneKind": "report_collection",
"modes": [
{
"name": "month",
"label": "月度报表",
"condition": {"field": "period_mode", "operator": "equals", "value": "month"},
"apiEndpoint": {
"name": "月度线损查询",
"url": "http://...",
"method": "POST",
"contentType": "application/x-www-form-urlencoded"
},
"columnDefs": [["ORG_NAME", "供电单位"], ...],
"requestTemplate": {"orgno": "\${args.org_code}", "rows": 1000, "page": 1},
"normalizeRules": {"type": "validate_all_columns", "filterNull": true},
"responsePath": "content"
},
{
"name": "week",
"label": "周报表",
"condition": {"field": "period_mode", "operator": "equals", "value": "week"},
"apiEndpoint": {...},
"columnDefs": [...],
...
}
],
"defaultMode": "month",
"modeSwitchField": "period_mode"
}
**重要**: 如果发现代码中有基于 period_mode 的 if/switch 分支,必须使用多模式格式输出!`;
/**
 * Build the user prompt for the quick scene-info pass (sceneId/sceneName only).
 *
 * @param {string} sourceDir - Scene directory path, echoed into the prompt.
 * @param {object} dirContents - Directory content map: `tree` (string listing),
 *   optional "scene.toml" / "SKILL.toml" / "SKILL.md" file bodies, and an
 *   optional `scripts` object of {fileName: content}.
 * @returns {string} Prompt text instructing the LLM to answer with JSON.
 */
function buildAnalyzePrompt(sourceDir, dirContents) {
  const parts = [];
  parts.push(`=== 目录结构 ===`);
  parts.push(dirContents.tree || "(empty)");
  // Well-known metadata files, appended in a fixed order when present.
  // (Replaces three copy-pasted if-blocks; output is byte-identical.)
  for (const fileName of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
    if (dirContents[fileName]) {
      parts.push(`\n=== ${fileName} ===`);
      parts.push(dirContents[fileName]);
    }
  }
  if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) {
    parts.push(`\n=== 脚本文件 ===`);
    for (const [name, content] of Object.entries(dirContents.scripts)) {
      parts.push(`\n--- ${name} ---`);
      // Coerce defensively (content may be a Buffer or undefined) and cap each
      // script at 2000 chars to keep the prompt within token limits.
      const contentStr = typeof content === 'string' ? content : String(content || '');
      parts.push(contentStr.substring(0, 2000));
    }
  }
  return `以下是场景目录 "${sourceDir}" 的内容:\n\n${parts.join("\n")}\n\n请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`;
}
/**
 * Build the user prompt for the deep analysis pass. Same layout as
 * buildAnalyzePrompt, plus the scene's index.html (the key input for
 * multi-mode detection) and a larger per-script excerpt.
 *
 * @param {string} sourceDir - Scene directory path, echoed into the prompt.
 * @param {object} dirContents - Directory content map (see buildAnalyzePrompt).
 * @param {string|null|undefined} indexHtmlContent - Raw index.html; skipped
 *   unless it is a string.
 * @returns {string} Prompt text asking the LLM to extract full scene info.
 */
function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) {
  const parts = [];
  parts.push(`=== 目录结构 ===`);
  parts.push(dirContents.tree || "(empty)");
  // Well-known metadata files, appended in a fixed order when present.
  // (Replaces three copy-pasted if-blocks; output is byte-identical.)
  for (const fileName of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
    if (dirContents[fileName]) {
      parts.push(`\n=== ${fileName} ===`);
      parts.push(dirContents[fileName]);
    }
  }
  // Include index.html content (key addition)
  if (indexHtmlContent && typeof indexHtmlContent === 'string') {
    parts.push(`\n=== index.html ===`);
    // Limit to first 15000 chars to avoid token limits
    parts.push(indexHtmlContent.substring(0, 15000));
  }
  if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) {
    parts.push(`\n=== 脚本文件 ===`);
    for (const [name, content] of Object.entries(dirContents.scripts)) {
      parts.push(`\n--- ${name} ---`);
      // Coerce defensively and cap each script at 3000 chars (larger than the
      // quick pass — this prompt feeds a bigger max_tokens budget).
      const contentStr = typeof content === 'string' ? content : String(content || '');
      parts.push(contentStr.substring(0, 3000));
    }
  }
  return `以下是场景目录 "${sourceDir}" 的内容:\n\n${parts.join("\n")}\n\n请分析以上代码,提取完整的场景信息。`;
}
/**
 * Extract a {sceneId, sceneName} JSON object from an LLM reply, trying in
 * order: a fenced ```json code block, a bare object containing both keys,
 * then the whole text.
 *
 * Bug fix: previously a fenced block with malformed JSON threw immediately
 * instead of falling back — inconsistent with extractSceneInfo, which
 * deliberately falls through on parse failure. Only the final attempt may
 * now throw (a SyntaxError, as before).
 *
 * @param {string} text - Raw LLM response text.
 * @returns {object} Parsed JSON value.
 * @throws {SyntaxError} If no strategy yields valid JSON.
 */
function extractJsonFromResponse(text) {
  const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  if (codeBlockMatch) {
    try {
      return JSON.parse(codeBlockMatch[1]);
    } catch (e) {
      // Malformed fence content — fall through to the next strategy.
    }
  }
  const jsonMatch = text.match(
    /\{[\s\S]*"sceneId"[\s\S]*"sceneName"[\s\S]*\}/
  );
  if (jsonMatch) {
    try {
      return JSON.parse(jsonMatch[0]);
    } catch (e) {
      // Greedy match grabbed too much — fall through.
    }
  }
  // Last resort: the entire text. Let JSON.parse's SyntaxError propagate,
  // preserving the original failure behavior for callers.
  return JSON.parse(text);
}
/**
 * Extract a SceneInfo JSON object from an LLM reply. Candidate payloads are
 * tried in priority order: fenced ```json block, bare object containing
 * "sceneId", then the entire text. The first candidate that parses wins.
 *
 * @param {string} text - Raw LLM response text.
 * @returns {object} Parsed SceneInfo object.
 * @throws {Error} If no candidate is valid JSON.
 */
function extractSceneInfo(text) {
  const candidates = [];
  const fenced = /```(?:json)?\s*\n([\s\S]*?)\n```/.exec(text);
  if (fenced) {
    candidates.push(fenced[1]);
  }
  const embedded = /\{[\s\S]*"sceneId"[\s\S]*\}/.exec(text);
  if (embedded) {
    candidates.push(embedded[0]);
  }
  candidates.push(text);
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (e) {
      // Not valid JSON — try the next extraction strategy.
    }
  }
  throw new Error("Failed to extract valid SceneInfo JSON from LLM response");
}
/**
 * Quick scene analysis: POST the directory summary to an OpenAI-compatible
 * chat-completions endpoint and resolve with {sceneId, sceneName}.
 *
 * @param {string} sourceDir - Scene directory path (prompt context only).
 * @param {object} dirContents - Directory content map (see buildAnalyzePrompt).
 * @param {{apiKey: string, baseUrl: string, model: string}} config - API settings.
 * @returns {Promise<{sceneId: string, sceneName: string}>}
 */
function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
  const userPrompt = buildAnalyzePrompt(sourceDir, dirContents);
  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 256,
  });
  return new Promise((resolve, reject) => {
    // Strip a trailing /v1 (if any) before appending the canonical path, so
    // both "https://host/v1" and "https://host" baseUrls work.
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };
    // BUG FIX: select the transport by URL scheme. This function previously
    // always called https.request — the port fallback was protocol-aware but
    // TLS was still attempted against plain-http endpoints, so any http://
    // baseUrl failed. Now consistent with analyzeSceneDeep.
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }
        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractJsonFromResponse(content);
          if (!result.sceneId || !result.sceneName) {
            return reject(
              new Error(`LLM response missing sceneId/sceneName: ${content}`)
            );
          }
          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });
    req.on("error", reject);
    // Abort after 30s; destroy(err) surfaces through the 'error' handler.
    req.setTimeout(30000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });
    req.write(requestBody);
    req.end();
  });
}
/**
 * Deep scene analysis: POST the full scene source (metadata files, scripts,
 * index.html) to an OpenAI-compatible chat-completions endpoint and resolve
 * with a complete SceneInfo object (endpoints, column defs, modes, ...).
 *
 * @param {string} sourceDir - Scene directory path (prompt context only).
 * @param {object} dirContents - Directory content map (see buildDeepAnalyzePrompt).
 * @param {string|null} indexHtmlContent - Raw index.html, or null to omit.
 * @param {{apiKey: string, baseUrl: string, model: string}} config - API settings.
 * @returns {Promise<object>} SceneInfo with defaults filled for optional fields.
 */
function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) {
  const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent);
  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: DEEP_SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 2048, // Increased for detailed response
  });
  return new Promise((resolve, reject) => {
    // Strip a trailing /v1 (if any) before appending the canonical path, so
    // both "https://host/v1" and bare-host baseUrls work.
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };
    // Select the transport by URL scheme (supports plain-http endpoints).
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }
        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractSceneInfo(content);
          // Validate required fields
          if (!result.sceneId || !result.sceneName) {
            return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`));
          }
          // Set defaults for optional fields
          result.sceneKind = result.sceneKind || "report_collection";
          result.apiEndpoints = result.apiEndpoints || [];
          result.staticParams = result.staticParams || {};
          result.columnDefs = result.columnDefs || [];
          result.businessLogic = result.businessLogic || {};
          result.modes = result.modes || [];
          // defaultMode falls back to the first declared mode, if any.
          result.defaultMode = result.defaultMode || (result.modes.length > 0 ? result.modes[0].name : null);
          result.modeSwitchField = result.modeSwitchField || "period_mode";
          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });
    req.on("error", reject);
    // Abort after 60s; destroy(err) surfaces through the 'error' handler.
    req.setTimeout(60000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });
    req.write(requestBody);
    req.end();
  });
}
// Public API: prompt builders, JSON-extraction helpers, and the two
// network-calling analyzers (quick pass and deep pass).
module.exports = {
  buildAnalyzePrompt,
  extractJsonFromResponse,
  analyzeScene,
  // Deep-analysis additions (multi-mode scene extraction)
  buildDeepAnalyzePrompt,
  extractSceneInfo,
  analyzeSceneDeep,
};