Files
claw/frontend/scene-generator/llm-client.js
2026-04-17 13:07:10 +08:00

349 lines
11 KiB
JavaScript

const http = require("http");
const https = require("https");
// System prompt for the quick extraction pass: instructs the LLM to derive a
// lowercase kebab-case scene-id and a short Chinese scene-name from a scene
// directory listing, and to answer as a bare JSON object. (Prompt text is in
// Chinese and is part of runtime behavior — do not reformat.)
const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容,提取 scene-id 和 scene-name。
scene-id 规则:
- 使用英文短横线连接,如 tq-lineloss-report
- 全小写,有业务含义
scene-name 规则:
- 使用中文,简短描述性名称
- 如 "台区线损报表"、"知乎热榜导出"
请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`;
// System prompt for the deep analysis pass: asks the LLM to mine scene source
// code for API endpoints, request templates, column definitions and — most
// importantly — multi-mode branches keyed on fields like period_mode. The two
// JSON shapes documented inside (single-mode vs. modes[] array) are what
// extractSceneInfo/analyzeSceneDeep expect back. `\$` escapes keep literal
// "${...}" text out of template interpolation. (Runtime string — keep verbatim.)
const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码,提取关键业务信息。
## 分析目标
1. **多模式识别** (关键):
- 查找条件分支逻辑 (if/switch) 中基于 period_mode、reportType 等字段的分支
- 识别不同分支对应的 API 端点、列定义、请求格式
- 如果发现多模式,使用 modes 数组格式输出
2. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, contentType, 用途)
- 从 \$.ajax/fetch 调用中提取 contentType
- 检测请求格式: application/json 或 application/x-www-form-urlencoded
3. **请求模板**: 识别请求参数结构
- 提取硬编码的分页参数 (rows, page, sidx, sord)
- 识别模板变量如 \${args.org_code}
4. **数据归一化**: 识别数据处理规则
- 查找数据渲染/表格填充逻辑
- 检测数据验证条件 (哪些字段不能为空)
5. **响应路径**: 识别数据在响应中的位置
- 如 response.content 或 response.data
## 输出格式
### 单模式场景 (无 modes 数组):
{
"sceneId": "string",
"sceneName": "string",
"sceneKind": "report_collection | monitoring",
"expectedDomain": "string",
"targetUrl": "string",
"apiEndpoints": [{"name": "", "url": "", "method": "POST"}],
"staticParams": {"key": "value"},
"columnDefs": [["fieldName", "中文列名"]]
}
### 多模式场景 (有 modes 数组):
{
"sceneId": "tq-lineloss-report",
"sceneName": "台区线损报表",
"sceneKind": "report_collection",
"modes": [
{
"name": "month",
"label": "月度报表",
"condition": {"field": "period_mode", "operator": "equals", "value": "month"},
"apiEndpoint": {
"name": "月度线损查询",
"url": "http://...",
"method": "POST",
"contentType": "application/x-www-form-urlencoded"
},
"columnDefs": [["ORG_NAME", "供电单位"], ...],
"requestTemplate": {"orgno": "\${args.org_code}", "rows": 1000, "page": 1},
"normalizeRules": {"type": "validate_all_columns", "filterNull": true},
"responsePath": "content"
},
{
"name": "week",
"label": "周报表",
"condition": {"field": "period_mode", "operator": "equals", "value": "week"},
"apiEndpoint": {...},
"columnDefs": [...],
...
}
],
"defaultMode": "month",
"modeSwitchField": "period_mode"
}
**重要**: 如果发现代码中有基于 period_mode 的 if/switch 分支,必须使用多模式格式输出!`;
/**
 * Build the user prompt for the quick scene-info pass (sceneId/sceneName only).
 *
 * @param {string} sourceDir - Scene directory path, echoed into the prompt.
 * @param {object} dirContents - Directory content map: `tree` (string listing),
 *   optional "scene.toml" / "SKILL.toml" / "SKILL.md" file bodies, and an
 *   optional `scripts` object of {fileName: content}.
 * @returns {string} Prompt text instructing the LLM to answer with JSON.
 */
function buildAnalyzePrompt(sourceDir, dirContents) {
  const parts = [];
  parts.push(`=== 目录结构 ===`);
  parts.push(dirContents.tree || "(empty)");
  // Well-known metadata files, appended in a fixed order when present.
  // (Replaces three copy-pasted if-blocks; output is byte-identical.)
  for (const fileName of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
    if (dirContents[fileName]) {
      parts.push(`\n=== ${fileName} ===`);
      parts.push(dirContents[fileName]);
    }
  }
  if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) {
    parts.push(`\n=== 脚本文件 ===`);
    for (const [name, content] of Object.entries(dirContents.scripts)) {
      parts.push(`\n--- ${name} ---`);
      // Coerce defensively (content may be a Buffer or undefined) and cap each
      // script at 2000 chars to keep the prompt within token limits.
      const contentStr = typeof content === 'string' ? content : String(content || '');
      parts.push(contentStr.substring(0, 2000));
    }
  }
  return `以下是场景目录 "${sourceDir}" 的内容:\n\n${parts.join("\n")}\n\n请以 JSON 格式返回:{"sceneId": "...", "sceneName": "..."}`;
}
/**
 * Build the user prompt for the deep analysis pass. Same layout as
 * buildAnalyzePrompt, plus the scene's index.html (the key input for
 * multi-mode detection) and a larger per-script excerpt.
 *
 * @param {string} sourceDir - Scene directory path, echoed into the prompt.
 * @param {object} dirContents - Directory content map (see buildAnalyzePrompt).
 * @param {string|null|undefined} indexHtmlContent - Raw index.html; skipped
 *   unless it is a string.
 * @returns {string} Prompt text asking the LLM to extract full scene info.
 */
function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) {
  const parts = [];
  parts.push(`=== 目录结构 ===`);
  parts.push(dirContents.tree || "(empty)");
  // Well-known metadata files, appended in a fixed order when present.
  // (Replaces three copy-pasted if-blocks; output is byte-identical.)
  for (const fileName of ["scene.toml", "SKILL.toml", "SKILL.md"]) {
    if (dirContents[fileName]) {
      parts.push(`\n=== ${fileName} ===`);
      parts.push(dirContents[fileName]);
    }
  }
  // Include index.html content (key addition)
  if (indexHtmlContent && typeof indexHtmlContent === 'string') {
    parts.push(`\n=== index.html ===`);
    // Limit to first 15000 chars to avoid token limits
    parts.push(indexHtmlContent.substring(0, 15000));
  }
  if (dirContents.scripts && Object.keys(dirContents.scripts).length > 0) {
    parts.push(`\n=== 脚本文件 ===`);
    for (const [name, content] of Object.entries(dirContents.scripts)) {
      parts.push(`\n--- ${name} ---`);
      // Coerce defensively and cap each script at 3000 chars (larger than the
      // quick pass — this prompt feeds a bigger max_tokens budget).
      const contentStr = typeof content === 'string' ? content : String(content || '');
      parts.push(contentStr.substring(0, 3000));
    }
  }
  return `以下是场景目录 "${sourceDir}" 的内容:\n\n${parts.join("\n")}\n\n请分析以上代码,提取完整的场景信息。`;
}
/**
 * Extract a {sceneId, sceneName} JSON object from an LLM reply, trying in
 * order: a fenced ```json code block, a bare object containing both keys,
 * then the whole text.
 *
 * Bug fix: previously a fenced block with malformed JSON threw immediately
 * instead of falling back — inconsistent with extractSceneInfo, which
 * deliberately falls through on parse failure. Only the final attempt may
 * now throw (a SyntaxError, as before).
 *
 * @param {string} text - Raw LLM response text.
 * @returns {object} Parsed JSON value.
 * @throws {SyntaxError} If no strategy yields valid JSON.
 */
function extractJsonFromResponse(text) {
  const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  if (codeBlockMatch) {
    try {
      return JSON.parse(codeBlockMatch[1]);
    } catch (e) {
      // Malformed fence content — fall through to the next strategy.
    }
  }
  const jsonMatch = text.match(
    /\{[\s\S]*"sceneId"[\s\S]*"sceneName"[\s\S]*\}/
  );
  if (jsonMatch) {
    try {
      return JSON.parse(jsonMatch[0]);
    } catch (e) {
      // Greedy match grabbed too much — fall through.
    }
  }
  // Last resort: the entire text. Let JSON.parse's SyntaxError propagate,
  // preserving the original failure behavior for callers.
  return JSON.parse(text);
}
/**
 * Extract a SceneInfo JSON object from an LLM reply. Candidate payloads are
 * tried in priority order: fenced ```json block, bare object containing
 * "sceneId", then the entire text. The first candidate that parses wins.
 *
 * @param {string} text - Raw LLM response text.
 * @returns {object} Parsed SceneInfo object.
 * @throws {Error} If no candidate is valid JSON.
 */
function extractSceneInfo(text) {
  const candidates = [];
  const fenced = /```(?:json)?\s*\n([\s\S]*?)\n```/.exec(text);
  if (fenced) {
    candidates.push(fenced[1]);
  }
  const embedded = /\{[\s\S]*"sceneId"[\s\S]*\}/.exec(text);
  if (embedded) {
    candidates.push(embedded[0]);
  }
  candidates.push(text);
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (e) {
      // Not valid JSON — try the next extraction strategy.
    }
  }
  throw new Error("Failed to extract valid SceneInfo JSON from LLM response");
}
/**
 * Quick scene analysis: POST the directory summary to an OpenAI-compatible
 * chat-completions endpoint and resolve with {sceneId, sceneName}.
 *
 * @param {string} sourceDir - Scene directory path (prompt context only).
 * @param {object} dirContents - Directory content map (see buildAnalyzePrompt).
 * @param {{apiKey: string, baseUrl: string, model: string}} config - API settings.
 * @returns {Promise<{sceneId: string, sceneName: string}>}
 */
function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
  const userPrompt = buildAnalyzePrompt(sourceDir, dirContents);
  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 256,
  });
  return new Promise((resolve, reject) => {
    // Strip a trailing /v1 (if any) before appending the canonical path, so
    // both "https://host/v1" and "https://host" baseUrls work.
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };
    // BUG FIX: select the transport by URL scheme. This function previously
    // always called https.request — the port fallback was protocol-aware but
    // TLS was still attempted against plain-http endpoints, so any http://
    // baseUrl failed. Now consistent with analyzeSceneDeep.
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }
        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractJsonFromResponse(content);
          if (!result.sceneId || !result.sceneName) {
            return reject(
              new Error(`LLM response missing sceneId/sceneName: ${content}`)
            );
          }
          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });
    req.on("error", reject);
    // Abort after 30s; destroy(err) surfaces through the 'error' handler.
    req.setTimeout(30000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });
    req.write(requestBody);
    req.end();
  });
}
/**
 * Deep scene analysis: POST the full scene source (metadata files, scripts,
 * index.html) to an OpenAI-compatible chat-completions endpoint and resolve
 * with a complete SceneInfo object (endpoints, column defs, modes, ...).
 *
 * @param {string} sourceDir - Scene directory path (prompt context only).
 * @param {object} dirContents - Directory content map (see buildDeepAnalyzePrompt).
 * @param {string|null} indexHtmlContent - Raw index.html, or null to omit.
 * @param {{apiKey: string, baseUrl: string, model: string}} config - API settings.
 * @returns {Promise<object>} SceneInfo with defaults filled for optional fields.
 */
function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) {
  const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent);
  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: DEEP_SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 2048, // Increased for detailed response
  });
  return new Promise((resolve, reject) => {
    // Strip a trailing /v1 (if any) before appending the canonical path, so
    // both "https://host/v1" and bare-host baseUrls work.
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };
    // Select the transport by URL scheme (supports plain-http endpoints).
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }
        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractSceneInfo(content);
          // Validate required fields
          if (!result.sceneId || !result.sceneName) {
            return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`));
          }
          // Set defaults for optional fields
          result.sceneKind = result.sceneKind || "report_collection";
          result.apiEndpoints = result.apiEndpoints || [];
          result.staticParams = result.staticParams || {};
          result.columnDefs = result.columnDefs || [];
          result.businessLogic = result.businessLogic || {};
          result.modes = result.modes || [];
          // defaultMode falls back to the first declared mode, if any.
          result.defaultMode = result.defaultMode || (result.modes.length > 0 ? result.modes[0].name : null);
          result.modeSwitchField = result.modeSwitchField || "period_mode";
          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });
    req.on("error", reject);
    // Abort after 60s; destroy(err) surfaces through the 'error' handler.
    req.setTimeout(60000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });
    req.write(requestBody);
    req.end();
  });
}
// Public API: prompt builders, JSON-extraction helpers, and the two
// network-calling analyzers (quick pass and deep pass).
module.exports = {
  buildAnalyzePrompt,
  extractJsonFromResponse,
  analyzeScene,
  // Deep-analysis additions (multi-mode scene extraction)
  buildDeepAnalyzePrompt,
  extractSceneInfo,
  analyzeSceneDeep,
};