// claw/frontend/scene-generator/llm-client.js

const http = require("http");
const https = require("https");

// System prompt (written in Chinese) for the lightweight analysis performed by
// analyzeScene. It tells the model to derive a scene-id (lowercase, hyphenated
// English) and a scene-name (short Chinese label) from the scene directory
// contents and to reply with a JSON object {"sceneId", "sceneName"}.
// The text is sent to the model verbatim, so editing it changes runtime behaviour.
const SYSTEM_PROMPT = `你是一个场景信息提取助手。根据场景目录的内容，提取 scene-id 和 scene-name。

scene-id 规则：
- 使用英文短横线连接，如 tq-lineloss-report
- 全小写，有业务含义

scene-name 规则：
- 使用中文，简短描述性名称
- 如 "台区线损报表"、"知乎热榜导出"

请以 JSON 格式返回：{"sceneId": "...", "sceneName": "..."}`;

// System prompt (written in Chinese) for the deep analysis performed by
// analyzeSceneDeep. It asks the model to analyse the scene source code and
// return one JSON object describing: sceneId/sceneName, sceneKind
// (report_collection or monitoring), optional sourceSystem/expectedDomain,
// targetUrl, apiEndpoints, staticParams, columnDefs ([field, label] pairs),
// entryMethod and a businessLogic summary.
// The text is sent to the model verbatim, so editing it changes runtime behaviour.
const DEEP_SYSTEM_PROMPT = `你是一个场景代码分析专家。分析场景源码，提取关键业务信息。

## 分析目标

1. **API 端点**: 识别所有 HTTP 请求地址 (URL, method, 用途)
2. **静态参数**: 识别硬编码的业务参数 (key-value pairs)
3. **列定义**: 识别数据表格/导出的列配置 ([field, label] pairs)
4. **业务逻辑**: 理解数据获取和转换流程
5. **场景类型**: 判断是 report_collection 还是 monitoring

## 输出格式

请以 JSON 格式返回：
{
  "sceneId": "string - 场景标识 (英文短横线)",
  "sceneName": "string - 场景中文名",
  "sceneKind": "report_collection | monitoring",
  "sourceSystem": "string - 来源系统名 (可选)",
  "expectedDomain": "string - 目标域名 (可选)",
  "targetUrl": "string | null - 目标页面URL",
  "apiEndpoints": [
    {"name": "string", "url": "string", "method": "GET|POST", "description": "string"}
  ],
  "staticParams": {"key": "value"},
  "columnDefs": [["fieldName", "中文列名"]],
  "entryMethod": "string - 入口方法名",
  "businessLogic": {
    "dataFetch": "string - 数据获取逻辑描述",
    "dataTransform": "string - 数据转换逻辑描述"
  }
}`;

/**
 * Build the user prompt for the lightweight scene analysis.
 *
 * The prompt lists the directory tree, then the contents of scene.toml,
 * SKILL.toml and SKILL.md (each only when present and truthy), then every
 * script file truncated to its first 2000 characters.
 *
 * @param {string} sourceDir - Scene directory label quoted in the prompt.
 * @param {object} dirContents - Directory snapshot: optional `tree`,
 *   `"scene.toml"`, `"SKILL.toml"`, `"SKILL.md"` and a `scripts` map of
 *   file name to content.
 * @returns {string} The complete user prompt.
 */
function buildAnalyzePrompt(sourceDir, dirContents) {
  // [key in dirContents, section header] in the order they appear in the prompt.
  const fileSections = [
    ["scene.toml", "\n=== scene.toml ==="],
    ["SKILL.toml", "\n=== SKILL.toml ==="],
    ["SKILL.md", "\n=== SKILL.md ==="],
  ];

  const sections = ["=== 目录结构 ===", dirContents.tree || "(empty)"];

  for (const [key, header] of fileSections) {
    const fileText = dirContents[key];
    if (!fileText) continue;
    sections.push(header, fileText);
  }

  const scripts = dirContents.scripts;
  const hasScripts = Boolean(scripts) && Object.keys(scripts).length > 0;
  if (hasScripts) {
    sections.push("\n=== 脚本文件 ===");
    Object.entries(scripts).forEach(([scriptName, body]) => {
      // Non-string content is stringified; falsy content becomes an empty string.
      const scriptText = typeof body === "string" ? body : String(body || "");
      sections.push(`\n--- ${scriptName} ---`, scriptText.substring(0, 2000));
    });
  }

  const listing = sections.join("\n");
  return `以下是场景目录 "${sourceDir}" 的内容：\n\n${listing}\n\n请以 JSON 格式返回：{"sceneId": "...", "sceneName": "..."}`;
}

/**
 * Build the user prompt for the deep scene analysis.
 *
 * Same layout as the lightweight prompt (directory tree, scene.toml,
 * SKILL.toml, SKILL.md), plus the index.html source and larger script
 * excerpts.
 *
 * @param {string} sourceDir - Scene directory label quoted in the prompt.
 * @param {object} dirContents - Directory snapshot: optional `tree`,
 *   `"scene.toml"`, `"SKILL.toml"`, `"SKILL.md"` and a `scripts` map of
 *   file name to content.
 * @param {string} [indexHtmlContent] - Raw index.html text; ignored unless it
 *   is a non-empty string.
 * @returns {string} The complete user prompt.
 */
function buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent) {
  // [key in dirContents, section header] in the order they appear in the prompt.
  const fileSections = [
    ["scene.toml", "\n=== scene.toml ==="],
    ["SKILL.toml", "\n=== SKILL.toml ==="],
    ["SKILL.md", "\n=== SKILL.md ==="],
  ];

  const sections = ["=== 目录结构 ===", dirContents.tree || "(empty)"];

  for (const [key, header] of fileSections) {
    const fileText = dirContents[key];
    if (!fileText) continue;
    sections.push(header, fileText);
  }

  // index.html is capped at 15000 characters to keep the prompt within token limits.
  const hasIndexHtml = typeof indexHtmlContent === "string" && Boolean(indexHtmlContent);
  if (hasIndexHtml) {
    sections.push("\n=== index.html ===", indexHtmlContent.substring(0, 15000));
  }

  const scripts = dirContents.scripts;
  const hasScripts = Boolean(scripts) && Object.keys(scripts).length > 0;
  if (hasScripts) {
    sections.push("\n=== 脚本文件 ===");
    Object.entries(scripts).forEach(([scriptName, body]) => {
      // Non-string content is stringified; falsy content becomes an empty string.
      const scriptText = typeof body === "string" ? body : String(body || "");
      sections.push(`\n--- ${scriptName} ---`, scriptText.substring(0, 3000));
    });
  }

  const listing = sections.join("\n");
  return `以下是场景目录 "${sourceDir}" 的内容：\n\n${listing}\n\n请分析以上代码，提取完整的场景信息。`;
}

/**
 * Parse the scene-info JSON value out of a raw LLM reply.
 *
 * Candidates are tried in order and the first one that parses wins:
 *   1. the body of the first fenced code block (``` or ```json),
 *   2. the span from the first "{" to the last "}" in the reply, provided it
 *      mentions both "sceneId" and "sceneName" (in any order),
 *   3. the whole reply.
 *
 * A candidate that is not valid JSON no longer aborts the extraction; the
 * next candidate is tried instead.
 *
 * @param {string} text - Raw message content returned by the model.
 * @returns {*} The parsed JSON value (normally an object with sceneId/sceneName).
 * @throws {SyntaxError} When none of the candidates is valid JSON.
 */
function extractJsonFromResponse(text) {
  const candidates = [];

  const codeBlockMatch = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  if (codeBlockMatch) candidates.push(codeBlockMatch[1]);

  const firstBrace = text.indexOf("{");
  const lastBrace = text.lastIndexOf("}");
  if (firstBrace !== -1 && lastBrace > firstBrace) {
    const embedded = text.slice(firstBrace, lastBrace + 1);
    // Only accept an embedded object that looks like scene info; checking the
    // keys independently makes the match insensitive to key order.
    if (embedded.includes('"sceneId"') && embedded.includes('"sceneName"')) {
      candidates.push(embedded);
    }
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch (err) {
      // Not valid JSON; fall through to the next candidate.
    }
  }

  // Last resort: parse the whole reply. A failure here surfaces as the
  // SyntaxError callers already handle.
  return JSON.parse(text);
}

/**
 * Extract the scene-info JSON value from a raw LLM reply.
 *
 * Three candidates are attempted in order, and the first that parses is
 * returned: the body of the first fenced code block, the span from the first
 * "{" through the last "}" that contains "sceneId", and finally the whole
 * reply.
 *
 * @param {string} text - Raw message content returned by the model.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When no candidate is valid JSON.
 */
function extractSceneInfo(text) {
  const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
  const embedded = text.match(/\{[\s\S]*"sceneId"[\s\S]*\}/);

  // Ordered from most to least specific; the full text is the last resort.
  const attempts = [];
  if (fenced) attempts.push(fenced[1]);
  if (embedded) attempts.push(embedded[0]);
  attempts.push(text);

  for (const candidate of attempts) {
    try {
      return JSON.parse(candidate);
    } catch (parseError) {
      // Invalid JSON: move on to the next candidate.
    }
  }

  throw new Error("Failed to extract valid SceneInfo JSON from LLM response");
}

/**
 * Ask the LLM for a scene id and scene name based on the directory contents.
 *
 * POSTs a chat request (system prompt + generated user prompt) to
 * `<baseUrl>/v1/chat/completions`; a trailing `/v1` on `baseUrl` is tolerated.
 *
 * @param {string} sourceDir - Scene directory label quoted in the prompt.
 * @param {object} dirContents - Directory snapshot passed to buildAnalyzePrompt.
 * @param {object} config
 * @param {string} config.apiKey - Bearer token sent in the Authorization header.
 * @param {string} config.baseUrl - API base URL (http or https).
 * @param {string} config.model - Model name placed in the request body.
 * @returns {Promise<object>} Resolves with the parsed object containing at
 *   least `sceneId` and `sceneName`. Rejects on a non-200 status, an empty or
 *   unparseable reply, missing fields, a network error, or a timeout.
 */
function analyzeScene(sourceDir, dirContents, { apiKey, baseUrl, model }) {
  const userPrompt = buildAnalyzePrompt(sourceDir, dirContents);

  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 256,
  });

  return new Promise((resolve, reject) => {
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };

    // Use the transport that matches the URL scheme so plain-http base URLs
    // (e.g. a local gateway) work, consistent with analyzeSceneDeep.
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding("utf8");

      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }

        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractJsonFromResponse(content);
          if (!result.sceneId || !result.sceneName) {
            return reject(
              new Error(`LLM response missing sceneId/sceneName: ${content}`)
            );
          }
          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });

    req.on("error", reject);
    // Destroying the request with an error makes the "error" handler above
    // reject the promise with the timeout message.
    req.setTimeout(30000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });

    req.write(requestBody);
    req.end();
  });
}

/**
 * Ask the LLM for a full scene description (endpoints, params, columns, ...)
 * based on the directory contents and the scene's index.html.
 *
 * POSTs a chat request (deep system prompt + generated user prompt) to
 * `<baseUrl>/v1/chat/completions`; a trailing `/v1` on `baseUrl` is tolerated.
 *
 * @param {string} sourceDir - Scene directory label quoted in the prompt.
 * @param {object} dirContents - Directory snapshot passed to buildDeepAnalyzePrompt.
 * @param {string} [indexHtmlContent] - Raw index.html text included in the prompt.
 * @param {object} config
 * @param {string} config.apiKey - Bearer token sent in the Authorization header.
 * @param {string} config.baseUrl - API base URL (http or https).
 * @param {string} config.model - Model name placed in the request body.
 * @returns {Promise<object>} Resolves with the parsed scene info; `sceneKind`,
 *   `apiEndpoints`, `staticParams`, `columnDefs` and `businessLogic` are filled
 *   with defaults when the model omits them. Rejects on a non-200 status, an
 *   empty or unparseable reply, missing sceneId/sceneName, a network error, or
 *   a timeout.
 */
function analyzeSceneDeep(sourceDir, dirContents, indexHtmlContent, { apiKey, baseUrl, model }) {
  const userPrompt = buildDeepAnalyzePrompt(sourceDir, dirContents, indexHtmlContent);

  const requestBody = JSON.stringify({
    model,
    messages: [
      { role: "system", content: DEEP_SYSTEM_PROMPT },
      { role: "user", content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 2048, // Larger budget: the deep reply is a detailed JSON document
  });

  return new Promise((resolve, reject) => {
    const url = new URL(baseUrl.replace(/\/v1\/?$/, "") + "/v1/chat/completions");
    const options = {
      hostname: url.hostname,
      port: url.port || (url.protocol === "https:" ? 443 : 80),
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        "Content-Length": Buffer.byteLength(requestBody),
      },
    };

    // Use the transport that matches the URL scheme.
    const httpModule = url.protocol === "https:" ? https : http;
    const req = httpModule.request(options, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding("utf8");

      let data = "";
      res.on("data", (chunk) => (data += chunk));
      res.on("end", () => {
        if (res.statusCode !== 200) {
          return reject(new Error(`LLM API error ${res.statusCode}: ${data}`));
        }

        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices?.[0]?.message?.content;
          if (!content) return reject(new Error("LLM returned empty response"));
          const result = extractSceneInfo(content);

          // sceneId and sceneName are the only mandatory fields.
          if (!result.sceneId || !result.sceneName) {
            return reject(new Error(`LLM response missing sceneId/sceneName: ${content}`));
          }

          // Fill in defaults so callers can rely on these fields being present.
          result.sceneKind = result.sceneKind || "report_collection";
          result.apiEndpoints = result.apiEndpoints || [];
          result.staticParams = result.staticParams || {};
          result.columnDefs = result.columnDefs || [];
          result.businessLogic = result.businessLogic || {};

          resolve(result);
        } catch (err) {
          reject(new Error(`Failed to parse LLM response: ${err.message}`));
        }
      });
    });

    req.on("error", reject);
    // Destroying the request with an error makes the "error" handler above
    // reject the promise with the timeout message.
    req.setTimeout(60000, () => {
      req.destroy(new Error("LLM API request timed out"));
    });

    req.write(requestBody);
    req.end();
  });
}

// Public API of this module.
module.exports = {
  // Lightweight analysis: extracts only sceneId / sceneName.
  buildAnalyzePrompt,
  extractJsonFromResponse,
  analyzeScene,
  // Deep analysis: also includes index.html and extracts the full scene description.
  buildDeepAnalyzePrompt,
  extractSceneInfo,
  analyzeSceneDeep,
};