feat: add generated scene skill platform hardening
This commit is contained in:
@@ -1,8 +1,23 @@
|
||||
const assert = require("assert");
|
||||
const fs = require("fs");
|
||||
const os = require("os");
|
||||
const path = require("path");
|
||||
const {
|
||||
buildAnalyzePrompt,
|
||||
extractJsonFromResponse,
|
||||
isRetryableLlmError,
|
||||
repairCommonJsonIssues,
|
||||
} = require("../frontend/scene-generator/llm-client");
|
||||
const {
|
||||
buildDeterministicSceneIr,
|
||||
readDirectory,
|
||||
validateSceneIdCandidate,
|
||||
} = require("../frontend/scene-generator/generator-runner");
|
||||
const {
|
||||
getGenerationBlockers,
|
||||
mergeSceneIr,
|
||||
sanitizeSceneIr,
|
||||
} = require("../frontend/scene-generator/server");
|
||||
|
||||
function testBuildAnalyzePromptIncludesFileContents() {
|
||||
const dirContents = {
|
||||
@@ -41,5 +56,263 @@ function testExtractJsonFromResponse() {
|
||||
console.log("PASS: testExtractJsonFromResponse");
|
||||
}
|
||||
|
||||
/**
 * Passes extractJsonFromResponse a payload whose "evidence" array lacks the
 * comma between its two objects, then checks that the returned object exposes
 * the expected sceneId and a two-element evidence array.
 */
function testExtractJsonFromResponseRepairsMissingArrayComma() {
  // The separator between {"kind":"a"} and {"kind":"b"} is intentionally absent.
  const brokenPayload =
    '{"sceneId":"marketing-zero-consumer-report","evidence":[{"kind":"a"} {"kind":"b"}],"sceneName":"营销"}';

  const { sceneId, evidence } = extractJsonFromResponse(brokenPayload);

  assert.strictEqual(sceneId, "marketing-zero-consumer-report");
  assert.strictEqual(Array.isArray(evidence), true);
  assert.strictEqual(evidence.length, 2);
  console.log("PASS: testExtractJsonFromResponseRepairsMissingArrayComma");
}
|
||||
|
||||
/**
 * Runs repairCommonJsonIssues on text containing trailing commas (inside an
 * object, inside an array, and before the closing brace) and checks that the
 * output is accepted by JSON.parse with the expected content.
 */
function testRepairCommonJsonIssuesRemovesTrailingCommas() {
  const withTrailingCommas =
    '{\n "sceneId": "marketing-zero-consumer-report",\n "evidence": [{"kind":"a",},],\n}';

  // JSON.parse throws on its own if the repaired text is still invalid JSON.
  const document = JSON.parse(repairCommonJsonIssues(withTrailingCommas));

  assert.strictEqual(document.sceneId, "marketing-zero-consumer-report");
  assert.strictEqual(document.evidence.length, 1);
  console.log("PASS: testRepairCommonJsonIssuesRemovesTrailingCommas");
}
|
||||
|
||||
/**
 * Checks isRetryableLlmError against three error messages: a timeout and a
 * 503 response are expected to be retryable, a missing-sceneId response is not.
 */
function testIsRetryableLlmErrorRecognizesTimeouts() {
  // [error message, expected isRetryableLlmError result]
  const expectations = [
    ["LLM API request timed out", true],
    ["LLM API error 503: upstream unavailable", true],
    ["LLM response missing sceneId", false],
  ];

  for (const [message, shouldRetry] of expectations) {
    assert.strictEqual(isRetryableLlmError(new Error(message)), shouldRetry);
  }

  console.log("PASS: testIsRetryableLlmErrorRecognizesTimeouts");
}
|
||||
|
||||
/**
 * Calls buildDeterministicSceneIr with empty deterministic signals and a
 * source directory with a Chinese name, then checks the resulting sceneId and
 * its diagnostics (valid, sourced from "deterministic_keywords").
 */
function testDeterministicNamingAvoidsDegenerateSlugFallback() {
  const analysisInput = { deterministicSignals: {} };
  const sourceDir = "D:/tmp/营销2.0零度户报表数据生成";

  const { sceneId, sceneIdDiagnostics } = buildDeterministicSceneIr(analysisInput, sourceDir);

  assert.strictEqual(sceneId, "marketing-zero-consumer-report");
  assert.strictEqual(sceneIdDiagnostics.valid, true);
  assert.strictEqual(sceneIdDiagnostics.candidateSource, "deterministic_keywords");
  console.log("PASS: testDeterministicNamingAvoidsDegenerateSlugFallback");
}
|
||||
|
||||
/**
 * Checks that validateSceneIdCandidate rejects the candidate "2-0" and reports
 * one of the accepted rejection reasons.
 */
function testValidateSceneIdCandidateRejectsLowEntropyIds() {
  const context = {
    sceneName: "营销2.0零度户报表数据生成",
    sourceDir: "D:/tmp/营销2.0零度户报表数据生成",
  };
  // Any of these reasons is an acceptable explanation for the rejection.
  const acceptedReasons = ["numeric_only_scene_id", "numeric_dominant_scene_id", "scene_id_too_short"];

  const { valid, reason } = validateSceneIdCandidate("2-0", context);

  assert.strictEqual(valid, false);
  assert.ok(acceptedReasons.includes(reason), `unexpected invalid reason: ${reason}`);
  console.log("PASS: testValidateSceneIdCandidateRejectsLowEntropyIds");
}
|
||||
|
||||
/**
 * Merges a sanitized deterministic scene IR (valid sceneId) with a sanitized
 * LLM scene IR (invalid sceneId "2-0") and checks that the merged result keeps
 * the valid sceneId and that a "SceneId conflict" warning was recorded.
 */
function testMergeSceneIrPrefersValidSceneIdOverInvalidLlmValue() {
  // The two inputs differ only in sceneId and its diagnostics. Every call
  // builds fresh nested objects, so the two inputs never share references.
  const buildSceneIrInput = (sceneId, sceneIdDiagnostics) => ({
    sceneId,
    sceneIdDiagnostics,
    sceneName: "营销2.0零度户报表数据生成",
    bootstrap: { expectedDomain: "yx.gs.sgcc.com.cn", targetUrl: "http://yx.gs.sgcc.com.cn" },
    workflowSteps: [{ type: "request" }],
    apiEndpoints: [{ name: "userList", url: "http://yx.gs.sgcc.com.cn/list", method: "POST" }],
    validationHints: { runtimeCompatible: true },
    readiness: { level: "B" },
  });

  const deterministicIr = sanitizeSceneIr(
    buildSceneIrInput("marketing-zero-consumer-report", {
      candidateSource: "deterministic_keywords",
      valid: true,
      candidates: [{ value: "marketing-zero-consumer-report", source: "deterministic_keywords", valid: true }],
    })
  );
  const llmIr = sanitizeSceneIr(
    buildSceneIrInput("2-0", {
      candidateSource: "llm_semantic",
      valid: false,
      invalidReason: "numeric_dominant_scene_id",
      candidates: [{ value: "2-0", source: "llm_semantic", valid: false, reason: "numeric_dominant_scene_id" }],
    })
  );
  // mergeSceneIr receives this array; the test expects it to be populated.
  const collectedWarnings = [];

  const mergedIr = mergeSceneIr(deterministicIr, llmIr, collectedWarnings);

  assert.strictEqual(mergedIr.sceneId, "marketing-zero-consumer-report");
  assert.strictEqual(mergedIr.sceneIdDiagnostics.valid, true);
  assert.ok(collectedWarnings.some((item) => item.includes("SceneId conflict")));
  console.log("PASS: testMergeSceneIrPrefersValidSceneIdOverInvalidLlmValue");
}
|
||||
|
||||
/**
 * Checks that getGenerationBlockers reports both an "invalid_scene_id:" blocker
 * and the analysis-level blocker when given sceneId "2-0" together with
 * diagnostics that mark the id as invalid.
 */
function testGetGenerationBlockersRejectsInvalidSceneId() {
  const request = {
    sceneIr: {
      sceneIdDiagnostics: {
        valid: false,
        invalidReason: "numeric_dominant_scene_id",
      },
    },
    sceneId: "2-0",
    sceneName: "营销2.0零度户报表数据生成",
    sourceDir: "D:/tmp/营销2.0零度户报表数据生成",
  };

  const blockers = getGenerationBlockers(request);

  const hasInvalidSceneIdBlocker = blockers.some((item) => item.startsWith("invalid_scene_id:"));
  assert.ok(hasInvalidSceneIdBlocker, `expected invalid_scene_id blocker, got ${JSON.stringify(blockers)}`);
  assert.ok(blockers.includes("analysis_invalid_scene_id:numeric_dominant_scene_id"));
  console.log("PASS: testGetGenerationBlockersRejectsInvalidSceneId");
}
|
||||
|
||||
/**
 * Writes a fixture page that references business URLs as well as a localhost
 * export endpoint, runs readDirectory on it, and checks that the deterministic
 * bootstrap points at the business domain.
 *
 * The temporary fixture directory is removed when the test finishes, whether
 * the assertions pass or throw.
 */
function testBootstrapPrefersBusinessEntryOverLocalhostExport() {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "sgclaw-bootstrap-"));
  try {
    const sceneDir = path.join(tempRoot, "bootstrap");
    fs.mkdirSync(sceneDir);
    fs.writeFileSync(
      path.join(sceneDir, "index.html"),
      `<!doctype html><html><body><script>
const sourceUrl = "http://yx.gs.sgcc.com.cn";
const apiUrl = "http://yxgateway.gs.sgcc.com.cn/api";
function getRows() {
return $.ajax({ url: "http://yxgateway.gs.sgcc.com.cn/marketing/userList", type: "POST" });
}
function exportExcel() {
return $.ajax({ url: "http://localhost:13313/SurfaceServices/personalBread/export/faultDetailsExportXLSX", type: "POST" });
}
</script></body></html>`,
      "utf8"
    );

    const sceneIr = readDirectory(sceneDir).deterministic;

    assert.strictEqual(sceneIr.bootstrap.expectedDomain, "yx.gs.sgcc.com.cn");
    assert.strictEqual(sceneIr.bootstrap.targetUrl, "http://yx.gs.sgcc.com.cn/");
  } finally {
    // Do not leave fixture directories behind in os.tmpdir() (needs Node >= 14.14).
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
  console.log("PASS: testBootstrapPrefersBusinessEntryOverLocalhostExport");
}
|
||||
|
||||
/**
 * Writes a fixture page whose only URL is a localhost export endpoint, runs
 * readDirectory on it, and checks that the deterministic bootstrap is left
 * empty and "bootstrap_target" is listed among the readiness missing pieces.
 *
 * The temporary fixture directory is removed when the test finishes, whether
 * the assertions pass or throw.
 */
function testBootstrapBecomesUnresolvedWhenOnlyLocalhostExists() {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "sgclaw-bootstrap-local-"));
  try {
    const sceneDir = path.join(tempRoot, "bootstrap-local");
    fs.mkdirSync(sceneDir);
    fs.writeFileSync(
      path.join(sceneDir, "index.html"),
      `<!doctype html><html><body><script>
function exportExcel() {
return $.ajax({ url: "http://localhost:13313/SurfaceServices/personalBread/export/faultDetailsExportXLSX", type: "POST" });
}
</script></body></html>`,
      "utf8"
    );

    const sceneIr = readDirectory(sceneDir).deterministic;

    assert.strictEqual(sceneIr.bootstrap.expectedDomain, "");
    assert.strictEqual(sceneIr.bootstrap.targetUrl, "");
    assert.ok(sceneIr.readiness.missingPieces.includes("bootstrap_target"));
  } finally {
    // Do not leave fixture directories behind in os.tmpdir() (needs Node >= 14.14).
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
  console.log("PASS: testBootstrapBecomesUnresolvedWhenOnlyLocalhostExists");
}
|
||||
|
||||
/**
 * Writes a fixture page containing a paginated list request, a secondary
 * per-customer request, an export function and a row filter, alongside generic
 * `type` / `status` constants. Runs readDirectory on it and checks that the
 * deterministic workflow archetype is "paginated_enrichment" with non-empty
 * pagination, secondary-request and post-process evidence.
 *
 * The temporary fixture directory is removed when the test finishes, whether
 * the assertions pass or throw.
 */
function testWorkflowClassificationPrefersPaginatedOverGenericModeNoise() {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "sgclaw-workflow-"));
  try {
    const sceneDir = path.join(tempRoot, "workflow");
    fs.mkdirSync(sceneDir);
    fs.writeFileSync(
      path.join(sceneDir, "index.html"),
      `<!doctype html><html><body><script>
const type = "list";
const status = "ready";
async function loadData(page, pageSize) {
return $.ajax({ url: "http://yx.gs.sgcc.com.cn/marketing/userList", type: "POST", data: JSON.stringify({ page, pageSize }) });
}
async function getChargeInfo(custNo) {
return $.ajax({ url: "http://yx.gs.sgcc.com.cn/marketing/userCharges", type: "POST", data: JSON.stringify({ custNo }) });
}
function exportExcel(rows) { return rows.length; }
function run(rows) {
return rows.filter((row) => row.charge !== 0);
}
</script></body></html>`,
      "utf8"
    );

    const sceneIr = readDirectory(sceneDir).deterministic;

    assert.strictEqual(sceneIr.workflowArchetype, "paginated_enrichment");
    assert.ok(sceneIr.workflowEvidence.paginationFields.length > 0);
    assert.ok(sceneIr.workflowEvidence.secondaryRequestEntries.length > 0);
    assert.ok(sceneIr.workflowEvidence.postProcessSteps.length > 0);
  } finally {
    // Do not leave fixture directories behind in os.tmpdir() (needs Node >= 14.14).
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
  console.log("PASS: testWorkflowClassificationPrefersPaginatedOverGenericModeNoise");
}
|
||||
|
||||
/**
 * Writes a fixture page with a paginated list request and a secondary request
 * but no export or filter function, runs readDirectory on it, and checks that
 * the deterministic workflow archetype is NOT "paginated_enrichment".
 *
 * The temporary fixture directory is removed when the test finishes, whether
 * the assertions pass or throw.
 */
function testWorkflowClassificationDoesNotEmitPaginatedWithoutPostProcess() {
  const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "sgclaw-workflow-no-post-"));
  try {
    const sceneDir = path.join(tempRoot, "workflow-no-post");
    fs.mkdirSync(sceneDir);
    fs.writeFileSync(
      path.join(sceneDir, "index.html"),
      `<!doctype html><html><body><script>
async function loadData(page, pageSize) {
return $.ajax({ url: "http://yx.gs.sgcc.com.cn/marketing/userList", type: "POST", data: JSON.stringify({ page, pageSize }) });
}
async function getChargeInfo(custNo) {
return $.ajax({ url: "http://yx.gs.sgcc.com.cn/marketing/userCharges", type: "POST", data: JSON.stringify({ custNo }) });
}
</script></body></html>`,
      "utf8"
    );

    const sceneIr = readDirectory(sceneDir).deterministic;

    assert.notStrictEqual(sceneIr.workflowArchetype, "paginated_enrichment");
  } finally {
    // Do not leave fixture directories behind in os.tmpdir() (needs Node >= 14.14).
    fs.rmSync(tempRoot, { recursive: true, force: true });
  }
  console.log("PASS: testWorkflowClassificationDoesNotEmitPaginatedWithoutPostProcess");
}
|
||||
|
||||
/**
 * Passes getGenerationBlockers a scene IR whose readiness lists five failed
 * gates and checks that each one appears in the result as a
 * "gate_failed:<name>:<reason>" entry.
 */
function testGenerationBlockersIncludeFailedReadinessGates() {
  const failedGates = [
    { name: "bootstrap_resolved", passed: false, reason: "bootstrap_target" },
    { name: "request_contract_complete", passed: false, reason: "request_endpoint" },
    { name: "response_contract_complete", passed: false, reason: "response_path" },
    { name: "workflow_contract_complete", passed: false, reason: "post_process" },
    { name: "workflow_complete_for_archetype", passed: false, reason: "post_process" },
  ];
  // One expected blocker per failed gate, in the same order as failedGates.
  const expectedBlockers = [
    "gate_failed:bootstrap_resolved:bootstrap_target",
    "gate_failed:request_contract_complete:request_endpoint",
    "gate_failed:response_contract_complete:response_path",
    "gate_failed:workflow_contract_complete:post_process",
    "gate_failed:workflow_complete_for_archetype:post_process",
  ];

  const blockers = getGenerationBlockers({
    sceneIr: {
      readiness: {
        gates: failedGates,
      },
    },
    sceneId: "marketing-zero-consumer-report",
    sceneName: "营销2.0零度户报表数据生成",
    sourceDir: "D:/tmp/营销2.0零度户报表数据生成",
  });

  for (const expected of expectedBlockers) {
    assert.ok(blockers.includes(expected));
  }
  console.log("PASS: testGenerationBlockersIncludeFailedReadinessGates");
}
|
||||
|
||||
// Run every test in declaration order. Each test throws on failure, so the
// first failing test aborts the run.
const testsInOrder = [
  testBuildAnalyzePromptIncludesFileContents,
  testExtractJsonFromResponse,
  testExtractJsonFromResponseRepairsMissingArrayComma,
  testRepairCommonJsonIssuesRemovesTrailingCommas,
  testIsRetryableLlmErrorRecognizesTimeouts,
  testDeterministicNamingAvoidsDegenerateSlugFallback,
  testValidateSceneIdCandidateRejectsLowEntropyIds,
  testMergeSceneIrPrefersValidSceneIdOverInvalidLlmValue,
  testGetGenerationBlockersRejectsInvalidSceneId,
  testBootstrapPrefersBusinessEntryOverLocalhostExport,
  testBootstrapBecomesUnresolvedWhenOnlyLocalhostExists,
  testWorkflowClassificationPrefersPaginatedOverGenericModeNoise,
  testWorkflowClassificationDoesNotEmitPaginatedWithoutPostProcess,
  testGenerationBlockersIncludeFailedReadinessGates,
];

for (const runTest of testsInOrder) {
  runTest();
}
|
||||
|
||||
Reference in New Issue
Block a user