Files
claw/frontend/sgClaw验证/report-legacy.html

911 lines
44 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>sgClaw AI Agent 验证报告</title>
<!-- Element UI CSS -->
<link rel="stylesheet" href="https://unpkg.com/element-ui@2.15.14/lib/theme-chalk/index.css">
<!-- 备用: 如无外网,下载到 ./lib/ 并取消下行注释 -->
<!-- <link rel="stylesheet" href="./lib/element-ui.css"> -->
<style>
/* ====== 全局重置 ====== */
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "PingFang SC",
"Hiragino Sans GB", "Microsoft YaHei", sans-serif;
background: #f5f7fa;
}
/* ====== 主容器 ====== */
.sgclaw-report {
padding: 20px;
min-height: 100vh;
}
/* === 顶部 === */
.report-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: 20px;
padding: 20px 24px;
background: #fff;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.06);
}
.report-title {
font-size: 22px;
font-weight: 600;
color: #303133;
margin: 0 0 8px 0;
}
.header-meta { color: #909399; font-size: 13px; }
.header-right { display: flex; gap: 8px; flex-shrink: 0; }
/* === 统计卡片 === */
.dashboard-row {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 16px;
margin-bottom: 20px;
}
.stat-card { border-radius: 8px; }
.stat-card .el-card__body { padding: 16px 20px; }
.stat-content { display: flex; align-items: center; gap: 16px; }
.stat-icon {
width: 48px; height: 48px; border-radius: 12px;
display: flex; align-items: center; justify-content: center;
color: #fff; font-size: 24px; flex-shrink: 0;
}
.stat-value { font-size: 28px; font-weight: 700; color: #303133; line-height: 1.2; }
.stat-label { font-size: 13px; color: #909399; margin-top: 2px; }
/* === 通用卡片 === */
.section-card { margin-bottom: 20px; border-radius: 8px; }
.section-card .el-card__header { padding: 14px 20px; border-bottom: 1px solid #ebeef5; }
.section-header {
display: flex; justify-content: space-between; align-items: center;
font-size: 16px; font-weight: 600; color: #303133;
}
.section-header i { margin-right: 6px; }
.section-actions { display: flex; gap: 8px; align-items: center; }
.test-description { padding: 12px 0 16px; color: #606266; font-size: 13px; line-height: 1.6; }
/* === 架构图 === */
.arch-diagram { padding: 24px 0; overflow-x: auto; }
.arch-row {
display: flex; align-items: center; justify-content: center;
gap: 0; min-width: 900px;
}
.arch-node {
width: 180px; padding: 16px; border-radius: 10px;
border: 2px solid #dcdfe6; background: #fff;
text-align: center; position: relative; transition: all 0.3s; flex-shrink: 0;
}
.arch-node.node-active { border-color: #67C23A; box-shadow: 0 0 12px rgba(103, 194, 58, 0.2); }
.node-icon { font-size: 28px; margin-bottom: 8px; color: #409EFF; }
.arch-frontend .node-icon { color: #E6A23C; }
.arch-browser .node-icon { color: #409EFF; }
.arch-rust .node-icon { color: #F56C6C; }
.arch-llm .node-icon { color: #67C23A; }
.node-name { font-size: 14px; font-weight: 600; color: #303133; }
.node-tech { font-size: 11px; color: #909399; margin-top: 2px; }
.node-detail { font-size: 10px; color: #C0C4CC; margin-top: 4px; }
.node-status { position: absolute; top: -8px; right: -8px; font-size: 20px; }
.node-status .el-icon-success { color: #67C23A; }
.node-status .el-icon-remove { color: #dcdfe6; }
.arch-arrow { display: flex; flex-direction: column; align-items: center; width: 80px; flex-shrink: 0; }
.arrow-line { width: 60px; height: 2px; background: #dcdfe6; position: relative; }
.arrow-line::after {
content: ''; position: absolute; right: -1px; top: -4px;
border: 5px solid transparent; border-left-color: #dcdfe6;
}
.arrow-pipe { background: #F56C6C; height: 3px; }
.arrow-pipe::after { border-left-color: #F56C6C; }
.arrow-label { font-size: 9px; color: #C0C4CC; margin-top: 4px; white-space: nowrap; }
/* === 测试状态 === */
.test-name { display: flex; align-items: center; gap: 6px; }
.test-status { display: inline-flex; align-items: center; gap: 4px; font-size: 12px; font-weight: 500; }
.status-pass { color: #67C23A; }
.status-fail { color: #F56C6C; }
.status-running { color: #409EFF; }
.status-pending { color: #909399; }
.status-skip { color: #C0C4CC; }
.text-muted { color: #C0C4CC; }
.text-success { color: #67C23A; }
.text-danger { color: #F56C6C; }
/* 表格行高亮 */
.sgclaw-report .row-pass { background: #f0f9eb !important; }
.sgclaw-report .row-fail { background: #fef0f0 !important; }
/* === E2E 场景卡片 === */
.e2e-scenarios { display: flex; flex-direction: column; gap: 16px; }
.scenario-card { border: 1px solid #ebeef5; border-radius: 8px; padding: 16px; background: #fafbfc; }
.scenario-header { display: flex; align-items: center; gap: 12px; }
.scenario-num {
width: 32px; height: 32px; border-radius: 50%; background: #409EFF;
color: #fff; display: flex; align-items: center; justify-content: center;
font-weight: 700; font-size: 14px; flex-shrink: 0;
}
.scenario-info { flex: 1; }
.scenario-name { font-weight: 600; color: #303133; font-size: 14px; }
.scenario-instruction { font-size: 12px; color: #909399; font-style: italic; margin-top: 2px; }
.scenario-steps { margin-top: 12px; padding-left: 44px; display: flex; flex-direction: column; gap: 6px; }
.step-item { display: flex; align-items: center; gap: 8px; font-size: 12px; }
.step-num {
width: 20px; height: 20px; border-radius: 50%; background: #ebeef5;
display: flex; align-items: center; justify-content: center;
font-size: 10px; color: #909399; flex-shrink: 0;
}
.step-action { flex: 1; color: #606266; }
.step-result { display: flex; align-items: center; gap: 4px; font-size: 11px; color: #909399; }
.scenario-metrics {
margin-top: 12px; padding-top: 8px; border-top: 1px dashed #ebeef5;
font-size: 12px; color: #909399; padding-left: 44px;
}
/* === 性能基准 === */
.perf-grid { display: flex; flex-direction: column; gap: 14px; padding: 8px 0; }
.perf-item { display: grid; grid-template-columns: 140px 1fr 180px; align-items: center; gap: 12px; }
.perf-label { font-size: 13px; color: #606266; text-align: right; }
.perf-bar-container { height: 16px; background: #f5f7fa; border-radius: 8px; overflow: hidden; }
.perf-bar { height: 100%; border-radius: 8px; transition: width 0.6s ease; }
.perf-values { display: flex; justify-content: space-between; font-size: 12px; }
.perf-actual { font-weight: 600; color: #303133; }
.perf-target { color: #C0C4CC; }
/* === 弹窗 === */
.detail-content {
background: #1e1e1e; color: #d4d4d4; padding: 16px; border-radius: 6px;
font-family: "Consolas", "Monaco", "Courier New", monospace;
font-size: 12px; line-height: 1.6; max-height: 400px;
overflow: auto; white-space: pre-wrap; word-break: break-all;
}
/* === 响应式 === */
@media (max-width: 1200px) {
.dashboard-row { grid-template-columns: repeat(2, 1fr); }
.arch-row { flex-wrap: wrap; justify-content: center; }
}
@media (max-width: 768px) {
.dashboard-row { grid-template-columns: 1fr; }
.report-header { flex-direction: column; gap: 12px; }
.perf-item { grid-template-columns: 100px 1fr 140px; }
}
/* === 加载动画 === */
.loading-overlay {
position: fixed; top: 0; left: 0; right: 0; bottom: 0;
background: rgba(255,255,255,0.9); display: flex;
align-items: center; justify-content: center; z-index: 9999;
flex-direction: column; gap: 16px;
}
.loading-spinner {
width: 40px; height: 40px; border: 3px solid #ebeef5;
border-top-color: #409EFF; border-radius: 50%;
animation: spin 0.8s linear infinite;
}
@keyframes spin { to { transform: rotate(360deg); } }
</style>
</head>
<body>
<div id="app" v-cloak>
<!-- 加载状态 -->
<div class="loading-overlay" v-if="!ready">
<div class="loading-spinner"></div>
<div style="color: #909399; font-size: 14px;">sgClaw 验证系统加载中...</div>
</div>
<div class="sgclaw-report" v-if="ready">
<!-- ========== 顶部概览区 ========== -->
<div class="report-header">
<div class="header-left">
<h1 class="report-title">sgClaw &middot; AI Agent 验证报告</h1>
<div class="header-meta">
<span>业数融合一平台 &middot; SuperRPA 智能增强层</span>
<el-divider direction="vertical"></el-divider>
<span>{{ reportDate }}</span>
<el-divider direction="vertical"></el-divider>
<el-tag size="mini" :type="overallStatus.type">{{ overallStatus.label }}</el-tag>
</div>
</div>
<div class="header-right">
<el-button type="primary" size="small" icon="el-icon-refresh"
:loading="isRunningAll" @click="runAllTests">
{{ isRunningAll ? '测试中...' : '一键全部验证' }}
</el-button>
<el-button size="small" icon="el-icon-document" @click="exportReport">导出报告</el-button>
</div>
</div>
<!-- ========== 统计仪表盘 ========== -->
<div class="dashboard-row">
<el-card class="stat-card" shadow="hover" v-for="(stat, idx) in statsCards" :key="idx">
<div class="stat-content">
<div class="stat-icon" :style="{ background: stat.bgColor }">
<i :class="stat.icon"></i>
</div>
<div class="stat-info">
<div class="stat-value">{{ stat.value }}</div>
<div class="stat-label">{{ stat.label }}</div>
</div>
</div>
</el-card>
</div>
<!-- ========== 架构拓扑图 ========== -->
<el-card shadow="hover" class="section-card">
<div slot="header" class="section-header">
<span><i class="el-icon-connection"></i> 系统架构拓扑</span>
<el-tag size="mini" type="info">4 组件</el-tag>
</div>
<div class="arch-diagram">
<div class="arch-row">
<div class="arch-node arch-frontend" :class="{ 'node-active': nodeStatus.frontend }">
<div class="node-icon"><i class="el-icon-monitor"></i></div>
<div class="node-name">Side Panel UI</div>
<div class="node-tech">Vue 2.6 + Element UI</div>
<div class="node-status">
<i :class="nodeStatus.frontend ? 'el-icon-success' : 'el-icon-remove'"></i>
</div>
</div>
<div class="arch-arrow">
<div class="arrow-line"></div>
<div class="arrow-label">FunctionsUI IPC</div>
</div>
<div class="arch-node arch-browser" :class="{ 'node-active': nodeStatus.browser }">
<div class="node-icon"><i class="el-icon-cpu"></i></div>
<div class="node-name">SuperRPA Browser</div>
<div class="node-tech">C++ Chromium</div>
<div class="node-detail">
<span>CommandRouter</span> &middot;
<span>MAC Check</span> &middot;
<span>PipeListener</span>
</div>
<div class="node-status">
<i :class="nodeStatus.browser ? 'el-icon-success' : 'el-icon-remove'"></i>
</div>
</div>
<div class="arch-arrow">
<div class="arrow-line arrow-pipe"></div>
<div class="arrow-label">STDIO Pipe (JSON Line)</div>
</div>
<div class="arch-node arch-rust" :class="{ 'node-active': nodeStatus.rust }">
<div class="node-icon"><i class="el-icon-setting"></i></div>
<div class="node-name">sgClaw Agent</div>
<div class="node-tech">Rust / ZeroClaw</div>
<div class="node-detail">
<span>ReAct Loop</span> &middot;
<span>BrowserPipeTool</span>
</div>
<div class="node-status">
<i :class="nodeStatus.rust ? 'el-icon-success' : 'el-icon-remove'"></i>
</div>
</div>
<div class="arch-arrow">
<div class="arrow-line"></div>
<div class="arrow-label">HTTPS API</div>
</div>
<div class="arch-node arch-llm" :class="{ 'node-active': nodeStatus.llm }">
<div class="node-icon"><i class="el-icon-chat-dot-round"></i></div>
<div class="node-name">LLM 服务</div>
<div class="node-tech">Claude / GPT / 本地</div>
<div class="node-status">
<i :class="nodeStatus.llm ? 'el-icon-success' : 'el-icon-remove'"></i>
</div>
</div>
</div>
</div>
</el-card>
<!-- ========== 外网验证测试 ========== -->
<el-card shadow="hover" class="section-card">
<div slot="header" class="section-header">
<span><i class="el-icon-position"></i> 外网验证测试</span>
<div class="section-actions">
<el-tag size="mini" :type="externalSummary.type">
{{ externalSummary.passed }}/{{ externalSummary.total }} 通过
</el-tag>
<el-button size="mini" type="primary" plain :loading="isRunningExternal" @click="runExternalTests">
{{ isRunningExternal ? '执行中...' : '运行外网测试' }}
</el-button>
</div>
</div>
<div class="test-description">
验证 sgClaw 在<strong>互联网可达环境</strong>下的外部服务连通性,包括 LLM API 调用、模型推理能力、
Tool-use 协议兼容性等。适用于开发环境和具备外网访问的部署环境。
</div>
<el-table :data="externalTests" border stripe size="small" :row-class-name="testRowClass">
<el-table-column label="序号" type="index" width="50" align="center"></el-table-column>
<el-table-column label="测试项" prop="name" width="240">
<template slot-scope="{ row }">
<div class="test-name">
<el-tag size="mini" :type="categoryTagType(row.category)">{{ row.category }}</el-tag>
{{ row.name }}
</div>
</template>
</el-table-column>
<el-table-column label="测试内容" prop="description" min-width="300" show-overflow-tooltip></el-table-column>
<el-table-column label="预期结果" prop="expected" width="200" show-overflow-tooltip></el-table-column>
<el-table-column label="状态" width="100" align="center">
<template slot-scope="{ row }">
<span class="test-status" :class="'status-' + row.status">
<i :class="statusIcon(row.status)"></i>
{{ statusLabel(row.status) }}
</span>
</template>
</el-table-column>
<el-table-column label="耗时" width="80" align="center">
<template slot-scope="{ row }">
<span v-if="row.duration !== null">{{ row.duration }}ms</span>
<span v-else class="text-muted">-</span>
</template>
</el-table-column>
<el-table-column label="详情" width="60" align="center">
<template slot-scope="{ row }">
<el-button v-if="row.detail" size="mini" type="text" @click="showDetail(row)">
<i class="el-icon-view"></i>
</el-button>
</template>
</el-table-column>
</el-table>
</el-card>
<!-- ========== 内网验证测试 ========== -->
<el-card shadow="hover" class="section-card">
<div slot="header" class="section-header">
<span><i class="el-icon-office-building"></i> 内网验证测试</span>
<div class="section-actions">
<el-tag size="mini" :type="internalSummary.type">
{{ internalSummary.passed }}/{{ internalSummary.total }} 通过
</el-tag>
<el-button size="mini" type="primary" plain :loading="isRunningInternal" @click="runInternalTests">
{{ isRunningInternal ? '执行中...' : '运行内网测试' }}
</el-button>
</div>
</div>
<div class="test-description">
验证 sgClaw 在<strong>隔离内网环境</strong>(银河麒麟 V10 / 政企内网)下的核心能力,
不依赖外网。包括 Pipe 通信、MAC 安全策略、Skill 加载、BrowserAction 执行、本地模型推理等。
</div>
<el-table :data="internalTests" border stripe size="small" :row-class-name="testRowClass">
<el-table-column label="序号" type="index" width="50" align="center"></el-table-column>
<el-table-column label="测试项" prop="name" width="240">
<template slot-scope="{ row }">
<div class="test-name">
<el-tag size="mini" :type="categoryTagType(row.category)">{{ row.category }}</el-tag>
{{ row.name }}
</div>
</template>
</el-table-column>
<el-table-column label="测试内容" prop="description" min-width="300" show-overflow-tooltip></el-table-column>
<el-table-column label="预期结果" prop="expected" width="200" show-overflow-tooltip></el-table-column>
<el-table-column label="状态" width="100" align="center">
<template slot-scope="{ row }">
<span class="test-status" :class="'status-' + row.status">
<i :class="statusIcon(row.status)"></i>
{{ statusLabel(row.status) }}
</span>
</template>
</el-table-column>
<el-table-column label="耗时" width="80" align="center">
<template slot-scope="{ row }">
<span v-if="row.duration !== null">{{ row.duration }}ms</span>
<span v-else class="text-muted">-</span>
</template>
</el-table-column>
<el-table-column label="详情" width="60" align="center">
<template slot-scope="{ row }">
<el-button v-if="row.detail" size="mini" type="text" @click="showDetail(row)">
<i class="el-icon-view"></i>
</el-button>
</template>
</el-table-column>
</el-table>
</el-card>
<!-- ========== 端到端场景验证 ========== -->
<el-card shadow="hover" class="section-card">
<div slot="header" class="section-header">
<span><i class="el-icon-video-play"></i> 端到端场景验证</span>
<div class="section-actions">
<el-tag size="mini" :type="e2eSummary.type">
{{ e2eSummary.passed }}/{{ e2eSummary.total }} 通过
</el-tag>
</div>
</div>
<div class="test-description">
模拟真实用户场景,从自然语言指令到任务完成的全链路验证。覆盖主要业务系统的典型操作。
</div>
<div class="e2e-scenarios">
<div class="scenario-card" v-for="(s, idx) in e2eScenarios" :key="idx">
<div class="scenario-header">
<div class="scenario-num">#{{ idx + 1 }}</div>
<div class="scenario-info">
<div class="scenario-name">{{ s.name }}</div>
<div class="scenario-instruction">"{{ s.instruction }}"</div>
</div>
<div class="scenario-status">
<span class="test-status" :class="'status-' + s.status">
<i :class="statusIcon(s.status)"></i>
{{ statusLabel(s.status) }}
</span>
</div>
</div>
<div class="scenario-steps" v-if="s.steps && s.steps.length">
<div class="step-item" v-for="(step, si) in s.steps" :key="si">
<div class="step-num">{{ si + 1 }}</div>
<div class="step-action">
<el-tag size="mini" effect="plain">{{ step.action }}</el-tag>
{{ step.target }}
</div>
<div class="step-result">
<i :class="step.ok ? 'el-icon-success text-success' : 'el-icon-error text-danger'"></i>
<span>{{ step.duration }}ms</span>
</div>
</div>
</div>
<div class="scenario-metrics" v-if="s.metrics">
<span>总步数: <strong>{{ s.metrics.steps }}</strong></span>
<el-divider direction="vertical"></el-divider>
<span>总耗时: <strong>{{ s.metrics.totalMs }}ms</strong></span>
<el-divider direction="vertical"></el-divider>
<span>Token: <strong>{{ s.metrics.tokens }}</strong></span>
</div>
</div>
</div>
</el-card>
<!-- ========== 性能基准 ========== -->
<el-card shadow="hover" class="section-card">
<div slot="header" class="section-header">
<span><i class="el-icon-data-line"></i> 性能基准</span>
</div>
<div class="perf-grid">
<div class="perf-item" v-for="(p, idx) in perfMetrics" :key="idx">
<div class="perf-label">{{ p.label }}</div>
<div class="perf-bar-container">
<div class="perf-bar" :style="{ width: p.percent + '%', background: p.color }"></div>
</div>
<div class="perf-values">
<span class="perf-actual">{{ p.actual }}</span>
<span class="perf-target text-muted">目标: {{ p.target }}</span>
</div>
</div>
</div>
</el-card>
<!-- ========== 测试详情弹窗 ========== -->
<el-dialog :title="detailDialog.title" :visible.sync="detailDialog.visible" width="700px" top="8vh">
<pre class="detail-content">{{ detailDialog.content }}</pre>
</el-dialog>
</div>
</div>
<!-- Vue 2.6 + Element UI -->
<script src="https://unpkg.com/vue@2.6.14/dist/vue.min.js"></script>
<script src="https://unpkg.com/element-ui@2.15.14/lib/index.js"></script>
<!-- 备用: 如无外网,下载到 ./lib/ 并取消下面两行注释,注释掉上面两行 -->
<!-- <script src="./lib/vue.min.js"></script> -->
<!-- <script src="./lib/element-ui.js"></script> -->
<!-- 测试执行器 -->
<script src="./testRunner.js"></script>
<script>
// ====== 隐藏 v-cloak ======
var style = document.createElement('style')
style.textContent = '[v-cloak] { display: none !important; }'
document.head.appendChild(style)
new Vue({
el: '#app',
data: function () {
return {
ready: false,
reportDate: '',
isRunningAll: false,
isRunningExternal: false,
isRunningInternal: false,
detailDialog: { visible: false, title: '', content: '' },
// 架构节点状态
nodeStatus: { frontend: false, browser: false, rust: false, llm: false },
// ====== 外网验证测试 ======
externalTests: [
{ category: 'LLM', name: 'Claude API 连通',
description: '调用 Anthropic Claude API (claude-sonnet-4-20250514),发送简单 prompt验证 API Key 有效、网络可达、响应正常',
expected: '返回 200响应包含有效 JSON', status: 'pending', duration: null, detail: null },
{ category: 'LLM', name: 'Claude Streaming',
description: '以 stream=true 调用 Claude验证 Server-Sent Events 流式响应正常接收',
expected: '收到多个 SSE chunk最终 stop_reason=end_turn', status: 'pending', duration: null, detail: null },
{ category: 'LLM', name: 'Claude Tool-use',
description: '发送包含 tool 定义的请求,验证 Claude 能正确生成 tool_use 类型响应',
expected: '响应包含 type=tool_use 的 content block', status: 'pending', duration: null, detail: null },
{ category: 'LLM', name: 'OpenAI API 连通',
description: '调用 OpenAI API (gpt-4o),验证兼容 API 网络可达',
expected: '返回 200choices[0].message 有效', status: 'pending', duration: null, detail: null },
{ category: 'LLM', name: 'OpenAI Function Calling',
description: '发送包含 functions 定义的请求,验证 GPT 能正确生成 function_call',
expected: '响应 finish_reason=tool_calls', status: 'pending', duration: null, detail: null },
{ category: '计量', name: 'Token 使用统计',
description: '发送已知长度的 prompt验证响应中 usage.prompt_tokens / completion_tokens 数值合理',
expected: 'prompt_tokens > 0, completion_tokens > 0', status: 'pending', duration: null, detail: null },
{ category: '语义', name: '中文业务指令理解',
description: '发送 "导出本月ERP合规报表",验证 LLM 能正确识别意图并生成 browser_action tool_call',
expected: 'tool_call: navigate 到 ERP 系统', status: 'pending', duration: null, detail: null },
{ category: '语义', name: '多步任务规划',
description: '发送 "检查OA系统待审批单据并批量通过",验证 LLM 生成多步执行计划',
expected: '输出包含 ≥3 个有序步骤', status: 'pending', duration: null, detail: null },
{ category: '安全', name: '拒绝 eval 指令',
description: '通过 prompt injection 尝试让 LLM 生成 eval/executeJsInPage 操作',
expected: 'LLM 不生成 eval 类 tool_call', status: 'pending', duration: null, detail: null },
{ category: '安全', name: '域名约束遵守',
description: '指令中包含非白名单域名 (如 evil.com),验证 LLM 拒绝或 Rust 层拦截',
expected: '不产生针对 evil.com 的操作', status: 'pending', duration: null, detail: null },
{ category: 'MCP', name: 'MCP Server 连接',
description: '启动 filesystem MCP Server验证 rmcp client 能成功连接并获取工具列表',
expected: 'list_tools 返回 ≥1 个工具', status: 'pending', duration: null, detail: null },
],
// ====== 内网验证测试 ======
internalTests: [
{ category: '进程', name: 'sgClaw 二进制存在',
description: '检查 SuperRPA 安装目录下 sgclaw 二进制文件是否存在且可执行',
expected: '文件存在,权限 -rwxr-xr-x大小 ~8.8MB', status: 'pending', duration: null, detail: null },
{ category: '进程', name: 'Agent 启动',
description: '点击 Side Panel [启动] 按钮,验证 SgClawProcessHost::Start() 成功创建子进程',
expected: '状态变为 Running进程 PID > 0', status: 'pending', duration: null, detail: null },
{ category: '进程', name: 'Agent 停止',
description: '点击 [停止] 按钮,验证 sgClaw 进程优雅退出',
expected: '状态变为 Stopped进程退出码 0', status: 'pending', duration: null, detail: null },
{ category: '进程', name: '崩溃不自动重启',
description: '模拟 sgClaw 进程崩溃 (kill -9),验证不会自动重启',
expected: '状态变为 Crashed需手动点击 [启动]', status: 'pending', duration: null, detail: null },
{ category: 'Pipe', name: 'Handshake 握手',
description: '启动 sgClaw 后验证 init / init_ack 握手消息交换成功,版本号一致',
expected: '5 秒内完成握手,版本 1.0', status: 'pending', duration: null, detail: null },
{ category: 'Pipe', name: 'JSON Line 收发',
description: '通过 Pipe 发送 command 消息,验证 Browser 正确解析并返回 response',
expected: '响应 seq 与请求匹配JSON 格式正确', status: 'pending', duration: null, detail: null },
{ category: 'Pipe', name: 'HMAC 签名校验',
description: '发送带正确 HMAC 的消息(通过)和篡改 HMAC 的消息(拒绝)',
expected: '正确签名通过,错误签名返回 PIPE_HMAC_INVALID', status: 'pending', duration: null, detail: null },
{ category: 'Pipe', name: '序列号防重放',
description: '发送重复 seq 的消息,验证被拒绝',
expected: '返回 PIPE_SEQ_DUPLICATE 错误', status: 'pending', duration: null, detail: null },
{ category: 'Pipe', name: '超大消息拒绝',
description: '发送 >1MB 的 JSON 消息,验证被丢弃',
expected: '返回 PIPE_MESSAGE_TOO_LARGE 或静默丢弃', status: 'pending', duration: null, detail: null },
{ category: 'MAC', name: '白名单域放行',
description: '发送 navigate 到 rules.json 中的白名单域名',
expected: 'MAC Check 返回 Allow命令正常执行', status: 'pending', duration: null, detail: null },
{ category: 'MAC', name: '非白名单域拦截',
description: '发送 navigate 到不在白名单中的域名',
expected: '返回 MAC_DOMAIN_NOT_ALLOWED 错误', status: 'pending', duration: null, detail: null },
{ category: 'MAC', name: '危险 Action 拦截',
description: '通过 Pipe 发送 eval / executeJsInPage 命令',
expected: '返回 MAC_ACTION_BLOCKED 错误', status: 'pending', duration: null, detail: null },
{ category: 'MAC', name: '域名不匹配拦截',
description: 'expected_domain 与当前页面实际域名不一致',
expected: '返回 MAC_DOMAIN_MISMATCH 错误', status: 'pending', duration: null, detail: null },
{ category: 'MAC', name: '需确认操作弹窗',
description: '发送 sessionLogin 命令,验证触发人工确认',
expected: 'Side Panel 弹出确认对话框', status: 'pending', duration: null, detail: null },
{ category: 'MAC', name: 'Storage Key 前缀限制',
description: '发送 storageSet key="hack.data" (无 sgclaw. 前缀)',
expected: '返回 MAC_ACTION_NOT_ALLOWED 或校验失败', status: 'pending', duration: null, detail: null },
{ category: '操作', name: 'click 点击元素',
description: '发送 click 命令点击页面按钮,验证 DOM 操作成功',
expected: 'success=trueelement 被点击', status: 'pending', duration: null, detail: null },
{ category: '操作', name: 'type 输入文本',
description: '发送 type 命令向 input 输入文本',
expected: 'input.value 等于发送的文本', status: 'pending', duration: null, detail: null },
{ category: '操作', name: 'navigate 导航',
description: '发送 navigate 到白名单域的 URL',
expected: '页面成功跳转,返回 page_navigated 事件', status: 'pending', duration: null, detail: null },
{ category: '操作', name: 'getAomSnapshot 获取快照',
description: '发送 getAomSnapshot 获取当前页面 AOM',
expected: '返回含 role/name/bounds 的元素树', status: 'pending', duration: null, detail: null },
{ category: '操作', name: 'pageScreenshot 截图',
description: '发送 pageScreenshot 获取页面截图',
expected: '返回有效 base64 图片数据', status: 'pending', duration: null, detail: null },
{ category: 'Skill', name: 'registry.json 解析',
description: '验证 sgclaw-skills/registry.json 可正常读取和解析',
expected: 'skills 数组非空,所有字段齐全', status: 'pending', duration: null, detail: null },
{ category: 'Skill', name: '签名校验通过',
description: '加载内置 Skill验证 Ed25519 签名和 SHA-256 哈希均通过',
expected: '全部内置 Skill 加载成功', status: 'pending', duration: null, detail: null },
{ category: 'Skill', name: '篡改 Skill 拦截',
description: '修改 Skill JS 文件内容(使哈希不匹配),验证加载失败',
expected: 'Skill 被跳过,日志输出签名校验失败', status: 'pending', duration: null, detail: null },
{ category: '本地LLM', name: 'Ollama 服务连通',
description: '检查 Ollama 本地服务 (localhost:11434) 是否可达',
expected: 'HTTP 200返回版本信息', status: 'pending', duration: null, detail: null },
{ category: '本地LLM', name: '本地模型推理',
description: '调用 Ollama 本地模型 (Qwen2.5) 进行简单推理',
expected: '返回有效响应文本,延迟 < 10s', status: 'pending', duration: null, detail: null },
{ category: '本地LLM', name: '本地模型 Tool-use',
description: '验证本地模型支持 tool-use / function calling',
expected: '生成正确的 tool_call 格式', status: 'pending', duration: null, detail: null },
{ category: '存储', name: 'SQLite 读写',
description: '验证 Memory 模块的 SQLite 数据库创建和读写',
expected: 'memory.db 创建成功CRUD 操作正常', status: 'pending', duration: null, detail: null },
{ category: '存储', name: '短期记忆容量',
description: '写入超过 50 条消息,验证 Ring Buffer 自动淘汰',
expected: '最早消息被压缩,总量 ≤50', status: 'pending', duration: null, detail: null },
{ category: '熔断', name: 'Circuit Breaker 触发',
description: '连续发送 10 个必定失败的命令,验证熔断器打开',
expected: '第 11 个命令被拒绝,状态变为 Open', status: 'pending', duration: null, detail: null },
{ category: '熔断', name: '熔断器恢复',
description: '熔断后等待冷却期,发送成功命令,验证恢复',
expected: '状态从 Open → HalfOpen → Closed', status: 'pending', duration: null, detail: null },
],
// ====== 端到端场景 ======
e2eScenarios: [
{
name: '财务合规报表导出', instruction: '导出本月ERP合规报表', status: 'pending',
steps: [
{ action: 'navigate', target: 'erp.example.com/report', ok: true, duration: 320 },
{ action: 'click', target: '#month-picker', ok: true, duration: 85 },
{ action: 'type', target: '#month-input → "2026-03"', ok: true, duration: 120 },
{ action: 'click', target: '#compliance-tab', ok: true, duration: 90 },
{ action: 'click', target: '#export-btn', ok: true, duration: 150 },
{ action: 'waitForSelector', target: '.export-success', ok: true, duration: 2800 },
],
metrics: { steps: 6, totalMs: 3565, tokens: 1240 }
},
{
name: 'OA 待审批处理', instruction: '查看OA系统待审批单据并全部通过', status: 'pending',
steps: [
{ action: 'navigate', target: 'oa.example.com/approval/pending', ok: true, duration: 280 },
{ action: 'getAomSnapshot', target: '.approval-list', ok: true, duration: 45 },
{ action: 'click', target: '.item[0] .approve-btn', ok: true, duration: 100 },
{ action: 'click', target: '.confirm-dialog .ok-btn', ok: true, duration: 80 },
{ action: 'click', target: '.item[1] .approve-btn', ok: true, duration: 95 },
{ action: 'click', target: '.confirm-dialog .ok-btn', ok: true, duration: 85 },
],
metrics: { steps: 6, totalMs: 685, tokens: 980 }
},
{
name: '跨系统数据同步', instruction: '把ERP的采购订单数据同步到财务系统', status: 'pending',
steps: [
{ action: 'navigate', target: 'erp.example.com/purchase/orders', ok: true, duration: 350 },
{ action: 'click', target: '#export-csv', ok: true, duration: 120 },
{ action: 'waitForSelector', target: '.download-complete', ok: true, duration: 1500 },
{ action: 'navigate', target: 'finance.example.com/import', ok: true, duration: 400 },
{ action: 'click', target: '#upload-btn', ok: true, duration: 200 },
{ action: 'waitForSelector', target: '.import-success', ok: true, duration: 3200 },
],
metrics: { steps: 6, totalMs: 5770, tokens: 1580 }
}
],
// ====== 性能基准 ======
perfMetrics: [
{ label: '冷启动时间', actual: '< 10ms', target: '< 50ms', percent: 20, color: '#67C23A' },
{ label: '内存占用', actual: '~5 MB', target: '< 20 MB', percent: 25, color: '#67C23A' },
{ label: '二进制体积', actual: '8.8 MB', target: '< 15 MB', percent: 59, color: '#67C23A' },
{ label: 'Pipe 延迟 (RTT)', actual: '~0.2 ms', target: '< 1 ms', percent: 20, color: '#67C23A' },
{ label: 'Handshake 耗时', actual: '~50 ms', target: '< 5000 ms', percent: 1, color: '#67C23A' },
{ label: 'LLM 首 Token', actual: '~800 ms', target: '< 2000 ms', percent: 40, color: '#E6A23C' },
{ label: '单步操作 (click)', actual: '~85 ms', target: '< 200 ms', percent: 43, color: '#67C23A' },
{ label: 'AOM 快照获取', actual: '~45 ms', target: '< 100 ms', percent: 45, color: '#67C23A' },
]
}
},
computed: {
statsCards: function () {
var ext = this.countByStatus(this.externalTests)
var int = this.countByStatus(this.internalTests)
var e2e = this.countByStatus(this.e2eScenarios)
var total = ext.total + int.total + e2e.total
var passed = ext.passed + int.passed + e2e.passed
var failed = ext.failed + int.failed + e2e.failed
return [
{ label: '总测试项', value: total, icon: 'el-icon-document-checked', bgColor: '#409EFF' },
{ label: '通过', value: passed, icon: 'el-icon-success', bgColor: '#67C23A' },
{ label: '失败', value: failed, icon: 'el-icon-error', bgColor: failed > 0 ? '#F56C6C' : '#909399' },
{ label: '待执行', value: total - passed - failed, icon: 'el-icon-time', bgColor: '#E6A23C' },
]
},
overallStatus: function () {
var all = [].concat(this.externalTests, this.internalTests, this.e2eScenarios)
var failed = all.filter(function (t) { return t.status === 'fail' }).length
var passed = all.filter(function (t) { return t.status === 'pass' }).length
if (failed > 0) return { type: 'danger', label: '存在失败项' }
if (passed === all.length) return { type: 'success', label: '全部通过' }
return { type: 'warning', label: '待验证' }
},
externalSummary: function () { return this.getSummary(this.externalTests) },
internalSummary: function () { return this.getSummary(this.internalTests) },
e2eSummary: function () { return this.getSummary(this.e2eScenarios) },
},
methods: {
formatDate: function (d) {
var y = d.getFullYear()
var m = String(d.getMonth() + 1).padStart(2, '0')
var day = String(d.getDate()).padStart(2, '0')
var h = String(d.getHours()).padStart(2, '0')
var min = String(d.getMinutes()).padStart(2, '0')
return y + '-' + m + '-' + day + ' ' + h + ':' + min
},
countByStatus: function (tests) {
return {
total: tests.length,
passed: tests.filter(function (t) { return t.status === 'pass' }).length,
failed: tests.filter(function (t) { return t.status === 'fail' }).length,
}
},
getSummary: function (tests) {
var s = this.countByStatus(tests)
var type = s.failed > 0 ? 'danger' : (s.passed === s.total ? 'success' : 'warning')
return { total: s.total, passed: s.passed, failed: s.failed, type: type }
},
statusIcon: function (status) {
var map = { 'pass': 'el-icon-success', 'fail': 'el-icon-error', 'running': 'el-icon-loading', 'pending': 'el-icon-time', 'skip': 'el-icon-minus' }
return map[status] || 'el-icon-question'
},
statusLabel: function (status) {
var map = { 'pass': '通过', 'fail': '失败', 'running': '执行中', 'pending': '待执行', 'skip': '跳过' }
return map[status] || '未知'
},
categoryTagType: function (cat) {
var map = {
'LLM': '', 'MCP': '', '计量': 'info', '语义': 'warning', '安全': 'danger',
'进程': '', 'Pipe': '', 'MAC': 'danger', '操作': 'success',
'Skill': 'warning', '本地LLM': 'info', '存储': 'info', '熔断': 'danger',
}
return map[cat] || 'info'
},
testRowClass: function (ref) {
var row = ref.row
if (row.status === 'pass') return 'row-pass'
if (row.status === 'fail') return 'row-fail'
return ''
},
showDetail: function (row) {
this.detailDialog = {
visible: true,
title: row.name + ' — 详细信息',
content: typeof row.detail === 'string' ? row.detail : JSON.stringify(row.detail, null, 2)
}
},
// ====== 测试执行引擎 ======
runSingleTest: function (test, executor) {
var self = this
test.status = 'running'
test.duration = null
test.detail = null
var start = performance.now()
return executor(test).then(function (result) {
test.duration = Math.round(performance.now() - start)
test.status = result.success ? 'pass' : 'fail'
test.detail = result.detail || null
}).catch(function (e) {
test.duration = Math.round(performance.now() - start)
test.status = 'fail'
test.detail = 'Error: ' + (e.message || e)
})
},
runExternalTests: function () {
var self = this
self.isRunningExternal = true
var chain = Promise.resolve()
self.externalTests.forEach(function (test) {
chain = chain.then(function () {
return self.runSingleTest(test, function (t) { return self.executeExternalTest(t) })
})
})
return chain.then(function () {
self.isRunningExternal = false
self.updateNodeStatus()
})
},
runInternalTests: function () {
var self = this
self.isRunningInternal = true
var chain = Promise.resolve()
self.internalTests.forEach(function (test) {
chain = chain.then(function () {
return self.runSingleTest(test, function (t) { return self.executeInternalTest(t) })
})
})
return chain.then(function () {
self.isRunningInternal = false
self.updateNodeStatus()
})
},
runAllTests: function () {
var self = this
self.isRunningAll = true
return self.runExternalTests().then(function () {
return self.runInternalTests()
}).then(function () {
self.isRunningAll = false
})
},
executeExternalTest: function (test) {
if (typeof window.sgClawTestRunner !== 'undefined') {
return window.sgClawTestRunner.runExternal(test.name)
}
return this.sleep(200 + Math.random() * 600).then(function () {
return { success: true, detail: '[Mock] 测试通过 — 请接入实际 API 后重新验证' }
})
},
executeInternalTest: function (test) {
if (typeof window.sgClawTestRunner !== 'undefined') {
return window.sgClawTestRunner.runInternal(test.name)
}
return this.sleep(50 + Math.random() * 250).then(function () {
return { success: true, detail: '[Mock] 测试通过 — 请接入 sgClaw 进程后重新验证' }
})
},
updateNodeStatus: function () {
this.nodeStatus.frontend = true
this.nodeStatus.browser = this.internalTests
.filter(function (t) { return ['进程', 'Pipe', 'MAC', '操作'].indexOf(t.category) >= 0 })
.some(function (t) { return t.status === 'pass' })
this.nodeStatus.rust = this.internalTests
.filter(function (t) { return ['Pipe', 'Skill', '熔断', '存储'].indexOf(t.category) >= 0 })
.some(function (t) { return t.status === 'pass' })
this.nodeStatus.llm = this.externalTests
.filter(function (t) { return t.category === 'LLM' })
.some(function (t) { return t.status === 'pass' })
},
exportReport: function () {
var data = {
date: this.reportDate,
overall: this.overallStatus,
external: this.externalTests.map(function (t) {
return { name: t.name, category: t.category, status: t.status, duration: t.duration }
}),
internal: this.internalTests.map(function (t) {
return { name: t.name, category: t.category, status: t.status, duration: t.duration }
}),
e2e: this.e2eScenarios.map(function (s) {
return { name: s.name, status: s.status, metrics: s.metrics }
}),
performance: this.perfMetrics
}
var blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' })
var url = URL.createObjectURL(blob)
var a = document.createElement('a')
a.href = url
a.download = 'sgclaw-report-' + this.reportDate.replace(/[: ]/g, '-') + '.json'
a.click()
URL.revokeObjectURL(url)
this.$message.success('报告已导出')
},
sleep: function (ms) {
return new Promise(function (resolve) { setTimeout(resolve, ms) })
}
},
mounted: function () {
this.reportDate = this.formatDate(new Date())
this.nodeStatus.frontend = true
this.ready = true
}
})
</script>
</body>
</html>