Files
claw/tests/agent_runtime_test.rs
木炎 45b60e37f7 fix: restore zhihu export routing before direct submit
Keep Zhihu hotlist export requests on the orchestration path so natural-language submits without page context no longer fail in direct-submit routing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-12 19:24:09 +08:00

772 lines
24 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
mod common;
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use uuid::Uuid;
use common::MockTransport;
use sgclaw::agent::{
handle_browser_message, handle_browser_message_with_context, AgentRuntimeContext,
};
use sgclaw::agent::runtime::{browser_action_tool_definition, execute_task_with_provider};
use sgclaw::compat::runtime::CompatTaskContext;
use sgclaw::config::SgClawSettings;
use sgclaw::llm::{ChatMessage, LlmError, LlmProvider, ToolDefinition, ToolFunctionCall};
use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing};
use sgclaw::security::MacPolicy;
struct FakeProvider {
calls: Vec<ToolFunctionCall>,
}
impl LlmProvider for FakeProvider {
fn chat(
&self,
_messages: &[ChatMessage],
_tools: &[ToolDefinition],
) -> Result<Vec<ToolFunctionCall>, LlmError> {
Ok(self.calls.clone())
}
}
fn provider_path_test_policy() -> MacPolicy {
policy_for_domains(&["www.baidu.com"])
}
fn direct_runtime_test_policy() -> MacPolicy {
policy_for_domains(&["95598.sgcc.com.cn"])
}
fn policy_for_domains(domains: &[&str]) -> MacPolicy {
MacPolicy::from_json_str(
&serde_json::json!({
"version": "1.0",
"domains": { "allowed": domains },
"pipe_actions": {
"allowed": ["click", "type", "navigate", "getText", "eval"],
"blocked": []
}
})
.to_string(),
)
.unwrap()
}
fn build_direct_runtime_skill_root() -> PathBuf {
let root = std::env::temp_dir().join(format!(
"sgclaw-agent-runtime-skill-root-{}",
Uuid::new_v4()
));
let skill_dir = root.join("fault-details-report");
let script_dir = skill_dir.join("scripts");
fs::create_dir_all(&script_dir).unwrap();
fs::write(
skill_dir.join("SKILL.toml"),
r#"
[skill]
name = "fault-details-report"
description = "Collect 95598 fault detail data via browser eval."
version = "0.1.0"
[[tools]]
name = "collect_fault_details"
description = "Collect structured fault detail rows for a specific period."
kind = "browser_script"
command = "scripts/collect_fault_details.js"
[tools.args]
period = "YYYY-MM period to collect."
"#,
)
.unwrap();
fs::write(
script_dir.join("collect_fault_details.js"),
r#"
return {
fault_type: "outage",
observed_at: `${args.period}-15 09:00`,
affected_scope: "line-7",
expected_domain: args.expected_domain,
artifact_payload: "report artifact payload"
};
"#,
)
.unwrap();
root
}
fn write_direct_submit_config(workspace_root: &std::path::Path, skill_root: &std::path::Path) -> PathBuf {
let config_path = workspace_root.join("sgclaw_config.json");
fs::write(
&config_path,
serde_json::json!({
"providers": [],
"skillsDir": skill_root,
"directSubmitSkill": "fault-details-report.collect_fault_details"
})
.to_string(),
)
.unwrap();
config_path
}
fn direct_submit_runtime_context(skill_root: &std::path::Path) -> AgentRuntimeContext {
let workspace_root = std::env::temp_dir().join(format!(
"sgclaw-agent-runtime-workspace-{}",
Uuid::new_v4()
));
fs::create_dir_all(&workspace_root).unwrap();
let config_path = write_direct_submit_config(&workspace_root, skill_root);
AgentRuntimeContext::new(Some(config_path), workspace_root)
}
fn submit_fault_details_message() -> BrowserMessage {
BrowserMessage::SubmitTask {
instruction: "请采集 2026-03 的故障明细并返回结果".to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: "https://95598.sgcc.com.cn/".to_string(),
page_title: "网上国网".to_string(),
}
}
fn submit_zhihu_hotlist_export_message() -> BrowserMessage {
BrowserMessage::SubmitTask {
instruction: "打开知乎热榜获取前10条数据并导出 Excel".to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: String::new(),
page_title: String::new(),
}
}
fn direct_submit_mode_logs(sent: &[AgentMessage]) -> Vec<String> {
sent.iter()
.filter_map(|message| match message {
AgentMessage::LogEntry { level, message } if level == "mode" => Some(message.clone()),
_ => None,
})
.collect()
}
fn direct_submit_completion(sent: &[AgentMessage]) -> Option<(bool, String)> {
sent.iter().find_map(|message| match message {
AgentMessage::TaskComplete { success, summary } => Some((*success, summary.clone())),
_ => None,
})
}
fn success_browser_response(seq: u64, data: serde_json::Value) -> BrowserMessage {
BrowserMessage::Response {
seq,
success: true,
data,
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 10,
},
}
}
fn report_artifact_browser_response(
seq: u64,
status: &str,
partial_reasons: &[&str],
detail_rows: Vec<serde_json::Value>,
summary_rows: Vec<serde_json::Value>,
) -> BrowserMessage {
success_browser_response(
seq,
serde_json::json!({
"text": {
"type": "report-artifact",
"report_name": "fault-details-report",
"period": "2026-03",
"selected_range": {
"start": "2026-03-08 16:00:00",
"end": "2026-03-09 16:00:00"
},
"columns": ["qxdbh"],
"rows": detail_rows,
"sections": [{
"name": "summary-sheet",
"columns": ["index"],
"rows": summary_rows
}],
"counts": {
"detail_rows": detail_rows.len(),
"summary_rows": summary_rows.len()
},
"status": status,
"partial_reasons": partial_reasons,
"downstream": {
"export": {
"attempted": true,
"success": status != "blocked" && status != "error",
"path": "http://localhost/export.xlsx"
},
"report_log": {
"attempted": true,
"success": partial_reasons.is_empty(),
"error": partial_reasons
.first()
.copied()
.unwrap_or("")
}
}
}
}),
)
}
#[test]
fn direct_submit_runtime_executes_fault_details_skill_without_provider_path() {
let skill_root = build_direct_runtime_skill_root();
let transport = Arc::new(MockTransport::new(vec![success_browser_response(
1,
serde_json::json!({
"text": {
"fault_type": "outage",
"observed_at": "2026-03-15 09:00",
"affected_scope": "line-7"
}
}),
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
let mut settings = SgClawSettings::from_legacy_deepseek_fields(
"unused-key".to_string(),
"http://127.0.0.1:9".to_string(),
"unused-model".to_string(),
Some(skill_root.clone()),
)
.unwrap();
settings.direct_submit_skill = Some("fault-details-report.collect_fault_details".to_string());
let summary = sgclaw::compat::direct_skill_runtime::execute_direct_submit_skill(
browser_tool,
"请采集 2026-03 的故障明细并返回结果",
&CompatTaskContext {
page_url: Some("https://95598.sgcc.com.cn/".to_string()),
..CompatTaskContext::default()
},
PathBuf::from(env!("CARGO_MANIFEST_DIR")).as_path(),
&settings,
)
.unwrap();
assert!(summary.success);
assert!(summary.summary.contains("fault_type"));
let sent = transport.sent_messages();
assert!(sent.iter().all(|message| !matches!(message, AgentMessage::LogEntry { level, message } if level == "info" && message.contains("DeepSeek config loaded"))));
assert!(matches!(
&sent[0],
AgentMessage::Command {
seq,
action,
params,
security,
} if *seq == 1
&& action == &Action::Eval
&& security.expected_domain == "95598.sgcc.com.cn"
&& params["script"].as_str().is_some_and(|script| script.contains("2026-03"))
));
}
#[test]
fn submit_task_uses_direct_skill_mode_without_llm_configuration() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![success_browser_response(
1,
serde_json::json!({
"text": {
"fault_type": "outage",
"observed_at": "2026-03-15 09:00",
"affected_scope": "line-7",
"artifact_payload": "report artifact payload"
}
}),
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
let completion = direct_submit_completion(&sent).expect("task completion");
assert!(completion.0, "expected direct submit task to succeed: {sent:?}");
assert!(
completion.1.contains("report artifact payload"),
"expected report artifact payload in summary: {}",
completion.1
);
assert!(
!completion.1.contains("未配置大语言模型"),
"did not expect missing-llm summary: {}",
completion.1
);
}
#[test]
fn submit_task_rejects_invalid_direct_submit_skill_config_before_routing() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let workspace_root = std::env::temp_dir().join(format!(
"sgclaw-invalid-direct-submit-workspace-{}",
Uuid::new_v4()
));
fs::create_dir_all(&workspace_root).unwrap();
let config_path = workspace_root.join("sgclaw_config.json");
fs::write(
&config_path,
serde_json::json!({
"providers": [],
"skillsDir": skill_root,
"directSubmitSkill": "fault-details-report"
})
.to_string(),
)
.unwrap();
let runtime_context = AgentRuntimeContext::new(Some(config_path), workspace_root);
let transport = Arc::new(MockTransport::new(vec![]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
assert!(matches!(
sent.last(),
Some(AgentMessage::TaskComplete { success, summary })
if !success && summary.contains("skill.tool")
));
assert!(direct_submit_mode_logs(&sent).is_empty());
assert!(!sent.iter().any(|message| matches!(message, AgentMessage::Command { .. })));
}
#[test]
fn submit_task_treats_partial_report_artifact_as_success_with_warning_summary() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response(
1,
"partial",
&["report_log_failed"],
vec![serde_json::json!({ "qxdbh": "QX-1" })],
vec![serde_json::json!({ "index": 1 })],
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
let completion = direct_submit_completion(&sent).expect("task completion");
assert!(completion.0, "expected partial artifact to succeed: {sent:?}");
assert!(completion.1.contains("fault-details-report"));
assert!(completion.1.contains("2026-03"));
assert!(completion.1.contains("status=partial"));
assert!(completion.1.contains("detail_rows=1"));
assert!(completion.1.contains("summary_rows=1"));
assert!(completion.1.contains("report_log_failed"));
}
#[test]
fn submit_task_treats_empty_report_artifact_as_success() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response(
1,
"empty",
&[],
vec![],
vec![],
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
let completion = direct_submit_completion(&sent).expect("task completion");
assert!(completion.0, "expected empty artifact to succeed: {sent:?}");
assert!(completion.1.contains("status=empty"));
assert!(completion.1.contains("detail_rows=0"));
}
#[test]
fn submit_task_treats_blocked_report_artifact_as_failure() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response(
1,
"blocked",
&["selected_range_unavailable"],
vec![],
vec![],
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
let completion = direct_submit_completion(&sent).expect("task completion");
assert!(!completion.0, "expected blocked artifact to fail: {sent:?}");
assert!(completion.1.contains("status=blocked"));
assert!(completion.1.contains("selected_range_unavailable"));
}
#[test]
fn submit_task_treats_error_report_artifact_as_failure() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response(
1,
"error",
&["detail_normalization_failed"],
vec![],
vec![],
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
let completion = direct_submit_completion(&sent).expect("task completion");
assert!(!completion.0, "expected error artifact to fail: {sent:?}");
assert!(completion.1.contains("status=error"));
assert!(completion.1.contains("detail_normalization_failed"));
}
#[test]
fn submit_task_routes_zhihu_hotlist_export_before_direct_submit() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
policy_for_domains(&["www.zhihu.com"]),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_zhihu_hotlist_export_message(),
)
.unwrap();
let sent = transport.sent_messages();
let mode_logs = direct_submit_mode_logs(&sent);
let completion = direct_submit_completion(&sent).expect("task completion");
assert_eq!(mode_logs, vec!["zeroclaw_process_message_primary".to_string()]);
assert!(
!completion.0,
"expected zhihu export without page context to fail before browser actions: {sent:?}"
);
assert!(
!completion
.1
.contains("direct submit skill requires page_url so expected_domain can be derived"),
"unexpected direct submit fallback: {sent:?}"
);
}
#[test]
fn direct_skill_mode_logs_direct_skill_primary() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let skill_root = build_direct_runtime_skill_root();
let runtime_context = direct_submit_runtime_context(&skill_root);
let transport = Arc::new(MockTransport::new(vec![success_browser_response(
1,
serde_json::json!({
"text": {
"fault_type": "outage",
"observed_at": "2026-03-15 09:00",
"affected_scope": "line-7",
"artifact_payload": "report artifact payload"
}
}),
)]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
direct_runtime_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message_with_context(
transport.as_ref(),
&browser_tool,
&runtime_context,
submit_fault_details_message(),
)
.unwrap();
let sent = transport.sent_messages();
let mode_logs = direct_submit_mode_logs(&sent);
assert_eq!(mode_logs, vec!["direct_skill_primary".to_string()]);
assert!(
!mode_logs.iter().any(|mode| mode == "compat_llm_primary"),
"unexpected compat mode logs: {mode_logs:?}"
);
assert!(
!mode_logs
.iter()
.any(|mode| mode == "zeroclaw_process_message_primary"),
"unexpected zeroclaw mode logs: {mode_logs:?}"
);
}
#[test]
fn browser_action_tool_definition_uses_expected_name() {
let tool = browser_action_tool_definition();
assert_eq!(tool.name, "browser_action");
assert_eq!(tool.parameters["required"][0], "action");
assert_eq!(tool.parameters["required"][1], "expected_domain");
}
#[test]
fn runtime_executes_provider_tool_calls_and_returns_summary() {
let transport = Arc::new(MockTransport::new(vec![
BrowserMessage::Response {
seq: 1,
success: true,
data: serde_json::json!({ "navigated": true }),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 10,
},
},
BrowserMessage::Response {
seq: 2,
success: true,
data: serde_json::json!({ "typed": true }),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 10,
},
},
]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
provider_path_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
let provider = FakeProvider {
calls: vec![
ToolFunctionCall {
id: "call-1".to_string(),
name: "browser_action".to_string(),
arguments: serde_json::json!({
"action": "navigate",
"expected_domain": "www.baidu.com",
"url": "https://www.baidu.com"
}),
},
ToolFunctionCall {
id: "call-2".to_string(),
name: "browser_action".to_string(),
arguments: serde_json::json!({
"action": "type",
"expected_domain": "www.baidu.com",
"selector": "#kw",
"text": "天气",
"clear_first": true
}),
},
],
};
let summary = execute_task_with_provider(
transport.as_ref(),
&browser_tool,
&provider,
"打开百度搜索天气",
)
.unwrap();
let sent = transport.sent_messages();
assert_eq!(summary, "已通过 Agent 执行任务: 打开百度搜索天气");
assert!(matches!(
&sent[0],
AgentMessage::LogEntry { level, message }
if level == "info" && message == "navigate www.baidu.com"
));
assert!(matches!(
&sent[1],
AgentMessage::Command { seq, action, .. }
if *seq == 1 && action == &Action::Navigate
));
assert!(matches!(
&sent[2],
AgentMessage::LogEntry { level, message }
if level == "info" && message == "type www.baidu.com"
));
assert!(matches!(
&sent[3],
AgentMessage::Command { seq, action, .. }
if *seq == 2 && action == &Action::Type
));
}
#[test]
fn legacy_agent_runtime_is_explicitly_dev_only() {
assert!(sgclaw::agent::runtime::LEGACY_DEV_ONLY);
}
#[test]
fn production_submit_task_does_not_route_into_legacy_runtime_without_llm_config() {
std::env::remove_var("DEEPSEEK_API_KEY");
std::env::remove_var("DEEPSEEK_BASE_URL");
std::env::remove_var("DEEPSEEK_MODEL");
let transport = Arc::new(MockTransport::new(vec![]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
provider_path_test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
handle_browser_message(
transport.as_ref(),
&browser_tool,
BrowserMessage::SubmitTask {
instruction: "打开百度".to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: String::new(),
page_title: String::new(),
},
)
.unwrap();
let sent = transport.sent_messages();
assert!(matches!(
sent.last(),
Some(AgentMessage::TaskComplete { success, summary })
if !success && summary.contains("未配置大语言模型")
));
assert!(!sent
.iter()
.any(|message| { matches!(message, AgentMessage::Command { .. }) }));
}