From 7443b9da7f7004f22d59c8d74daf7ad14a1ef1aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=A8=E7=82=8E?= <635735027@qq.com> Date: Fri, 10 Apr 2026 17:59:24 +0800 Subject: [PATCH] fix: classify direct report artifacts by status Treat direct skill report-artifact payloads as task outcomes so partial and empty reports stay successful while blocked and error statuses fail explicitly. Co-Authored-By: Claude Sonnet 4.6 --- src/agent/mod.rs | 6 +- src/compat/direct_skill_runtime.rs | 156 +++++++++++++++++- tests/agent_runtime_test.rs | 210 +++++++++++++++++++++++- tests/browser_script_skill_tool_test.rs | 115 +++++++++++++ 4 files changed, 480 insertions(+), 7 deletions(-) diff --git a/src/agent/mod.rs b/src/agent/mod.rs index ecee56c..9edfc0e 100644 --- a/src/agent/mod.rs +++ b/src/agent/mod.rs @@ -233,9 +233,9 @@ pub fn handle_browser_message_with_context( &context.workspace_root, &settings, ) { - Ok(summary) => AgentMessage::TaskComplete { - success: true, - summary, + Ok(outcome) => AgentMessage::TaskComplete { + success: outcome.success, + summary: outcome.summary, }, Err(err) => AgentMessage::TaskComplete { success: false, diff --git a/src/compat/direct_skill_runtime.rs b/src/compat/direct_skill_runtime.rs index 8b3b74e..37d65b6 100644 --- a/src/compat/direct_skill_runtime.rs +++ b/src/compat/direct_skill_runtime.rs @@ -10,13 +10,19 @@ use crate::compat::runtime::CompatTaskContext; use crate::config::SgClawSettings; use crate::pipe::{BrowserPipeTool, PipeError, Transport}; +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DirectSubmitOutcome { + pub success: bool, + pub summary: String, +} + pub fn execute_direct_submit_skill( browser_tool: BrowserPipeTool, instruction: &str, task_context: &CompatTaskContext, workspace_root: &Path, settings: &SgClawSettings, -) -> Result { +) -> Result { let configured_tool = settings .direct_submit_skill .as_deref() @@ -80,7 +86,7 @@ pub fn execute_direct_submit_skill( .map_err(|err| PipeError::Protocol(err.to_string()))?; if result.success { - Ok(result.output) + Ok(interpret_direct_submit_output(&result.output)) } else { Err(PipeError::Protocol( result @@ -90,6 +96,104 @@ pub fn execute_direct_submit_skill( } } +fn interpret_direct_submit_output(output: &str) -> DirectSubmitOutcome { + let Some(payload) = serde_json::from_str::(output).ok() else { + return DirectSubmitOutcome { + success: true, + summary: output.to_string(), + }; + }; + + let Some(artifact) = payload.as_object() else { + return DirectSubmitOutcome { + success: true, + summary: output.to_string(), + }; + }; + + if artifact.get("type").and_then(Value::as_str) != Some("report-artifact") { + return DirectSubmitOutcome { + success: true, + summary: output.to_string(), + }; + } + + let status = artifact + .get("status") + .and_then(Value::as_str) + .unwrap_or("ok"); + let success = matches!(status, "ok" | "partial" | "empty"); + let report_name = artifact + .get("report_name") + .and_then(Value::as_str) + .unwrap_or("report-artifact"); + let period = artifact + .get("period") + .and_then(Value::as_str) + .unwrap_or(""); + let detail_rows = count_rows(artifact.get("counts"), artifact.get("rows"), "detail_rows"); + let summary_rows = count_summary_rows(artifact.get("counts"), artifact.get("sections")); + let partial_reasons = artifact + .get("partial_reasons") + .and_then(Value::as_array) + .map(|reasons| { + reasons + .iter() + .filter_map(Value::as_str) + .filter(|value| !value.trim().is_empty()) + .collect::>() + }) + .unwrap_or_default(); + + let mut parts = vec![report_name.to_string()]; + if !period.trim().is_empty() { + parts.push(period.to_string()); + } + parts.push(format!("status={status}")); + parts.push(format!("detail_rows={detail_rows}")); + parts.push(format!("summary_rows={summary_rows}")); + if !partial_reasons.is_empty() { + parts.push(format!("partial_reasons={}", partial_reasons.join(","))); + } + + DirectSubmitOutcome { + success, + summary: parts.join(" "), + } +} + +fn count_rows(counts: Option<&Value>, rows: Option<&Value>, key: &str) -> usize { + counts + .and_then(Value::as_object) + .and_then(|counts| counts.get(key)) + .and_then(Value::as_u64) + .map(|count| count as usize) + .or_else(|| rows.and_then(Value::as_array).map(Vec::len)) + .unwrap_or(0) +} + +fn count_summary_rows(counts: Option<&Value>, sections: Option<&Value>) -> usize { + counts + .and_then(Value::as_object) + .and_then(|counts| counts.get("summary_rows")) + .and_then(Value::as_u64) + .map(|count| count as usize) + .or_else(|| { + sections + .and_then(Value::as_array) + .and_then(|sections| { + sections.iter().find_map(|section| { + section + .as_object() + .and_then(|section| section.get("rows")) + .and_then(Value::as_array) + .map(Vec::len) + }) + }) + }) + .unwrap_or(0) +} + fn parse_configured_tool_name(configured_tool: &str) -> Result<(&str, &str), PipeError> { let (skill_name, tool_name) = configured_tool.split_once('.').ok_or_else(|| { PipeError::Protocol(format!( @@ -162,7 +266,11 @@ fn is_year_month(candidate: &str) -> bool { #[cfg(test)] mod tests { - use super::{derive_period, is_year_month, parse_configured_tool_name}; + use super::{ + count_rows, count_summary_rows, derive_period, interpret_direct_submit_output, + is_year_month, parse_configured_tool_name, + }; + use serde_json::json; #[test] fn parse_configured_tool_name_requires_skill_and_tool() { @@ -186,4 +294,46 @@ mod tests { assert!(!is_year_month("2026-00")); assert!(!is_year_month("2026-13")); } + + #[test] + fn interpret_direct_submit_output_maps_report_artifact_statuses() { + let partial = interpret_direct_submit_output( + &json!({ + "type": "report-artifact", + "report_name": "fault-details-report", + "period": "2026-03", + "counts": { "detail_rows": 1, "summary_rows": 1 }, + "status": "partial", + "partial_reasons": ["report_log_failed"] + }) + .to_string(), + ); + assert!(partial.success); + assert!(partial.summary.contains("status=partial")); + assert!(partial.summary.contains("report_log_failed")); + + let blocked = interpret_direct_submit_output( + &json!({ + "type": "report-artifact", + "report_name": "fault-details-report", + "status": "blocked", + "partial_reasons": ["selected_range_unavailable"] + }) + .to_string(), + ); + assert!(!blocked.success); + assert!(blocked.summary.contains("status=blocked")); + } + + #[test] + fn row_count_helpers_fall_back_to_payload_shapes() { + assert_eq!( + count_rows(None, Some(&json!([{ "qxdbh": "QX-1" }, { "qxdbh": "QX-2" }])), "detail_rows"), + 2 + ); + assert_eq!( + count_summary_rows(None, Some(&json!([{ "name": "summary-sheet", "rows": [{ "index": 1 }] }]))), + 1 + ); + } } diff --git a/tests/agent_runtime_test.rs b/tests/agent_runtime_test.rs index 8b806d8..ea18f79 100644 --- a/tests/agent_runtime_test.rs +++ b/tests/agent_runtime_test.rs @@ -164,6 +164,57 @@ fn success_browser_response(seq: u64, data: serde_json::Value) -> BrowserMessage } } +fn report_artifact_browser_response( + seq: u64, + status: &str, + partial_reasons: &[&str], + detail_rows: Vec, + summary_rows: Vec, +) -> BrowserMessage { + success_browser_response( + seq, + serde_json::json!({ + "text": { + "type": "report-artifact", + "report_name": "fault-details-report", + "period": "2026-03", + "selected_range": { + "start": "2026-03-08 16:00:00", + "end": "2026-03-09 16:00:00" + }, + "columns": ["qxdbh"], + "rows": detail_rows, + "sections": [{ + "name": "summary-sheet", + "columns": ["index"], + "rows": summary_rows + }], + "counts": { + "detail_rows": detail_rows.len(), + "summary_rows": summary_rows.len() + }, + "status": status, + "partial_reasons": partial_reasons, + "downstream": { + "export": { + "attempted": true, + "success": status != "blocked" && status != "error", + "path": "http://localhost/export.xlsx" + }, + "report_log": { + "attempted": true, + "success": partial_reasons.is_empty(), + "error": partial_reasons + .first() + .copied() + .unwrap_or("") + } + } + } + }), + ) +} + #[test] fn direct_submit_runtime_executes_fault_details_skill_without_provider_path() { let skill_root = build_direct_runtime_skill_root(); @@ -204,7 +255,8 @@ fn direct_submit_runtime_executes_fault_details_skill_without_provider_path() { ) .unwrap(); - assert!(summary.contains("fault_type")); + assert!(summary.success); + assert!(summary.summary.contains("fault_type")); let sent = transport.sent_messages(); assert!(sent.iter().all(|message| !matches!(message, AgentMessage::LogEntry { level, message } if level == "info" && message.contains("DeepSeek config loaded")))); assert!(matches!( @@ -322,6 +374,162 @@ fn submit_task_rejects_invalid_direct_submit_skill_config_before_routing() { assert!(!sent.iter().any(|message| matches!(message, AgentMessage::Command { .. }))); } +#[test] +fn submit_task_treats_partial_report_artifact_as_success_with_warning_summary() { + std::env::remove_var("DEEPSEEK_API_KEY"); + std::env::remove_var("DEEPSEEK_BASE_URL"); + std::env::remove_var("DEEPSEEK_MODEL"); + + let skill_root = build_direct_runtime_skill_root(); + let runtime_context = direct_submit_runtime_context(&skill_root); + let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response( + 1, + "partial", + &["report_log_failed"], + vec![serde_json::json!({ "qxdbh": "QX-1" })], + vec![serde_json::json!({ "index": 1 })], + )])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + direct_runtime_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + handle_browser_message_with_context( + transport.as_ref(), + &browser_tool, + &runtime_context, + submit_fault_details_message(), + ) + .unwrap(); + + let sent = transport.sent_messages(); + let completion = direct_submit_completion(&sent).expect("task completion"); + + assert!(completion.0, "expected partial artifact to succeed: {sent:?}"); + assert!(completion.1.contains("fault-details-report")); + assert!(completion.1.contains("2026-03")); + assert!(completion.1.contains("status=partial")); + assert!(completion.1.contains("detail_rows=1")); + assert!(completion.1.contains("summary_rows=1")); + assert!(completion.1.contains("report_log_failed")); +} + +#[test] +fn submit_task_treats_empty_report_artifact_as_success() { + std::env::remove_var("DEEPSEEK_API_KEY"); + std::env::remove_var("DEEPSEEK_BASE_URL"); + std::env::remove_var("DEEPSEEK_MODEL"); + + let skill_root = build_direct_runtime_skill_root(); + let runtime_context = direct_submit_runtime_context(&skill_root); + let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response( + 1, + "empty", + &[], + vec![], + vec![], + )])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + direct_runtime_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + handle_browser_message_with_context( + transport.as_ref(), + &browser_tool, + &runtime_context, + submit_fault_details_message(), + ) + .unwrap(); + + let sent = transport.sent_messages(); + let completion = direct_submit_completion(&sent).expect("task completion"); + + assert!(completion.0, "expected empty artifact to succeed: {sent:?}"); + assert!(completion.1.contains("status=empty")); + assert!(completion.1.contains("detail_rows=0")); +} + +#[test] +fn submit_task_treats_blocked_report_artifact_as_failure() { + std::env::remove_var("DEEPSEEK_API_KEY"); + std::env::remove_var("DEEPSEEK_BASE_URL"); + std::env::remove_var("DEEPSEEK_MODEL"); + + let skill_root = build_direct_runtime_skill_root(); + let runtime_context = direct_submit_runtime_context(&skill_root); + let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response( + 1, + "blocked", + &["selected_range_unavailable"], + vec![], + vec![], + )])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + direct_runtime_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + handle_browser_message_with_context( + transport.as_ref(), + &browser_tool, + &runtime_context, + submit_fault_details_message(), + ) + .unwrap(); + + let sent = transport.sent_messages(); + let completion = direct_submit_completion(&sent).expect("task completion"); + + assert!(!completion.0, "expected blocked artifact to fail: {sent:?}"); + assert!(completion.1.contains("status=blocked")); + assert!(completion.1.contains("selected_range_unavailable")); +} + +#[test] +fn submit_task_treats_error_report_artifact_as_failure() { + std::env::remove_var("DEEPSEEK_API_KEY"); + std::env::remove_var("DEEPSEEK_BASE_URL"); + std::env::remove_var("DEEPSEEK_MODEL"); + + let skill_root = build_direct_runtime_skill_root(); + let runtime_context = direct_submit_runtime_context(&skill_root); + let transport = Arc::new(MockTransport::new(vec![report_artifact_browser_response( + 1, + "error", + &["detail_normalization_failed"], + vec![], + vec![], + )])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + direct_runtime_test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + handle_browser_message_with_context( + transport.as_ref(), + &browser_tool, + &runtime_context, + submit_fault_details_message(), + ) + .unwrap(); + + let sent = transport.sent_messages(); + let completion = direct_submit_completion(&sent).expect("task completion"); + + assert!(!completion.0, "expected error artifact to fail: {sent:?}"); + assert!(completion.1.contains("status=error")); + assert!(completion.1.contains("detail_normalization_failed")); +} + #[test] fn direct_skill_mode_logs_direct_skill_primary() { std::env::remove_var("DEEPSEEK_API_KEY"); diff --git a/tests/browser_script_skill_tool_test.rs b/tests/browser_script_skill_tool_test.rs index 43c8ccc..3b3e61c 100644 --- a/tests/browser_script_skill_tool_test.rs +++ b/tests/browser_script_skill_tool_test.rs @@ -362,6 +362,121 @@ return { )); } +#[tokio::test] +async fn execute_browser_script_tool_preserves_structured_report_artifact_payload() { + let skill_dir = unique_temp_dir("sgclaw-browser-script-helper-report-artifact"); + let scripts_dir = skill_dir.join("scripts"); + fs::create_dir_all(&scripts_dir).unwrap(); + fs::write( + scripts_dir.join("collect_fault_details.js"), + r#" +return { + type: "report-artifact", + report_name: "fault-details-report", + period: args.period, + selected_range: { + start: "2026-03-08 16:00:00", + end: "2026-03-09 16:00:00" + }, + columns: ["qxdbh"], + rows: [{ qxdbh: "QX-1" }], + sections: [{ name: "summary-sheet", columns: ["index"], rows: [{ index: 1 }] }], + counts: { detail_rows: 1, summary_rows: 1 }, + status: "partial", + partial_reasons: ["report_log_failed"], + downstream: { + export: { attempted: true, success: true, path: "http://localhost/export.xlsx" }, + report_log: { attempted: true, success: false, error: "500" } + } +}; +"#, + ) + .unwrap(); + + let transport = Arc::new(MockTransport::new(vec![BrowserMessage::Response { + seq: 1, + success: true, + data: json!({ + "text": { + "type": "report-artifact", + "report_name": "fault-details-report", + "period": "2026-03", + "selected_range": { + "start": "2026-03-08 16:00:00", + "end": "2026-03-09 16:00:00" + }, + "columns": ["qxdbh"], + "rows": [{ "qxdbh": "QX-1" }], + "sections": [{ "name": "summary-sheet", "columns": ["index"], "rows": [{ "index": 1 }] }], + "counts": { "detail_rows": 1, "summary_rows": 1 }, + "status": "partial", + "partial_reasons": ["report_log_failed"], + "downstream": { + "export": { "attempted": true, "success": true, "path": "http://localhost/export.xlsx" }, + "report_log": { "attempted": true, "success": false, "error": "500" } + } + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 5, + }, + }])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + let mut tool_args = HashMap::new(); + tool_args.insert("period".to_string(), "YYYY-MM period to collect".to_string()); + let skill_tool = SkillTool { + name: "collect_fault_details".to_string(), + description: "Collect structured fault details".to_string(), + kind: "browser_script".to_string(), + command: "scripts/collect_fault_details.js".to_string(), + args: tool_args, + }; + + let result = execute_browser_script_tool( + &skill_tool, + &skill_dir, + browser_tool, + json!({ + "expected_domain": "https://www.zhihu.com/", + "period": "2026-03" + }), + ) + .await + .unwrap(); + + assert!(result.success); + assert_eq!( + serde_json::from_str::(&result.output).unwrap(), + json!({ + "type": "report-artifact", + "report_name": "fault-details-report", + "period": "2026-03", + "selected_range": { + "start": "2026-03-08 16:00:00", + "end": "2026-03-09 16:00:00" + }, + "columns": ["qxdbh"], + "rows": [{ "qxdbh": "QX-1" }], + "sections": [{ "name": "summary-sheet", "columns": ["index"], "rows": [{ "index": 1 }] }], + "counts": { "detail_rows": 1, "summary_rows": 1 }, + "status": "partial", + "partial_reasons": ["report_log_failed"], + "downstream": { + "export": { "attempted": true, "success": true, "path": "http://localhost/export.xlsx" }, + "report_log": { "attempted": true, "success": false, "error": "500" } + } + }) + ); +} + fn unique_temp_dir(prefix: &str) -> PathBuf { let nanos = SystemTime::now() .duration_since(UNIX_EPOCH)