diff --git a/src/agent/mod.rs b/src/agent/mod.rs index 296d173..323348d 100644 --- a/src/agent/mod.rs +++ b/src/agent/mod.rs @@ -4,8 +4,11 @@ pub mod runtime; use std::ffi::OsString; use std::path::PathBuf; +use crate::compat::runtime::CompatTaskContext; use crate::config::DeepSeekSettings; -use crate::pipe::{AgentMessage, BrowserMessage, BrowserPipeTool, PipeError, Transport}; +use crate::pipe::{ + AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport, +}; #[derive(Debug, Clone, PartialEq, Eq)] pub struct AgentRuntimeContext { @@ -85,6 +88,41 @@ fn send_mode_log(transport: &T, mode: &str) -> Result<(), PipeErro }) } +fn explicit_non_task_response(history: &[ConversationMessage], instruction: &str) -> Option { + if !history.is_empty() { + return None; + } + + let trimmed = instruction.trim(); + if trimmed.is_empty() { + return Some("sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。".to_string()); + } + + const TASK_HINTS: &[&str] = &[ + "打开", "搜索", "点击", "输入", "导航", "跳转", "访问", "提取", "获取", "网页", "页面", + "标签页", "百度", "知乎", "google", "open", "search", "click", "type", "navigate", + ]; + if TASK_HINTS.iter().any(|hint| trimmed.contains(hint)) { + return None; + } + + const CHITCHAT_INPUTS: &[&str] = &[ + "hi", "hello", "hey", "你好", "您好", "嗨", "在吗", "你是谁", "介绍一下你自己", + ]; + if CHITCHAT_INPUTS + .iter() + .any(|candidate| trimmed.eq_ignore_ascii_case(candidate) || trimmed == *candidate) + { + return Some("sgClaw 现在是浏览器任务入口,不做通用闲聊。请直接说你想在网页上执行什么操作,例如“打开百度搜索天气”。".to_string()); + } + + if trimmed.chars().count() <= 8 { + return Some("sgClaw 现在只处理浏览器任务。请直接描述网页操作目标,例如“打开知乎搜索天气”或“提取当前页面标题”。".to_string()); + } + + None +} + fn execute_plan( transport: &T, browser_tool: &BrowserPipeTool, @@ -142,7 +180,36 @@ pub fn handle_browser_message_with_context( message: BrowserMessage, ) -> Result<(), PipeError> { match message { - BrowserMessage::SubmitTask { instruction } => { + BrowserMessage::SubmitTask { + instruction, + conversation_id, + messages, + page_url, + page_title, + } => { + if let Some(summary) = explicit_non_task_response(&messages, &instruction) { + return transport.send(&AgentMessage::TaskComplete { + success: false, + summary, + }); + } + + let task_context = CompatTaskContext { + conversation_id: (!conversation_id.trim().is_empty()) + .then_some(conversation_id.clone()), + messages, + page_url: (!page_url.trim().is_empty()).then_some(page_url), + page_title: (!page_title.trim().is_empty()).then_some(page_title), + }; + if !task_context.messages.is_empty() { + let _ = transport.send(&AgentMessage::LogEntry { + level: "info".to_string(), + message: format!( + "continuing conversation with {} prior turns", + task_context.messages.len() + ), + }); + } let completion = match context.load_deepseek_settings() { Ok(Some(settings)) => { let _ = transport.send(&AgentMessage::LogEntry { @@ -159,6 +226,7 @@ pub fn handle_browser_message_with_context( transport, browser_tool.clone(), &instruction, + &task_context, &context.workspace_root, &settings, ) { diff --git a/src/compat/browser_tool_adapter.rs b/src/compat/browser_tool_adapter.rs index 179212b..d33fa25 100644 --- a/src/compat/browser_tool_adapter.rs +++ b/src/compat/browser_tool_adapter.rs @@ -105,6 +105,7 @@ fn parse_browser_action_request(args: Value) -> Result ToolResult { } } +fn validate_action_params( + action_name: &str, + args: &Map, +) -> Result<(), BrowserActionAdapterError> { + match action_name { + "click" | "getText" => require_non_empty_string(args, "selector", action_name), + "type" => { + require_non_empty_string(args, "selector", action_name)?; + require_non_empty_string(args, "text", action_name) + } + "navigate" => require_non_empty_string(args, "url", action_name), + _ => Ok(()), + } +} + +fn require_non_empty_string( + args: &Map, + key: &'static str, + action_name: &str, +) -> Result<(), BrowserActionAdapterError> { + match args.get(key) { + Some(Value::String(value)) if !value.trim().is_empty() => Ok(()), + Some(other) => Err(BrowserActionAdapterError::InvalidArguments(format!( + "{action_name} requires a non-empty {key}, got {other}" + ))), + None => Err(BrowserActionAdapterError::InvalidArguments(format!( + "{action_name} requires {key}" + ))), + } +} + fn format_browser_action_error(data: &Value) -> String { if let Some(error) = data.get("error") { if let Some(message) = error.get("message").and_then(Value::as_str) { diff --git a/src/compat/runtime.rs b/src/compat/runtime.rs index d8618b2..3ce1c3c 100644 --- a/src/compat/runtime.rs +++ b/src/compat/runtime.rs @@ -19,12 +19,21 @@ use crate::compat::config_adapter::build_zeroclaw_config_from_settings; use crate::config::DeepSeekSettings; use crate::compat::event_bridge::log_entry_for_turn_event; use crate::compat::memory_adapter::build_memory; -use crate::pipe::{BrowserPipeTool, PipeError, Transport}; +use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport}; + +#[derive(Debug, Clone, Default)] +pub struct CompatTaskContext { + pub conversation_id: Option, + pub messages: Vec, + pub page_url: Option, + pub page_title: Option, +} pub fn execute_task( transport: &T, browser_tool: BrowserPipeTool, instruction: &str, + task_context: &CompatTaskContext, workspace_root: &Path, settings: &DeepSeekSettings, ) -> Result { @@ -38,6 +47,7 @@ pub fn execute_task( browser_tool, provider, instruction, + task_context, config, )) } @@ -47,9 +57,24 @@ pub async fn execute_task_with_provider( browser_tool: BrowserPipeTool, provider: Box, instruction: &str, + task_context: &CompatTaskContext, config: ZeroClawConfig, ) -> Result { let mut agent = build_agent(browser_tool, provider, &config)?; + if let Some(conversation_id) = task_context + .conversation_id + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + { + agent.set_memory_session_id(Some(conversation_id.to_string())); + } + + let seed_messages = build_seed_history(task_context); + if !seed_messages.is_empty() { + agent.seed_history(&seed_messages); + } + let (event_tx, mut event_rx) = tokio::sync::mpsc::channel::(32); let instruction = instruction.to_string(); @@ -196,3 +221,25 @@ impl Provider for NonStreamingProvider { stream::empty().boxed() } } + +fn build_seed_history(task_context: &CompatTaskContext) -> Vec { + task_context + .messages + .iter() + .filter_map(to_chat_message) + .collect() +} + +fn to_chat_message(message: &ConversationMessage) -> Option { + let content = message.content.trim(); + if content.is_empty() { + return None; + } + + match message.role.as_str() { + "user" => Some(ChatMessage::user(content)), + "assistant" => Some(ChatMessage::assistant(content)), + "system" => Some(ChatMessage::system(content)), + _ => None, + } +} diff --git a/src/pipe/mod.rs b/src/pipe/mod.rs index cb7c8de..133fa22 100644 --- a/src/pipe/mod.rs +++ b/src/pipe/mod.rs @@ -5,7 +5,8 @@ pub mod protocol; pub use browser_tool::{BrowserPipeTool, CommandOutput}; pub use handshake::{perform_handshake, HandshakeResult}; pub use protocol::{ - supported_actions, Action, AgentMessage, BrowserMessage, SecurityFields, Timing, + supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage, + SecurityFields, Timing, }; use std::io::{BufRead, BufReader, Read, Write}; diff --git a/src/pipe/protocol.rs b/src/pipe/protocol.rs index f78423e..c98cbae 100644 --- a/src/pipe/protocol.rs +++ b/src/pipe/protocol.rs @@ -14,6 +14,14 @@ pub enum BrowserMessage { }, SubmitTask { instruction: String, + #[serde(default)] + conversation_id: String, + #[serde(default)] + messages: Vec, + #[serde(default)] + page_url: String, + #[serde(default)] + page_title: String, }, Response { seq: u64, @@ -26,6 +34,12 @@ pub enum BrowserMessage { }, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ConversationMessage { + pub role: String, + pub content: String, +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum AgentMessage { diff --git a/tests/compat_browser_tool_test.rs b/tests/compat_browser_tool_test.rs index 5b4e5ab..485769a 100644 --- a/tests/compat_browser_tool_test.rs +++ b/tests/compat_browser_tool_test.rs @@ -201,3 +201,56 @@ async fn zeroclaw_browser_tool_keeps_domain_validation_in_mac_policy() { .contains("domain is not allowed") ); } + +#[tokio::test] +async fn zeroclaw_browser_tool_rejects_missing_required_action_parameters() { + let (transport, tool) = build_adapter(vec![]); + + let missing_click_selector = tool + .execute(json!({ + "action": "click", + "expected_domain": "www.baidu.com" + })) + .await + .unwrap(); + let missing_text_selector = tool + .execute(json!({ + "action": "getText", + "expected_domain": "www.baidu.com" + })) + .await + .unwrap(); + let missing_navigate_url = tool + .execute(json!({ + "action": "navigate", + "expected_domain": "www.baidu.com" + })) + .await + .unwrap(); + + assert!(!missing_click_selector.success); + assert!(!missing_text_selector.success); + assert!(!missing_navigate_url.success); + assert_eq!(transport.sent_messages().len(), 0); + assert!( + missing_click_selector + .error + .as_deref() + .unwrap() + .contains("click requires selector") + ); + assert!( + missing_text_selector + .error + .as_deref() + .unwrap() + .contains("getText requires selector") + ); + assert!( + missing_navigate_url + .error + .as_deref() + .unwrap() + .contains("navigate requires url") + ); +} diff --git a/tests/compat_runtime_test.rs b/tests/compat_runtime_test.rs index 4a12b7c..8c6e352 100644 --- a/tests/compat_runtime_test.rs +++ b/tests/compat_runtime_test.rs @@ -15,9 +15,11 @@ use sgclaw::agent::{ handle_browser_message_with_context, AgentRuntimeContext, }; -use sgclaw::compat::runtime::execute_task; +use sgclaw::compat::runtime::{execute_task, CompatTaskContext}; use sgclaw::config::DeepSeekSettings; -use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing}; +use sgclaw::pipe::{ + Action, AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, Timing, +}; use sgclaw::security::MacPolicy; use uuid::Uuid; @@ -232,6 +234,7 @@ fn compat_runtime_uses_zeroclaw_provider_path_and_executes_browser_actions() { transport.as_ref(), browser_tool, "打开百度搜索天气", + &CompatTaskContext::default(), &workspace_root, &settings, ) @@ -396,6 +399,10 @@ fn handle_browser_message_prefers_compat_runtime_for_supported_instruction_when_ &runtime_context, BrowserMessage::SubmitTask { instruction: "打开百度搜索天气".to_string(), + conversation_id: String::new(), + messages: vec![], + page_url: String::new(), + page_title: String::new(), }, ) .unwrap(); @@ -484,6 +491,10 @@ fn handle_browser_message_falls_back_to_compat_runtime_for_unsupported_instructi &browser_tool, BrowserMessage::SubmitTask { instruction: "帮我打开百度首页".to_string(), + conversation_id: String::new(), + messages: vec![], + page_url: String::new(), + page_title: String::new(), }, ) .unwrap(); @@ -509,3 +520,134 @@ fn handle_browser_message_falls_back_to_compat_runtime_for_unsupported_instructi })); assert_eq!(request_bodies.len(), 2); } + +#[test] +fn handle_browser_message_rejects_non_task_greeting_explicitly() { + let transport = Arc::new(MockTransport::new(vec![])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + handle_browser_message( + transport.as_ref(), + &browser_tool, + BrowserMessage::SubmitTask { + instruction: "你好".to_string(), + conversation_id: String::new(), + messages: vec![], + page_url: String::new(), + page_title: String::new(), + }, + ) + .unwrap(); + + let sent = transport.sent_messages(); + assert!(matches!( + sent.last(), + Some(AgentMessage::TaskComplete { success, summary }) + if !success && summary.contains("浏览器任务入口") + )); +} + +#[test] +fn compat_runtime_includes_prior_turns_in_follow_up_provider_request() { + let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner()); + + let first_response = json!({ + "choices": [{ + "message": { + "content": "", + "tool_calls": [{ + "id": "call_1", + "type": "function", + "function": { + "name": "browser_action", + "arguments": serde_json::to_string(&json!({ + "action": "navigate", + "expected_domain": "www.zhihu.com", + "url": "https://www.zhihu.com/search?q=天气&type=content" + })).unwrap() + } + }] + } + }] + }); + let second_response = json!({ + "choices": [{ + "message": { + "content": "已在知乎搜索天气" + } + }] + }); + let (base_url, requests, server_handle) = + start_fake_deepseek_server(vec![first_response, second_response]); + + let workspace_root = temp_workspace_root(); + let settings = DeepSeekSettings { + api_key: "deepseek-test-key".to_string(), + base_url, + model: "deepseek-chat".to_string(), + }; + let transport = Arc::new(MockTransport::new(vec![BrowserMessage::Response { + seq: 1, + success: true, + data: json!({ "navigated": true }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 10, + }, + }])); + let browser_tool = BrowserPipeTool::new( + transport.clone(), + test_policy(), + vec![1, 2, 3, 4, 5, 6, 7, 8], + ) + .with_response_timeout(Duration::from_secs(1)); + + let task_context = CompatTaskContext { + conversation_id: Some("conversation-1".to_string()), + messages: vec![ + ConversationMessage { + role: "user".to_string(), + content: "打开百度搜索天气".to_string(), + }, + ConversationMessage { + role: "assistant".to_string(), + content: "已在百度搜索天气".to_string(), + }, + ], + page_url: Some("https://www.zhihu.com/".to_string()), + page_title: Some("知乎".to_string()), + }; + + let summary = execute_task( + transport.as_ref(), + browser_tool, + "打开知乎搜索天气", + &task_context, + &workspace_root, + &settings, + ) + .unwrap(); + server_handle.join().unwrap(); + + let request_bodies = requests.lock().unwrap().clone(); + let first_request_messages = request_bodies[0]["messages"] + .as_array() + .cloned() + .unwrap_or_default(); + + assert_eq!(summary, "已在知乎搜索天气"); + assert!(first_request_messages.iter().any(|message| { + message["role"] == json!("user") + && message["content"] == json!("打开百度搜索天气") + })); + assert!(first_request_messages.iter().any(|message| { + message["role"] == json!("assistant") + && message["content"] == json!("已在百度搜索天气") + })); +} diff --git a/tests/runtime_task_flow_test.rs b/tests/runtime_task_flow_test.rs index a174540..4ccd7e6 100644 --- a/tests/runtime_task_flow_test.rs +++ b/tests/runtime_task_flow_test.rs @@ -68,6 +68,10 @@ fn submit_task_sends_three_commands_and_finishes_with_task_complete() { &tool, BrowserMessage::SubmitTask { instruction: "打开百度搜索天气".to_string(), + conversation_id: String::new(), + messages: vec![], + page_url: String::new(), + page_title: String::new(), }, ) .unwrap(); diff --git a/tests/task_protocol_test.rs b/tests/task_protocol_test.rs index 14ffba5..da933de 100644 --- a/tests/task_protocol_test.rs +++ b/tests/task_protocol_test.rs @@ -8,13 +8,20 @@ type HmacSha256 = Hmac; #[test] fn browser_submit_task_round_trip_uses_task_wire_format() { - let raw = r#"{"type":"submit_task","instruction":"打开百度并搜索今日汇率"}"#; + let raw = r#"{"type":"submit_task","instruction":"打开百度并搜索今日汇率","conversation_id":"conversation-1","messages":[{"role":"assistant","content":"上一轮完成"}],"page_url":"https://www.baidu.com/","page_title":"百度一下"}"#; let message: BrowserMessage = serde_json::from_str(raw).unwrap(); assert_eq!( message, BrowserMessage::SubmitTask { instruction: "打开百度并搜索今日汇率".to_string(), + conversation_id: "conversation-1".to_string(), + messages: vec![sgclaw::pipe::ConversationMessage { + role: "assistant".to_string(), + content: "上一轮完成".to_string(), + }], + page_url: "https://www.baidu.com/".to_string(), + page_title: "百度一下".to_string(), } ); assert_eq!(serde_json::to_string(&message).unwrap(), raw); diff --git a/tools/browser_smoke/fake_deepseek_server.mjs b/tools/browser_smoke/fake_deepseek_server.mjs index a4441ee..583d4ff 100644 --- a/tools/browser_smoke/fake_deepseek_server.mjs +++ b/tools/browser_smoke/fake_deepseek_server.mjs @@ -134,6 +134,9 @@ export function normalizeSmokeInstruction(rawInstruction) { function instructionPlan(instruction) { const baiduQuery = extractQuery(instruction, ['打开百度搜索', '打开百度并搜索']) if (baiduQuery) { + const url = new URL('https://www.baidu.com/s') + url.searchParams.set('wd', baiduQuery) + return { key: 'baidu', summary: `已在百度搜索${baiduQuery}`, @@ -141,19 +144,7 @@ function instructionPlan(instruction) { browserToolCall('call_baidu_1', { action: 'navigate', expected_domain: BAIDU_DOMAIN, - url: BAIDU_URL, - }), - browserToolCall('call_baidu_2', { - action: 'type', - expected_domain: BAIDU_DOMAIN, - selector: BAIDU_INPUT_SELECTOR, - text: baiduQuery, - clear_first: true, - }), - browserToolCall('call_baidu_3', { - action: 'click', - expected_domain: BAIDU_DOMAIN, - selector: BAIDU_BUTTON_SELECTOR, + url: url.toString(), }), ], } diff --git a/tools/browser_smoke/run_deepseek_browser_smoke.mjs b/tools/browser_smoke/run_deepseek_browser_smoke.mjs index 3b5a8c7..251d8b0 100644 --- a/tools/browser_smoke/run_deepseek_browser_smoke.mjs +++ b/tools/browser_smoke/run_deepseek_browser_smoke.mjs @@ -67,6 +67,29 @@ function assertCompatRuntimeTraffic(requests) { if (!instructions.includes('打开知乎搜索天气')) { throw new Error('fake DeepSeek server did not receive the Zhihu smoke instruction') } + + const zhihuRequest = requests.find((entry) => { + return (entry.body?.messages ?? []).some((message) => + message?.role === 'user' && + normalizeSmokeInstruction(message.content) === '打开知乎搜索天气') + }) + + if (!zhihuRequest) { + throw new Error('fake DeepSeek server did not receive the Zhihu follow-up turn') + } + + const zhihuMessages = zhihuRequest.body?.messages ?? [] + const hasPriorUserTurn = zhihuMessages.some((message) => + message?.role === 'user' && + normalizeSmokeInstruction(message.content) === '打开百度搜索天气') + const hasPriorAssistantTurn = zhihuMessages.some((message) => + message?.role === 'assistant' && + typeof message.content === 'string' && + message.content.includes('已在百度搜索天气')) + + if (!hasPriorUserTurn || !hasPriorAssistantTurn) { + throw new Error('DeepSeek follow-up turn is missing prior browser conversation history') + } } main().catch((error) => {