feat: persist sgclaw browser conversations

This commit is contained in:
zyl
2026-03-27 01:57:42 +08:00
parent bae0e452a5
commit d315c13f66
11 changed files with 402 additions and 20 deletions

View File

@@ -4,8 +4,11 @@ pub mod runtime;
use std::ffi::OsString; use std::ffi::OsString;
use std::path::PathBuf; use std::path::PathBuf;
use crate::compat::runtime::CompatTaskContext;
use crate::config::DeepSeekSettings; use crate::config::DeepSeekSettings;
use crate::pipe::{AgentMessage, BrowserMessage, BrowserPipeTool, PipeError, Transport}; use crate::pipe::{
AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
};
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct AgentRuntimeContext { pub struct AgentRuntimeContext {
@@ -85,6 +88,41 @@ fn send_mode_log<T: Transport>(transport: &T, mode: &str) -> Result<(), PipeErro
}) })
} }
/// Decides whether `instruction` should be answered with a canned rejection instead of
/// being executed as a browser task.
///
/// Only the opening turn of a conversation is screened: when `history` is non-empty the
/// function always returns `None`.
///
/// Returns `Some(reply)` with the user-facing rejection text when the instruction is
/// blank, is one of the known greetings, or is a very short message that contains no
/// task keyword. Returns `None` when the instruction should go on to the task pipeline.
fn explicit_non_task_response(
    history: &[ConversationMessage],
    instruction: &str,
) -> Option<String> {
    // Keywords that mark an instruction as a browser task. The ASCII entries are
    // lowercase because they are matched against an ASCII-lowercased instruction.
    const TASK_HINTS: &[&str] = &[
        "打开", "搜索", "点击", "输入", "导航", "跳转", "访问", "提取", "获取", "网页", "页面",
        "标签页", "百度", "知乎", "google", "open", "search", "click", "type", "navigate",
    ];
    // Whole-message greetings and identity questions. The former empty-string entry was
    // dropped: blank input has already returned by the time this list is consulted.
    const CHITCHAT_INPUTS: &[&str] = &[
        "hi", "hello", "hey", "你好", "您好", "在吗", "你是谁", "介绍一下你自己",
    ];
    // Messages of at most this many characters without a task keyword are rejected.
    const SHORT_INPUT_MAX_CHARS: usize = 8;

    if !history.is_empty() {
        return None;
    }

    let trimmed = instruction.trim();
    if trimmed.is_empty() {
        return Some("sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。".to_string());
    }

    // Match task keywords case-insensitively so "Open ..." or "SEARCH ..." count as
    // tasks, consistent with the case-insensitive greeting comparison below.
    let lowered = trimmed.to_ascii_lowercase();
    if TASK_HINTS.iter().any(|hint| lowered.contains(*hint)) {
        return None;
    }

    // `eq_ignore_ascii_case` compares non-ASCII bytes exactly, so it also covers the
    // plain equality needed for the Chinese entries.
    if CHITCHAT_INPUTS
        .iter()
        .any(|candidate| trimmed.eq_ignore_ascii_case(candidate))
    {
        return Some("sgClaw 现在是浏览器任务入口,不做通用闲聊。请直接说你想在网页上执行什么操作,例如“打开百度搜索天气”。".to_string());
    }

    if trimmed.chars().count() <= SHORT_INPUT_MAX_CHARS {
        return Some("sgClaw 现在只处理浏览器任务。请直接描述网页操作目标,例如“打开知乎搜索天气”或“提取当前页面标题”。".to_string());
    }

    None
}
fn execute_plan<T: Transport>( fn execute_plan<T: Transport>(
transport: &T, transport: &T,
browser_tool: &BrowserPipeTool<T>, browser_tool: &BrowserPipeTool<T>,
@@ -142,7 +180,36 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
message: BrowserMessage, message: BrowserMessage,
) -> Result<(), PipeError> { ) -> Result<(), PipeError> {
match message { match message {
BrowserMessage::SubmitTask { instruction } => { BrowserMessage::SubmitTask {
instruction,
conversation_id,
messages,
page_url,
page_title,
} => {
if let Some(summary) = explicit_non_task_response(&messages, &instruction) {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary,
});
}
let task_context = CompatTaskContext {
conversation_id: (!conversation_id.trim().is_empty())
.then_some(conversation_id.clone()),
messages,
page_url: (!page_url.trim().is_empty()).then_some(page_url),
page_title: (!page_title.trim().is_empty()).then_some(page_title),
};
if !task_context.messages.is_empty() {
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"continuing conversation with {} prior turns",
task_context.messages.len()
),
});
}
let completion = match context.load_deepseek_settings() { let completion = match context.load_deepseek_settings() {
Ok(Some(settings)) => { Ok(Some(settings)) => {
let _ = transport.send(&AgentMessage::LogEntry { let _ = transport.send(&AgentMessage::LogEntry {
@@ -159,6 +226,7 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
transport, transport,
browser_tool.clone(), browser_tool.clone(),
&instruction, &instruction,
&task_context,
&context.workspace_root, &context.workspace_root,
&settings, &settings,
) { ) {

View File

@@ -105,6 +105,7 @@ fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, Bro
let action_name = take_required_string(&mut args, "action")?; let action_name = take_required_string(&mut args, "action")?;
let expected_domain = take_required_string(&mut args, "expected_domain")?; let expected_domain = take_required_string(&mut args, "expected_domain")?;
let action = parse_action(&action_name)?; let action = parse_action(&action_name)?;
validate_action_params(&action_name, &args)?;
Ok(BrowserActionRequest { Ok(BrowserActionRequest {
action, action,
@@ -146,6 +147,37 @@ fn failed_tool_result(error: String) -> ToolResult {
} }
} }
/// Verifies that `args` carries the parameters the named browser action cannot run without.
///
/// `click` and `getText` need `selector`, `type` needs `selector` and `text`, and
/// `navigate` needs `url`. Any other action name is accepted without further checks.
/// The first missing or blank parameter is reported as an error.
fn validate_action_params(
    action_name: &str,
    args: &Map<String, Value>,
) -> Result<(), BrowserActionAdapterError> {
    // Required keys per action, listed in the order they are checked.
    let required_keys: &[&'static str] = match action_name {
        "click" | "getText" => &["selector"],
        "type" => &["selector", "text"],
        "navigate" => &["url"],
        _ => &[],
    };

    required_keys
        .iter()
        .copied()
        .try_for_each(|key| require_non_empty_string(args, key, action_name))
}
/// Ensures `args[key]` is a string with at least one non-whitespace character.
///
/// Returns `InvalidArguments` naming `action_name` and `key` when the key is absent,
/// and a message that also echoes the offending value when it is present but is not a
/// non-blank string.
fn require_non_empty_string(
    args: &Map<String, Value>,
    key: &'static str,
    action_name: &str,
) -> Result<(), BrowserActionAdapterError> {
    let other = match args.get(key) {
        Some(present) => present,
        None => {
            return Err(BrowserActionAdapterError::InvalidArguments(format!(
                "{action_name} requires {key}"
            )));
        }
    };

    if matches!(other, Value::String(text) if !text.trim().is_empty()) {
        return Ok(());
    }

    // Present but unusable: wrong JSON type, or a string that is empty after trimming.
    Err(BrowserActionAdapterError::InvalidArguments(format!(
        "{action_name} requires a non-empty {key}, got {other}"
    )))
}
fn format_browser_action_error(data: &Value) -> String { fn format_browser_action_error(data: &Value) -> String {
if let Some(error) = data.get("error") { if let Some(error) = data.get("error") {
if let Some(message) = error.get("message").and_then(Value::as_str) { if let Some(message) = error.get("message").and_then(Value::as_str) {

View File

@@ -19,12 +19,21 @@ use crate::compat::config_adapter::build_zeroclaw_config_from_settings;
use crate::config::DeepSeekSettings; use crate::config::DeepSeekSettings;
use crate::compat::event_bridge::log_entry_for_turn_event; use crate::compat::event_bridge::log_entry_for_turn_event;
use crate::compat::memory_adapter::build_memory; use crate::compat::memory_adapter::build_memory;
use crate::pipe::{BrowserPipeTool, PipeError, Transport}; use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport};
/// Conversation-level context passed alongside a task instruction to `execute_task`.
///
/// The `Default` value has no conversation id, no prior messages, and no page details.
#[derive(Debug, Clone, Default)]
pub struct CompatTaskContext {
/// Conversation identifier. When present and non-blank, `execute_task_with_provider`
/// hands it to the agent as its memory session id.
pub conversation_id: Option<String>,
/// Earlier turns of the conversation; converted by `build_seed_history` and seeded
/// into the agent before the new instruction runs.
pub messages: Vec<ConversationMessage>,
/// Page URL supplied with the task, if any.
/// NOTE(review): presumably the page that was active when the task was submitted — confirm.
pub page_url: Option<String>,
/// Page title supplied with the task, if any.
pub page_title: Option<String>,
}
pub fn execute_task<T: Transport + 'static>( pub fn execute_task<T: Transport + 'static>(
transport: &T, transport: &T,
browser_tool: BrowserPipeTool<T>, browser_tool: BrowserPipeTool<T>,
instruction: &str, instruction: &str,
task_context: &CompatTaskContext,
workspace_root: &Path, workspace_root: &Path,
settings: &DeepSeekSettings, settings: &DeepSeekSettings,
) -> Result<String, PipeError> { ) -> Result<String, PipeError> {
@@ -38,6 +47,7 @@ pub fn execute_task<T: Transport + 'static>(
browser_tool, browser_tool,
provider, provider,
instruction, instruction,
task_context,
config, config,
)) ))
} }
@@ -47,9 +57,24 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
browser_tool: BrowserPipeTool<T>, browser_tool: BrowserPipeTool<T>,
provider: Box<dyn Provider>, provider: Box<dyn Provider>,
instruction: &str, instruction: &str,
task_context: &CompatTaskContext,
config: ZeroClawConfig, config: ZeroClawConfig,
) -> Result<String, PipeError> { ) -> Result<String, PipeError> {
let mut agent = build_agent(browser_tool, provider, &config)?; let mut agent = build_agent(browser_tool, provider, &config)?;
if let Some(conversation_id) = task_context
.conversation_id
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
agent.set_memory_session_id(Some(conversation_id.to_string()));
}
let seed_messages = build_seed_history(task_context);
if !seed_messages.is_empty() {
agent.seed_history(&seed_messages);
}
let (event_tx, mut event_rx) = tokio::sync::mpsc::channel::<TurnEvent>(32); let (event_tx, mut event_rx) = tokio::sync::mpsc::channel::<TurnEvent>(32);
let instruction = instruction.to_string(); let instruction = instruction.to_string();
@@ -196,3 +221,25 @@ impl Provider for NonStreamingProvider {
stream::empty().boxed() stream::empty().boxed()
} }
} }
fn build_seed_history(task_context: &CompatTaskContext) -> Vec<ChatMessage> {
task_context
.messages
.iter()
.filter_map(to_chat_message)
.collect()
}
/// Maps one stored conversation turn to a `ChatMessage`.
///
/// The content is trimmed first. Returns `None` when the trimmed content is empty or
/// when the role is anything other than `"user"`, `"assistant"` or `"system"`.
fn to_chat_message(message: &ConversationMessage) -> Option<ChatMessage> {
    let text = message.content.trim();
    let chat_message = match (text.is_empty(), message.role.as_str()) {
        (true, _) => return None,
        (false, "user") => ChatMessage::user(text),
        (false, "assistant") => ChatMessage::assistant(text),
        (false, "system") => ChatMessage::system(text),
        (false, _) => return None,
    };
    Some(chat_message)
}

View File

@@ -5,7 +5,8 @@ pub mod protocol;
pub use browser_tool::{BrowserPipeTool, CommandOutput}; pub use browser_tool::{BrowserPipeTool, CommandOutput};
pub use handshake::{perform_handshake, HandshakeResult}; pub use handshake::{perform_handshake, HandshakeResult};
pub use protocol::{ pub use protocol::{
supported_actions, Action, AgentMessage, BrowserMessage, SecurityFields, Timing, supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage,
SecurityFields, Timing,
}; };
use std::io::{BufRead, BufReader, Read, Write}; use std::io::{BufRead, BufReader, Read, Write};

View File

@@ -14,6 +14,14 @@ pub enum BrowserMessage {
}, },
SubmitTask { SubmitTask {
instruction: String, instruction: String,
#[serde(default)]
conversation_id: String,
#[serde(default)]
messages: Vec<ConversationMessage>,
#[serde(default)]
page_url: String,
#[serde(default)]
page_title: String,
}, },
Response { Response {
seq: u64, seq: u64,
@@ -26,6 +34,12 @@ pub enum BrowserMessage {
}, },
} }
/// One earlier turn of a conversation, as carried in the `messages` field of
/// `BrowserMessage::SubmitTask`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConversationMessage {
/// Speaker of the turn. The compat runtime's `to_chat_message` recognises `"user"`,
/// `"assistant"` and `"system"`, and drops turns with any other role.
pub role: String,
/// Text of the turn. Turns whose content is blank after trimming are skipped when
/// the history is seeded into the agent.
pub content: String,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum AgentMessage { pub enum AgentMessage {

View File

@@ -201,3 +201,56 @@ async fn zeroclaw_browser_tool_keeps_domain_validation_in_mac_policy() {
.contains("domain is not allowed") .contains("domain is not allowed")
); );
} }
/// Actions submitted without their mandatory parameter must fail inside the adapter,
/// with a descriptive error and without anything being sent over the transport.
#[tokio::test]
async fn zeroclaw_browser_tool_rejects_missing_required_action_parameters() {
    let (transport, tool) = build_adapter(vec![]);

    // (action name, fragment expected in the resulting error)
    let cases = [
        ("click", "click requires selector"),
        ("getText", "getText requires selector"),
        ("navigate", "navigate requires url"),
    ];

    // Run every request first, then assert, so a failure never hides later requests.
    let mut outcomes = Vec::with_capacity(cases.len());
    for &(action, expected_error) in cases.iter() {
        let outcome = tool
            .execute(json!({
                "action": action,
                "expected_domain": "www.baidu.com"
            }))
            .await
            .unwrap();
        outcomes.push((outcome, expected_error));
    }

    for (outcome, _) in &outcomes {
        assert!(!outcome.success);
    }
    assert_eq!(transport.sent_messages().len(), 0);
    for (outcome, expected_error) in &outcomes {
        assert!(
            outcome
                .error
                .as_deref()
                .unwrap()
                .contains(*expected_error)
        );
    }
}

View File

@@ -15,9 +15,11 @@ use sgclaw::agent::{
handle_browser_message_with_context, handle_browser_message_with_context,
AgentRuntimeContext, AgentRuntimeContext,
}; };
use sgclaw::compat::runtime::execute_task; use sgclaw::compat::runtime::{execute_task, CompatTaskContext};
use sgclaw::config::DeepSeekSettings; use sgclaw::config::DeepSeekSettings;
use sgclaw::pipe::{Action, AgentMessage, BrowserMessage, BrowserPipeTool, Timing}; use sgclaw::pipe::{
Action, AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, Timing,
};
use sgclaw::security::MacPolicy; use sgclaw::security::MacPolicy;
use uuid::Uuid; use uuid::Uuid;
@@ -232,6 +234,7 @@ fn compat_runtime_uses_zeroclaw_provider_path_and_executes_browser_actions() {
transport.as_ref(), transport.as_ref(),
browser_tool, browser_tool,
"打开百度搜索天气", "打开百度搜索天气",
&CompatTaskContext::default(),
&workspace_root, &workspace_root,
&settings, &settings,
) )
@@ -396,6 +399,10 @@ fn handle_browser_message_prefers_compat_runtime_for_supported_instruction_when_
&runtime_context, &runtime_context,
BrowserMessage::SubmitTask { BrowserMessage::SubmitTask {
instruction: "打开百度搜索天气".to_string(), instruction: "打开百度搜索天气".to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: String::new(),
page_title: String::new(),
}, },
) )
.unwrap(); .unwrap();
@@ -484,6 +491,10 @@ fn handle_browser_message_falls_back_to_compat_runtime_for_unsupported_instructi
&browser_tool, &browser_tool,
BrowserMessage::SubmitTask { BrowserMessage::SubmitTask {
instruction: "帮我打开百度首页".to_string(), instruction: "帮我打开百度首页".to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: String::new(),
page_title: String::new(),
}, },
) )
.unwrap(); .unwrap();
@@ -509,3 +520,134 @@ fn handle_browser_message_falls_back_to_compat_runtime_for_unsupported_instructi
})); }));
assert_eq!(request_bodies.len(), 2); assert_eq!(request_bodies.len(), 2);
} }
/// A bare greeting with no conversation history must be answered with a failed
/// `TaskComplete` whose summary carries the "browser task entry" rejection text.
#[test]
fn handle_browser_message_rejects_non_task_greeting_explicitly() {
    let transport = Arc::new(MockTransport::new(vec![]));
    let browser_tool =
        BrowserPipeTool::new(transport.clone(), test_policy(), vec![1, 2, 3, 4, 5, 6, 7, 8])
            .with_response_timeout(Duration::from_secs(1));

    let greeting = BrowserMessage::SubmitTask {
        instruction: "你好".to_string(),
        conversation_id: String::new(),
        messages: vec![],
        page_url: String::new(),
        page_title: String::new(),
    };
    handle_browser_message(transport.as_ref(), &browser_tool, greeting).unwrap();

    // The rejection must be the last message the agent sent.
    let sent = transport.sent_messages();
    let rejected_as_chitchat = match sent.last() {
        Some(AgentMessage::TaskComplete { success, summary }) => {
            !success && summary.contains("浏览器任务入口")
        }
        _ => false,
    };
    assert!(rejected_as_chitchat);
}
/// Runs a follow-up task through `execute_task` with two prior turns in the
/// `CompatTaskContext` and checks that both turns appear in the first request body
/// captured by the fake DeepSeek server.
#[test]
fn compat_runtime_includes_prior_turns_in_follow_up_provider_request() {
// Hold the shared lock for the whole test; a poisoned lock is recovered rather than
// failing the test.
// NOTE(review): presumably serialises tests that share process-wide state — confirm.
let _guard = env_lock().lock().unwrap_or_else(|err| err.into_inner());
// First scripted provider reply: a single `browser_action` tool call that navigates
// to a Zhihu search URL.
let first_response = json!({
"choices": [{
"message": {
"content": "",
"tool_calls": [{
"id": "call_1",
"type": "function",
"function": {
"name": "browser_action",
"arguments": serde_json::to_string(&json!({
"action": "navigate",
"expected_domain": "www.zhihu.com",
"url": "https://www.zhihu.com/search?q=天气&type=content"
})).unwrap()
}
}]
}
}]
});
// Second scripted provider reply: plain text, which the test expects back as the
// task summary.
let second_response = json!({
"choices": [{
"message": {
"content": "已在知乎搜索天气"
}
}]
});
let (base_url, requests, server_handle) =
start_fake_deepseek_server(vec![first_response, second_response]);
let workspace_root = temp_workspace_root();
// Point the runtime at the fake server instead of a real endpoint.
let settings = DeepSeekSettings {
api_key: "deepseek-test-key".to_string(),
base_url,
model: "deepseek-chat".to_string(),
};
// The mock transport is loaded with one successful browser response, matching the
// single navigate call scripted above.
let transport = Arc::new(MockTransport::new(vec![BrowserMessage::Response {
seq: 1,
success: true,
data: json!({ "navigated": true }),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 10,
},
}]));
let browser_tool = BrowserPipeTool::new(
transport.clone(),
test_policy(),
vec![1, 2, 3, 4, 5, 6, 7, 8],
)
.with_response_timeout(Duration::from_secs(1));
// Prior user/assistant exchange that must be replayed to the provider. The page URL
// and title are populated but not asserted on by this test.
let task_context = CompatTaskContext {
conversation_id: Some("conversation-1".to_string()),
messages: vec![
ConversationMessage {
role: "user".to_string(),
content: "打开百度搜索天气".to_string(),
},
ConversationMessage {
role: "assistant".to_string(),
content: "已在百度搜索天气".to_string(),
},
],
page_url: Some("https://www.zhihu.com/".to_string()),
page_title: Some("知乎".to_string()),
};
let summary = execute_task(
transport.as_ref(),
browser_tool,
"打开知乎搜索天气",
&task_context,
&workspace_root,
&settings,
)
.unwrap();
// Wait for the fake server to finish before reading the requests it recorded.
server_handle.join().unwrap();
let request_bodies = requests.lock().unwrap().clone();
// Only the first provider request is inspected; a missing or non-array `messages`
// field is treated as an empty list, which makes the assertions below fail.
let first_request_messages = request_bodies[0]["messages"]
.as_array()
.cloned()
.unwrap_or_default();
assert_eq!(summary, "已在知乎搜索天气");
// The earlier user turn must be present verbatim.
assert!(first_request_messages.iter().any(|message| {
message["role"] == json!("user")
&& message["content"] == json!("打开百度搜索天气")
}));
// The earlier assistant turn must be present verbatim.
assert!(first_request_messages.iter().any(|message| {
message["role"] == json!("assistant")
&& message["content"] == json!("已在百度搜索天气")
}));
}

View File

@@ -68,6 +68,10 @@ fn submit_task_sends_three_commands_and_finishes_with_task_complete() {
&tool, &tool,
BrowserMessage::SubmitTask { BrowserMessage::SubmitTask {
instruction: "打开百度搜索天气".to_string(), instruction: "打开百度搜索天气".to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: String::new(),
page_title: String::new(),
}, },
) )
.unwrap(); .unwrap();

View File

@@ -8,13 +8,20 @@ type HmacSha256 = Hmac<Sha256>;
#[test] #[test]
fn browser_submit_task_round_trip_uses_task_wire_format() { fn browser_submit_task_round_trip_uses_task_wire_format() {
let raw = r#"{"type":"submit_task","instruction":"打开百度并搜索今日汇率"}"#; let raw = r#"{"type":"submit_task","instruction":"打开百度并搜索今日汇率","conversation_id":"conversation-1","messages":[{"role":"assistant","content":"上一轮完成"}],"page_url":"https://www.baidu.com/","page_title":"百度一下"}"#;
let message: BrowserMessage = serde_json::from_str(raw).unwrap(); let message: BrowserMessage = serde_json::from_str(raw).unwrap();
assert_eq!( assert_eq!(
message, message,
BrowserMessage::SubmitTask { BrowserMessage::SubmitTask {
instruction: "打开百度并搜索今日汇率".to_string(), instruction: "打开百度并搜索今日汇率".to_string(),
conversation_id: "conversation-1".to_string(),
messages: vec![sgclaw::pipe::ConversationMessage {
role: "assistant".to_string(),
content: "上一轮完成".to_string(),
}],
page_url: "https://www.baidu.com/".to_string(),
page_title: "百度一下".to_string(),
} }
); );
assert_eq!(serde_json::to_string(&message).unwrap(), raw); assert_eq!(serde_json::to_string(&message).unwrap(), raw);

View File

@@ -134,6 +134,9 @@ export function normalizeSmokeInstruction(rawInstruction) {
function instructionPlan(instruction) { function instructionPlan(instruction) {
const baiduQuery = extractQuery(instruction, ['打开百度搜索', '打开百度并搜索']) const baiduQuery = extractQuery(instruction, ['打开百度搜索', '打开百度并搜索'])
if (baiduQuery) { if (baiduQuery) {
const url = new URL('https://www.baidu.com/s')
url.searchParams.set('wd', baiduQuery)
return { return {
key: 'baidu', key: 'baidu',
summary: `已在百度搜索${baiduQuery}`, summary: `已在百度搜索${baiduQuery}`,
@@ -141,19 +144,7 @@ function instructionPlan(instruction) {
browserToolCall('call_baidu_1', { browserToolCall('call_baidu_1', {
action: 'navigate', action: 'navigate',
expected_domain: BAIDU_DOMAIN, expected_domain: BAIDU_DOMAIN,
url: BAIDU_URL, url: url.toString(),
}),
browserToolCall('call_baidu_2', {
action: 'type',
expected_domain: BAIDU_DOMAIN,
selector: BAIDU_INPUT_SELECTOR,
text: baiduQuery,
clear_first: true,
}),
browserToolCall('call_baidu_3', {
action: 'click',
expected_domain: BAIDU_DOMAIN,
selector: BAIDU_BUTTON_SELECTOR,
}), }),
], ],
} }

View File

@@ -67,6 +67,29 @@ function assertCompatRuntimeTraffic(requests) {
if (!instructions.includes('打开知乎搜索天气')) { if (!instructions.includes('打开知乎搜索天气')) {
throw new Error('fake DeepSeek server did not receive the Zhihu smoke instruction') throw new Error('fake DeepSeek server did not receive the Zhihu smoke instruction')
} }
const zhihuRequest = requests.find((entry) => {
return (entry.body?.messages ?? []).some((message) =>
message?.role === 'user' &&
normalizeSmokeInstruction(message.content) === '打开知乎搜索天气')
})
if (!zhihuRequest) {
throw new Error('fake DeepSeek server did not receive the Zhihu follow-up turn')
}
const zhihuMessages = zhihuRequest.body?.messages ?? []
const hasPriorUserTurn = zhihuMessages.some((message) =>
message?.role === 'user' &&
normalizeSmokeInstruction(message.content) === '打开百度搜索天气')
const hasPriorAssistantTurn = zhihuMessages.some((message) =>
message?.role === 'assistant' &&
typeof message.content === 'string' &&
message.content.includes('已在百度搜索天气'))
if (!hasPriorUserTurn || !hasPriorAssistantTurn) {
throw new Error('DeepSeek follow-up turn is missing prior browser conversation history')
}
} }
main().catch((error) => { main().catch((error) => {