feat: persist sgclaw browser conversations

This commit is contained in:
zyl
2026-03-27 01:57:42 +08:00
parent bae0e452a5
commit d315c13f66
11 changed files with 402 additions and 20 deletions

View File

@@ -4,8 +4,11 @@ pub mod runtime;
use std::ffi::OsString;
use std::path::PathBuf;
use crate::compat::runtime::CompatTaskContext;
use crate::config::DeepSeekSettings;
use crate::pipe::{AgentMessage, BrowserMessage, BrowserPipeTool, PipeError, Transport};
use crate::pipe::{
AgentMessage, BrowserMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AgentRuntimeContext {
@@ -85,6 +88,41 @@ fn send_mode_log<T: Transport>(transport: &T, mode: &str) -> Result<(), PipeErro
})
}
/// Decides whether a first-turn instruction should be answered with a canned
/// "browser tasks only" reply instead of being executed.
///
/// * `history` - prior conversation turns; when non-empty the instruction is
///   never filtered, so follow-ups in an ongoing conversation always proceed.
/// * `instruction` - the raw user input; it is trimmed before inspection.
///
/// Returns `Some(reply)` with the user-facing rejection text when the input is
/// blank, a known greeting, or a short phrase (at most 8 characters) without
/// any task hint. Returns `None` when the instruction should be executed.
fn explicit_non_task_response(history: &[ConversationMessage], instruction: &str) -> Option<String> {
    if !history.is_empty() {
        return None;
    }

    let trimmed = instruction.trim();
    if trimmed.is_empty() {
        return Some("sgClaw 目前只处理浏览器任务,请直接描述要打开、搜索、点击或提取的网页操作。".to_string());
    }

    const TASK_HINTS: &[&str] = &[
        "打开", "搜索", "点击", "输入", "导航", "跳转", "访问", "提取", "获取", "网页", "页面",
        "标签页", "百度", "知乎", "google", "open", "search", "click", "type", "navigate",
    ];
    // The hints are lowercase, so compare against an ASCII-lowercased copy.
    // Otherwise short commands such as "Open URL" or "Google" would fall
    // through to the length-based rejection below. Non-ASCII text is untouched.
    let lowered = trimmed.to_ascii_lowercase();
    if TASK_HINTS.iter().any(|hint| lowered.contains(*hint)) {
        return None;
    }

    // The empty entry can never match: blank input has already returned above.
    const CHITCHAT_INPUTS: &[&str] = &[
        "hi", "hello", "hey", "你好", "您好", "", "在吗", "你是谁", "介绍一下你自己",
    ];
    // `eq_ignore_ascii_case` also covers exact equality, including non-ASCII text.
    if CHITCHAT_INPUTS
        .iter()
        .any(|candidate| trimmed.eq_ignore_ascii_case(candidate))
    {
        return Some("sgClaw 现在是浏览器任务入口,不做通用闲聊。请直接说你想在网页上执行什么操作,例如“打开百度搜索天气”。".to_string());
    }

    // Very short inputs without any task hint are treated as non-tasks.
    if trimmed.chars().count() <= 8 {
        return Some("sgClaw 现在只处理浏览器任务。请直接描述网页操作目标,例如“打开知乎搜索天气”或“提取当前页面标题”。".to_string());
    }

    None
}
fn execute_plan<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
@@ -142,7 +180,36 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
message: BrowserMessage,
) -> Result<(), PipeError> {
match message {
BrowserMessage::SubmitTask { instruction } => {
BrowserMessage::SubmitTask {
instruction,
conversation_id,
messages,
page_url,
page_title,
} => {
if let Some(summary) = explicit_non_task_response(&messages, &instruction) {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary,
});
}
let task_context = CompatTaskContext {
conversation_id: (!conversation_id.trim().is_empty())
.then_some(conversation_id.clone()),
messages,
page_url: (!page_url.trim().is_empty()).then_some(page_url),
page_title: (!page_title.trim().is_empty()).then_some(page_title),
};
if !task_context.messages.is_empty() {
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"continuing conversation with {} prior turns",
task_context.messages.len()
),
});
}
let completion = match context.load_deepseek_settings() {
Ok(Some(settings)) => {
let _ = transport.send(&AgentMessage::LogEntry {
@@ -159,6 +226,7 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
transport,
browser_tool.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {

View File

@@ -105,6 +105,7 @@ fn parse_browser_action_request(args: Value) -> Result<BrowserActionRequest, Bro
let action_name = take_required_string(&mut args, "action")?;
let expected_domain = take_required_string(&mut args, "expected_domain")?;
let action = parse_action(&action_name)?;
validate_action_params(&action_name, &args)?;
Ok(BrowserActionRequest {
action,
@@ -146,6 +147,37 @@ fn failed_tool_result(error: String) -> ToolResult {
}
}
/// Checks that the arguments carry every string parameter the named browser
/// action needs.
///
/// `click` and `getText` need `selector`; `type` needs `selector` and `text`;
/// `navigate` needs `url`. Any other action name passes without checks.
/// Keys are checked in the listed order and the first failure is returned.
fn validate_action_params(
    action_name: &str,
    args: &Map<String, Value>,
) -> Result<(), BrowserActionAdapterError> {
    let required: &[&'static str] = match action_name {
        "click" | "getText" => &["selector"],
        "type" => &["selector", "text"],
        "navigate" => &["url"],
        _ => &[],
    };

    for key in required.iter().copied() {
        require_non_empty_string(args, key, action_name)?;
    }
    Ok(())
}
/// Ensures `args[key]` exists and is a string with non-whitespace content.
///
/// Returns `InvalidArguments` naming the action and key when the entry is
/// missing. When the entry is present but is not a non-blank string, the
/// error message also includes the offending value.
fn require_non_empty_string(
    args: &Map<String, Value>,
    key: &'static str,
    action_name: &str,
) -> Result<(), BrowserActionAdapterError> {
    let found = args.get(key).ok_or_else(|| {
        BrowserActionAdapterError::InvalidArguments(format!("{action_name} requires {key}"))
    })?;

    let is_usable = matches!(found, Value::String(text) if !text.trim().is_empty());
    if is_usable {
        return Ok(());
    }

    Err(BrowserActionAdapterError::InvalidArguments(format!(
        "{action_name} requires a non-empty {key}, got {found}"
    )))
}
fn format_browser_action_error(data: &Value) -> String {
if let Some(error) = data.get("error") {
if let Some(message) = error.get("message").and_then(Value::as_str) {

View File

@@ -19,12 +19,21 @@ use crate::compat::config_adapter::build_zeroclaw_config_from_settings;
use crate::config::DeepSeekSettings;
use crate::compat::event_bridge::log_entry_for_turn_event;
use crate::compat::memory_adapter::build_memory;
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
use crate::pipe::{BrowserPipeTool, ConversationMessage, PipeError, Transport};
/// Conversation and page context passed alongside an instruction to
/// `execute_task` / `execute_task_with_provider`.
///
/// `Default` yields a context with no conversation id, no prior messages and
/// no page information.
#[derive(Debug, Clone, Default)]
pub struct CompatTaskContext {
/// Identifier of the conversation this task belongs to, if any.
pub conversation_id: Option<String>,
/// Earlier turns of the conversation, in the order supplied by the caller.
pub messages: Vec<ConversationMessage>,
/// URL of the page the task refers to, if known.
// NOTE(review): presumably the browser's current page - confirm with the sender.
pub page_url: Option<String>,
/// Title of the page the task refers to, if known.
pub page_title: Option<String>,
}
pub fn execute_task<T: Transport + 'static>(
transport: &T,
browser_tool: BrowserPipeTool<T>,
instruction: &str,
task_context: &CompatTaskContext,
workspace_root: &Path,
settings: &DeepSeekSettings,
) -> Result<String, PipeError> {
@@ -38,6 +47,7 @@ pub fn execute_task<T: Transport + 'static>(
browser_tool,
provider,
instruction,
task_context,
config,
))
}
@@ -47,9 +57,24 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
browser_tool: BrowserPipeTool<T>,
provider: Box<dyn Provider>,
instruction: &str,
task_context: &CompatTaskContext,
config: ZeroClawConfig,
) -> Result<String, PipeError> {
let mut agent = build_agent(browser_tool, provider, &config)?;
if let Some(conversation_id) = task_context
.conversation_id
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
{
agent.set_memory_session_id(Some(conversation_id.to_string()));
}
let seed_messages = build_seed_history(task_context);
if !seed_messages.is_empty() {
agent.seed_history(&seed_messages);
}
let (event_tx, mut event_rx) = tokio::sync::mpsc::channel::<TurnEvent>(32);
let instruction = instruction.to_string();
@@ -196,3 +221,25 @@ impl Provider for NonStreamingProvider {
stream::empty().boxed()
}
}
fn build_seed_history(task_context: &CompatTaskContext) -> Vec<ChatMessage> {
task_context
.messages
.iter()
.filter_map(to_chat_message)
.collect()
}
/// Maps one stored conversation turn onto a `ChatMessage`.
///
/// The content is trimmed first. Returns `None` when the trimmed content is
/// empty or when the role is anything other than the exact strings `"user"`,
/// `"assistant"` or `"system"`.
fn to_chat_message(message: &ConversationMessage) -> Option<ChatMessage> {
    let text = Some(message.content.trim()).filter(|text| !text.is_empty())?;

    let chat = match message.role.as_str() {
        "user" => ChatMessage::user(text),
        "assistant" => ChatMessage::assistant(text),
        "system" => ChatMessage::system(text),
        _ => return None,
    };
    Some(chat)
}

View File

@@ -5,7 +5,8 @@ pub mod protocol;
pub use browser_tool::{BrowserPipeTool, CommandOutput};
pub use handshake::{perform_handshake, HandshakeResult};
pub use protocol::{
supported_actions, Action, AgentMessage, BrowserMessage, SecurityFields, Timing,
supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage,
SecurityFields, Timing,
};
use std::io::{BufRead, BufReader, Read, Write};

View File

@@ -14,6 +14,14 @@ pub enum BrowserMessage {
},
SubmitTask {
instruction: String,
#[serde(default)]
conversation_id: String,
#[serde(default)]
messages: Vec<ConversationMessage>,
#[serde(default)]
page_url: String,
#[serde(default)]
page_title: String,
},
Response {
seq: u64,
@@ -26,6 +34,12 @@ pub enum BrowserMessage {
},
}
/// One turn of a conversation as carried in the `messages` list of
/// `BrowserMessage::SubmitTask`; (de)serialized with serde.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConversationMessage {
/// Speaker of the turn as a free-form string.
// NOTE(review): consumers appear to expect "user", "assistant" or "system" - confirm.
pub role: String,
/// Text of the turn.
pub content: String,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum AgentMessage {