feat: add websocket browser service runtime

Wire the service/browser runtime onto the websocket-driven execution path and add the new browser/service modules needed for the submit flow and runtime integration.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
木炎
2026-04-04 23:42:27 +08:00
parent 2ae71fb1c9
commit 3e18350320
33 changed files with 4993 additions and 327 deletions

View File

@@ -1,104 +1,17 @@
pub mod planner;
pub mod runtime;
pub mod task_runner;
use std::ffi::OsString;
use std::path::PathBuf;
use std::sync::Arc;
use crate::compat::config_adapter::resolve_skills_dir_from_sgclaw_settings;
use crate::compat::runtime::CompatTaskContext;
use crate::config::SgClawSettings;
use crate::browser::ws_backend::WsBrowserBackend;
use crate::browser::{BrowserBackend, PipeBrowserBackend};
use crate::pipe::{AgentMessage, BrowserMessage, BrowserPipeTool, PipeError, Transport};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AgentRuntimeContext {
config_path: Option<PathBuf>,
workspace_root: PathBuf,
}
impl AgentRuntimeContext {
pub fn new(config_path: Option<PathBuf>, workspace_root: PathBuf) -> Self {
Self {
config_path,
workspace_root,
}
}
pub fn from_process_args<I, S>(args: I) -> Result<Self, PipeError>
where
I: IntoIterator<Item = S>,
S: Into<OsString>,
{
let mut config_path = None;
let mut args = args.into_iter().map(Into::into);
let _ = args.next();
while let Some(arg) = args.next() {
if arg == OsString::from("--config-path") {
let Some(value) = args.next() else {
return Err(PipeError::Protocol(
"missing value for --config-path".to_string(),
));
};
config_path = Some(PathBuf::from(value));
continue;
}
let arg_string = arg.to_string_lossy();
if let Some(value) = arg_string.strip_prefix("--config-path=") {
config_path = Some(PathBuf::from(value));
}
}
let workspace_root = config_path
.as_ref()
.and_then(|path| path.parent().map(|parent| parent.to_path_buf()))
.unwrap_or_else(default_workspace_root);
Ok(Self::new(config_path, workspace_root))
}
fn load_sgclaw_settings(&self) -> Result<Option<SgClawSettings>, PipeError> {
SgClawSettings::load(self.config_path.as_deref())
.map_err(|err| PipeError::Protocol(err.to_string()))
}
fn settings_source_label(&self) -> String {
match &self.config_path {
Some(path) if path.exists() => path.display().to_string(),
_ => "environment".to_string(),
}
}
}
impl Default for AgentRuntimeContext {
fn default() -> Self {
Self::new(None, default_workspace_root())
}
}
fn default_workspace_root() -> PathBuf {
std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
}
fn send_mode_log<T: Transport>(transport: &T, mode: &str) -> Result<(), PipeError> {
transport.send(&AgentMessage::LogEntry {
level: "mode".to_string(),
message: mode.to_string(),
})
}
fn missing_llm_configuration_summary() -> String {
"未配置大语言模型。请先在 sgclaw_config.json 或环境变量中配置 apiKey、baseUrl 与 model。"
.to_string()
}
fn runtime_version_log_message() -> String {
format!(
"sgclaw runtime version={} protocol={}",
env!("CARGO_PKG_VERSION"),
crate::pipe::protocol::PROTOCOL_VERSION
)
}
pub use task_runner::{
run_submit_task, run_submit_task_with_browser_backend, AgentEventSink, AgentRuntimeContext,
SubmitTaskRequest,
};
fn execute_plan<T: Transport>(
transport: &T,
@@ -127,6 +40,53 @@ fn execute_plan<T: Transport>(
Ok(plan.summary.clone())
}
fn normalize_optional_submit_field(value: String) -> Option<String> {
let trimmed = value.trim();
(!trimmed.is_empty()).then(|| trimmed.to_string())
}
fn browser_backend_for_submit<T: Transport + 'static>(
browser_tool: &BrowserPipeTool<T>,
context: &AgentRuntimeContext,
request: &SubmitTaskRequest,
) -> Result<Arc<dyn BrowserBackend>, PipeError> {
if let Some(browser_ws_url) = configured_browser_ws_url(context) {
return Ok(Arc::new(
WsBrowserBackend::new(
Arc::new(crate::service::browser_ws_client::ServiceWsClient::connect(
&browser_ws_url,
)?),
browser_tool.mac_policy().clone(),
crate::service::browser_ws_client::initial_request_url_for_submit_task(request),
)
.with_response_timeout(browser_tool.response_timeout()),
));
}
Ok(Arc::new(PipeBrowserBackend::from_inner(browser_tool.clone())))
}
fn configured_browser_ws_url(context: &AgentRuntimeContext) -> Option<String> {
std::env::var("SGCLAW_BROWSER_WS_URL")
.ok()
.filter(|value| !value.trim().is_empty())
.or_else(|| {
context
.load_sgclaw_settings()
.ok()
.flatten()
.and_then(|settings| settings.browser_ws_url)
.map(|value| value.trim().to_string())
.filter(|value| !value.is_empty())
})
}
fn send_status_changed<T: Transport>(transport: &T, state: &str) -> Result<(), PipeError> {
transport.send(&AgentMessage::StatusChanged {
state: state.to_string(),
})
}
pub fn execute_task<T: Transport>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
@@ -157,6 +117,9 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
message: BrowserMessage,
) -> Result<(), PipeError> {
match message {
BrowserMessage::Connect => send_status_changed(transport, "connected"),
BrowserMessage::Start => send_status_changed(transport, "started"),
BrowserMessage::Stop => send_status_changed(transport, "stopped"),
BrowserMessage::SubmitTask {
instruction,
conversation_id,
@@ -164,124 +127,15 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
page_url,
page_title,
} => {
let instruction = instruction.trim().to_string();
if instruction.is_empty() {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary: "请输入任务内容。".to_string(),
});
}
let task_context = CompatTaskContext {
conversation_id: (!conversation_id.trim().is_empty())
.then_some(conversation_id.clone()),
let request = SubmitTaskRequest {
instruction,
conversation_id: normalize_optional_submit_field(conversation_id),
messages,
page_url: (!page_url.trim().is_empty()).then_some(page_url),
page_title: (!page_title.trim().is_empty()).then_some(page_title),
page_url: normalize_optional_submit_field(page_url),
page_title: normalize_optional_submit_field(page_title),
};
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: runtime_version_log_message(),
});
if !task_context.messages.is_empty() {
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"continuing conversation with {} prior turns",
task_context.messages.len()
),
});
}
let completion = match context.load_sgclaw_settings() {
Ok(Some(settings)) => {
let resolved_skills_dir =
resolve_skills_dir_from_sgclaw_settings(&context.workspace_root, &settings);
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"DeepSeek config loaded from {} model={} base_url={}",
context.settings_source_label(),
settings.provider_model,
settings.provider_base_url
),
});
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"skills dir resolved to {}",
resolved_skills_dir.display()
),
});
let _ = transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"runtime profile={:?} skills_prompt_mode={:?}",
settings.runtime_profile, settings.skills_prompt_mode
),
});
if crate::compat::orchestration::should_use_primary_orchestration(
&instruction,
task_context.page_url.as_deref(),
task_context.page_title.as_deref(),
) {
let _ = send_mode_log(transport, "zeroclaw_process_message_primary");
match crate::compat::orchestration::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => {
return transport.send(&AgentMessage::TaskComplete {
success: true,
summary,
})
}
Err(err) => {
return transport.send(&AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
})
}
}
}
let _ = send_mode_log(transport, "compat_llm_primary");
match crate::compat::runtime::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => AgentMessage::TaskComplete {
success: true,
summary,
},
Err(err) => AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
},
}
}
Ok(None) => AgentMessage::TaskComplete {
success: false,
summary: missing_llm_configuration_summary(),
},
Err(err) => {
let _ = transport.send(&AgentMessage::LogEntry {
level: "error".to_string(),
message: format!("failed to load DeepSeek config: {err}"),
});
AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
}
}
};
transport.send(&completion)
let browser_backend = browser_backend_for_submit(browser_tool, context, &request)?;
run_submit_task_with_browser_backend(transport, transport, browser_backend, context, request)
}
BrowserMessage::Init { .. } => {
eprintln!("ignoring duplicate init after handshake");
@@ -293,3 +147,17 @@ pub fn handle_browser_message_with_context<T: Transport + 'static>(
}
}
}
#[cfg(test)]
mod tests {
use super::normalize_optional_submit_field;
#[test]
fn normalize_optional_submit_field_trims_and_drops_blank_values() {
assert_eq!(normalize_optional_submit_field(" \n\t ".to_string()), None);
assert_eq!(
normalize_optional_submit_field(" https://example.com/page ".to_string()),
Some("https://example.com/page".to_string())
);
}
}

385
src/agent/task_runner.rs Normal file
View File

@@ -0,0 +1,385 @@
use std::ffi::OsString;
use std::path::PathBuf;
use std::sync::Arc;
use crate::browser::BrowserBackend;
use crate::compat::config_adapter::resolve_skills_dir_from_sgclaw_settings;
use crate::compat::runtime::CompatTaskContext;
use crate::config::SgClawSettings;
use crate::pipe::{
AgentMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport,
};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AgentRuntimeContext {
config_path: Option<PathBuf>,
workspace_root: PathBuf,
}
impl AgentRuntimeContext {
pub fn new(config_path: Option<PathBuf>, workspace_root: PathBuf) -> Self {
Self {
config_path,
workspace_root,
}
}
pub fn from_process_args<I, S>(args: I) -> Result<Self, PipeError>
where
I: IntoIterator<Item = S>,
S: Into<OsString>,
{
let mut config_path = None;
let mut args = args.into_iter().map(Into::into);
let _ = args.next();
while let Some(arg) = args.next() {
if arg == OsString::from("--config-path") {
let Some(value) = args.next() else {
return Err(PipeError::Protocol(
"missing value for --config-path".to_string(),
));
};
config_path = Some(PathBuf::from(value));
continue;
}
let arg_string = arg.to_string_lossy();
if let Some(value) = arg_string.strip_prefix("--config-path=") {
config_path = Some(PathBuf::from(value));
}
}
let workspace_root = config_path
.as_ref()
.and_then(|path| path.parent().map(|parent| parent.to_path_buf()))
.unwrap_or_else(default_workspace_root);
Ok(Self::new(config_path, workspace_root))
}
pub(crate) fn load_sgclaw_settings(&self) -> Result<Option<SgClawSettings>, PipeError> {
SgClawSettings::load(self.config_path.as_deref())
.map_err(|err| PipeError::Protocol(err.to_string()))
}
fn settings_source_label(&self) -> String {
match &self.config_path {
Some(path) if path.exists() => path.display().to_string(),
_ => "environment".to_string(),
}
}
}
impl Default for AgentRuntimeContext {
fn default() -> Self {
Self::new(None, default_workspace_root())
}
}
fn default_workspace_root() -> PathBuf {
std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
}
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct SubmitTaskRequest {
pub instruction: String,
pub conversation_id: Option<String>,
pub messages: Vec<ConversationMessage>,
pub page_url: Option<String>,
pub page_title: Option<String>,
}
pub trait AgentEventSink: Send + Sync {
fn send(&self, message: &AgentMessage) -> Result<(), PipeError>;
}
impl<T: Transport + ?Sized> AgentEventSink for T {
fn send(&self, message: &AgentMessage) -> Result<(), PipeError> {
Transport::send(self, message)
}
}
pub fn run_submit_task<T: Transport + 'static>(
transport: &T,
sink: &dyn AgentEventSink,
browser_tool: &BrowserPipeTool<T>,
context: &AgentRuntimeContext,
request: SubmitTaskRequest,
) -> Result<(), PipeError> {
let SubmitTaskRequest {
instruction,
conversation_id,
messages,
page_url,
page_title,
} = request;
let instruction = instruction.trim().to_string();
if instruction.is_empty() {
return sink.send(&AgentMessage::TaskComplete {
success: false,
summary: "请输入任务内容。".to_string(),
});
}
let task_context = CompatTaskContext {
conversation_id,
messages,
page_url,
page_title,
};
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: runtime_version_log_message(),
});
if !task_context.messages.is_empty() {
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"continuing conversation with {} prior turns",
task_context.messages.len()
),
});
}
let completion = match context.load_sgclaw_settings() {
Ok(Some(settings)) => {
let resolved_skills_dir =
resolve_skills_dir_from_sgclaw_settings(&context.workspace_root, &settings);
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"DeepSeek config loaded from {} model={} base_url={}",
context.settings_source_label(),
settings.provider_model,
settings.provider_base_url
),
});
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!("skills dir resolved to {}", resolved_skills_dir.display()),
});
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"runtime profile={:?} skills_prompt_mode={:?}",
settings.runtime_profile, settings.skills_prompt_mode
),
});
if crate::compat::orchestration::should_use_primary_orchestration(
&instruction,
task_context.page_url.as_deref(),
task_context.page_title.as_deref(),
) {
let _ = send_mode_log(sink, "zeroclaw_process_message_primary");
match crate::compat::orchestration::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => {
return sink.send(&AgentMessage::TaskComplete {
success: true,
summary,
});
}
Err(err) => {
return sink.send(&AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
});
}
}
}
let _ = send_mode_log(sink, "compat_llm_primary");
match crate::compat::runtime::execute_task_with_sgclaw_settings(
transport,
browser_tool.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => AgentMessage::TaskComplete {
success: true,
summary,
},
Err(err) => AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
},
}
}
Ok(None) => AgentMessage::TaskComplete {
success: false,
summary: missing_llm_configuration_summary(),
},
Err(err) => {
let _ = sink.send(&AgentMessage::LogEntry {
level: "error".to_string(),
message: format!("failed to load DeepSeek config: {err}"),
});
AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
}
}
};
sink.send(&completion)
}
pub fn run_submit_task_with_browser_backend<T: Transport + 'static>(
_transport: &T,
sink: &dyn AgentEventSink,
browser_backend: Arc<dyn BrowserBackend>,
context: &AgentRuntimeContext,
request: SubmitTaskRequest,
) -> Result<(), PipeError> {
let SubmitTaskRequest {
instruction,
conversation_id,
messages,
page_url,
page_title,
} = request;
let instruction = instruction.trim().to_string();
if instruction.is_empty() {
return sink.send(&AgentMessage::TaskComplete {
success: false,
summary: "请输入任务内容。".to_string(),
});
}
let task_context = CompatTaskContext {
conversation_id,
messages,
page_url,
page_title,
};
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: runtime_version_log_message(),
});
if !task_context.messages.is_empty() {
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"continuing conversation with {} prior turns",
task_context.messages.len()
),
});
}
let completion = match context.load_sgclaw_settings() {
Ok(Some(settings)) => {
let resolved_skills_dir =
resolve_skills_dir_from_sgclaw_settings(&context.workspace_root, &settings);
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"DeepSeek config loaded from {} model={} base_url={}",
context.settings_source_label(),
settings.provider_model,
settings.provider_base_url
),
});
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!("skills dir resolved to {}", resolved_skills_dir.display()),
});
let _ = sink.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: format!(
"runtime profile={:?} skills_prompt_mode={:?}",
settings.runtime_profile, settings.skills_prompt_mode
),
});
if crate::compat::orchestration::should_use_primary_orchestration(
&instruction,
task_context.page_url.as_deref(),
task_context.page_title.as_deref(),
) {
let _ = send_mode_log(sink, "zeroclaw_process_message_primary");
match crate::compat::orchestration::execute_task_with_browser_backend(
sink,
browser_backend.clone(),
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => {
return sink.send(&AgentMessage::TaskComplete {
success: true,
summary,
});
}
Err(err) => {
return sink.send(&AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
});
}
}
}
let _ = send_mode_log(sink, "compat_llm_primary");
match crate::compat::runtime::execute_task_with_browser_backend(
sink,
browser_backend,
&instruction,
&task_context,
&context.workspace_root,
&settings,
) {
Ok(summary) => AgentMessage::TaskComplete {
success: true,
summary,
},
Err(err) => AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
},
}
}
Ok(None) => AgentMessage::TaskComplete {
success: false,
summary: missing_llm_configuration_summary(),
},
Err(err) => {
let _ = sink.send(&AgentMessage::LogEntry {
level: "error".to_string(),
message: format!("failed to load DeepSeek config: {err}"),
});
AgentMessage::TaskComplete {
success: false,
summary: err.to_string(),
}
}
};
sink.send(&completion)
}
fn send_mode_log(sink: &dyn AgentEventSink, mode: &str) -> Result<(), PipeError> {
sink.send(&AgentMessage::LogEntry {
level: "mode".to_string(),
message: mode.to_string(),
})
}
fn missing_llm_configuration_summary() -> String {
"未配置大语言模型。请先在 sgclaw_config.json 或环境变量中配置 apiKey、baseUrl 与 model。"
.to_string()
}
fn runtime_version_log_message() -> String {
format!(
"sgclaw runtime version={} protocol={}",
env!("CARGO_PKG_VERSION"),
crate::pipe::protocol::PROTOCOL_VERSION
)
}

10
src/bin/sg_claw.rs Normal file
View File

@@ -0,0 +1,10 @@
use std::process::ExitCode;
fn main() -> ExitCode {
if let Err(err) = sgclaw::service::run() {
eprintln!("sg_claw failed: {err}");
return ExitCode::FAILURE;
}
ExitCode::SUCCESS
}

78
src/bin/sg_claw_client.rs Normal file
View File

@@ -0,0 +1,78 @@
use std::io::{self, BufRead};
use sgclaw::service::{ClientMessage, ServiceMessage};
use tungstenite::{connect, Message};
fn main() -> std::process::ExitCode {
match run() {
Ok(()) => std::process::ExitCode::SUCCESS,
Err(err) => {
eprintln!("sg_claw_client failed: {err}");
std::process::ExitCode::FAILURE
}
}
}
fn parse_request(input: &str) -> (ClientMessage, bool) {
match input.trim() {
"/connect" => (ClientMessage::Connect, true),
"/start" => (ClientMessage::Start, true),
"/stop" => (ClientMessage::Stop, true),
instruction => (
ClientMessage::SubmitTask {
instruction: instruction.to_string(),
conversation_id: String::new(),
messages: vec![],
page_url: String::new(),
page_title: String::new(),
},
false,
),
}
}
fn run() -> Result<(), String> {
let service_url = std::env::var("SG_CLAW_SERVICE_WS_URL")
.unwrap_or_else(|_| "ws://127.0.0.1:42321".to_string());
let (mut socket, _) = connect(service_url.as_str()).map_err(|err| err.to_string())?;
let mut input = String::new();
io::stdin()
.lock()
.read_line(&mut input)
.map_err(|err| err.to_string())?;
let (request, exit_on_status) = parse_request(&input);
let payload = serde_json::to_string(&request).map_err(|err| err.to_string())?;
socket
.send(Message::Text(payload.into()))
.map_err(|err| err.to_string())?;
loop {
match socket.read().map_err(|err| err.to_string())? {
Message::Text(text) => {
let message: ServiceMessage =
serde_json::from_str(&text).map_err(|err| err.to_string())?;
match message {
ServiceMessage::StatusChanged { state } => {
println!("status: {state}");
if exit_on_status {
return Ok(());
}
}
ServiceMessage::LogEntry { level: _, message } => {
println!("{message}");
}
ServiceMessage::TaskComplete { success: _, summary } => {
println!("{summary}");
return Ok(());
}
ServiceMessage::Busy { message } => return Err(message),
}
}
Message::Close(_) => return Err("service disconnected before task completion".to_string()),
_ => {}
}
}
}

View File

@@ -0,0 +1,70 @@
use std::env;
use std::process::ExitCode;
use std::time::Duration;
use sgclaw::{parse_probe_args, run_probe_script, ProbeOutcome};
fn main() -> ExitCode {
match run() {
Ok(()) => ExitCode::SUCCESS,
Err(err) => {
eprintln!("sgbrowser_ws_probe failed: {err}");
ExitCode::FAILURE
}
}
}
fn run() -> Result<(), String> {
let args: Vec<String> = env::args().skip(1).collect();
let config = match parse_probe_args(&args) {
Ok(config) => config,
Err(err) => return Err(err.to_string()),
};
let results = match run_probe_script(
&config.ws_url,
Duration::from_millis(config.timeout_ms),
config.steps,
) {
Ok(results) => results,
Err(err) => return Err(err.to_string()),
};
for (index, result) in results.iter().enumerate() {
println!("STEP {} {}", index + 1, result.label);
println!("SEND: {}", result.sent);
match &result.outcome {
ProbeOutcome::Received(frames) => {
if frames.is_empty() {
println!("RECV: <none>");
} else {
for frame in frames {
println!("RECV: {}", frame);
}
}
println!("OUTCOME: received");
}
ProbeOutcome::NoReplyExpected => {
println!("RECV: <none>");
println!("OUTCOME: no-reply-expected");
}
ProbeOutcome::TimedOut => {
println!("RECV: <none>");
println!("OUTCOME: timeout");
}
ProbeOutcome::Closed => {
println!("RECV: <none>");
println!("OUTCOME: closed");
}
ProbeOutcome::ConnectFailed(message) => {
println!("RECV: <none>");
println!("OUTCOME: connect-failed");
println!("DETAIL: {}", message);
}
}
if index + 1 < results.len() {
println!();
}
}
Ok(())
}

39
src/browser/backend.rs Normal file
View File

@@ -0,0 +1,39 @@
use std::sync::Arc;
use serde_json::Value;
use crate::pipe::{Action, CommandOutput, ExecutionSurfaceMetadata, PipeError};
pub trait BrowserBackend: Send + Sync {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError>;
fn surface_metadata(&self) -> ExecutionSurfaceMetadata;
fn supports_eval(&self) -> bool {
true
}
}
impl<T: BrowserBackend + ?Sized> BrowserBackend for Arc<T> {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError> {
self.as_ref().invoke(action, params, expected_domain)
}
fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.as_ref().surface_metadata()
}
fn supports_eval(&self) -> bool {
self.as_ref().supports_eval()
}
}

View File

@@ -0,0 +1,66 @@
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use serde_json::Value;
use crate::browser::backend::BrowserBackend;
use crate::browser::bridge_contract::{BridgeBrowserActionReply, BridgeBrowserActionRequest};
use crate::browser::bridge_transport::BridgeActionTransport;
use crate::pipe::{Action, CommandOutput, ExecutionSurfaceMetadata, PipeError};
use crate::security::MacPolicy;
pub struct BridgeBrowserBackend {
transport: Arc<dyn BridgeActionTransport>,
mac_policy: MacPolicy,
next_seq: AtomicU64,
}
impl BridgeBrowserBackend {
pub fn new(transport: Arc<dyn BridgeActionTransport>, mac_policy: MacPolicy) -> Self {
Self {
transport,
mac_policy,
next_seq: AtomicU64::new(1),
}
}
}
impl BrowserBackend for BridgeBrowserBackend {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError> {
self.mac_policy.validate(&action, expected_domain)?;
let seq = self.next_seq.fetch_add(1, Ordering::Relaxed);
let reply = self.transport.execute(BridgeBrowserActionRequest::new(
action.as_str(),
params,
expected_domain,
))?;
match reply {
BridgeBrowserActionReply::Success(success) => Ok(CommandOutput {
seq,
success: true,
data: success.data,
aom_snapshot: success.aom_snapshot,
timing: success.timing,
}),
BridgeBrowserActionReply::Error(error) => Err(PipeError::Protocol(format!(
"bridge action failed: {}",
error.message
))),
}
}
fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.mac_policy.privileged_surface_metadata()
}
fn supports_eval(&self) -> bool {
self.mac_policy.supports_pipe_action(&Action::Eval)
}
}

View File

@@ -0,0 +1,63 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::pipe::Timing;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BridgeLifecycleCall {
Connect,
Start,
Stop,
SubmitTask,
}
impl BridgeLifecycleCall {
pub fn bridge_name(self) -> &'static str {
match self {
Self::Connect => "sgclawConnect",
Self::Start => "sgclawStart",
Self::Stop => "sgclawStop",
Self::SubmitTask => "sgclawSubmitTask",
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeBrowserActionRequest {
pub action: String,
pub params: Value,
pub expected_domain: String,
}
impl BridgeBrowserActionRequest {
pub fn new(
action: impl Into<String>,
params: Value,
expected_domain: impl Into<String>,
) -> Self {
Self {
action: action.into(),
params,
expected_domain: expected_domain.into(),
}
}
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum BridgeBrowserActionReply {
Success(BridgeBrowserActionSuccess),
Error(BridgeBrowserActionError),
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeBrowserActionSuccess {
pub data: Value,
pub aom_snapshot: Vec<Value>,
pub timing: Timing,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BridgeBrowserActionError {
pub message: String,
pub details: Value,
}

View File

@@ -0,0 +1,9 @@
use crate::browser::bridge_contract::{BridgeBrowserActionReply, BridgeBrowserActionRequest};
use crate::pipe::PipeError;
pub trait BridgeActionTransport: Send + Sync {
fn execute(
&self,
request: BridgeBrowserActionRequest,
) -> Result<BridgeBrowserActionReply, PipeError>;
}

View File

@@ -0,0 +1,301 @@
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use crate::browser::backend::BrowserBackend;
use crate::pipe::{Action, CommandOutput, ExecutionSurfaceMetadata, PipeError, Timing};
use crate::security::MacPolicy;
const NAVIGATE_CALLBACK_NAME: &str = "sgclawOnLoaded";
const GET_TEXT_CALLBACK_NAME: &str = "sgclawOnGetText";
const EVAL_CALLBACK_NAME: &str = "sgclawOnEval";
const SHOW_AREA: &str = "show";
pub trait BrowserCallbackHost: Send + Sync {
fn execute(&self, request: BrowserCallbackRequest) -> Result<BrowserCallbackResponse, PipeError>;
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BrowserCallbackRequest {
pub seq: u64,
pub request_url: String,
pub expected_domain: String,
pub action: String,
pub command: Value,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum BrowserCallbackResponse {
Success(BrowserCallbackSuccess),
Error(BrowserCallbackError),
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BrowserCallbackSuccess {
pub success: bool,
pub data: Value,
pub aom_snapshot: Vec<Value>,
pub timing: Timing,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BrowserCallbackError {
pub message: String,
pub details: Value,
}
pub struct BrowserCallbackBackend {
host: Arc<dyn BrowserCallbackHost>,
mac_policy: MacPolicy,
helper_page_url: String,
current_target_url: Mutex<Option<String>>,
next_seq: AtomicU64,
}
impl BrowserCallbackBackend {
pub fn new(
host: Arc<dyn BrowserCallbackHost>,
mac_policy: MacPolicy,
helper_page_url: impl Into<String>,
) -> Self {
Self {
host,
mac_policy,
helper_page_url: helper_page_url.into(),
current_target_url: Mutex::new(None),
next_seq: AtomicU64::new(1),
}
}
fn build_command(&self, action: &Action, params: &Value) -> Result<Value, PipeError> {
match action {
Action::Navigate => {
let target_url = required_string(params, "url")?;
// Use sgBrowerserOpenPage to open the target URL in a **new**
// visible browser tab. This keeps the helper page alive so its
// WebSocket connection, command polling, and callback functions
// remain functional for subsequent GetText / Eval commands.
//
// sgBrowserCallAfterLoaded would navigate the helper page tab
// itself to the target URL, destroying all helper-page JS
// context and making further communication impossible.
//
// sgBrowerserOpenPage does not fire a JS callback; the callback
// host will treat the navigate action as fire-and-forget and
// return success once the command has been forwarded.
Ok(json!([
self.helper_page_url,
"sgBrowerserOpenPage",
target_url,
]))
}
Action::GetText => {
let target_url = self.target_url(action, params)?;
let domain = extract_domain(&target_url)?;
let selector = required_string(params, "selector")?;
let js_code = build_get_text_js(&self.helper_page_url, &selector);
// Use sgBrowserExcuteJsCodeByDomain (API #25) which matches
// pages by domain rather than exact URL. This is far more
// robust than sgBrowserExcuteJsCodeByArea because the actual
// page URL may differ from what we navigated to (redirects,
// query parameters, etc.).
Ok(json!([
self.helper_page_url,
"sgBrowserExcuteJsCodeByDomain",
domain,
js_code,
SHOW_AREA,
]))
}
Action::Eval => {
let target_url = self.target_url(action, params)?;
let domain = extract_domain(&target_url)?;
let script = required_string(params, "script")?;
let js_code = build_eval_js(&self.helper_page_url, &script);
Ok(json!([
self.helper_page_url,
"sgBrowserExcuteJsCodeByDomain",
domain,
js_code,
SHOW_AREA,
]))
}
_ => Err(PipeError::Protocol(format!(
"unsupported callback-host browser action: {}",
action.as_str()
))),
}
}
fn target_url(&self, action: &Action, params: &Value) -> Result<String, PipeError> {
if let Some(target_url) = params
.get("target_url")
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.map(ToString::to_string)
{
return Ok(target_url);
}
self.current_target_url
.lock()
.map_err(|_| PipeError::Protocol("callback backend target url lock poisoned".to_string()))?
.clone()
.ok_or_else(|| PipeError::Protocol(format!("target_url is required for {}", action.as_str())))
}
}
impl BrowserBackend for BrowserCallbackBackend {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError> {
self.mac_policy.validate(&action, expected_domain)?;
let seq = self.next_seq.fetch_add(1, Ordering::Relaxed);
let reply = self.host.execute(BrowserCallbackRequest {
seq,
request_url: self.helper_page_url.clone(),
expected_domain: expected_domain.to_string(),
action: action.as_str().to_string(),
command: self.build_command(&action, &params)?,
})?;
match reply {
BrowserCallbackResponse::Success(success) => {
if matches!(action, Action::Navigate) {
if let Some(url) = params
.get("url")
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
{
*self.current_target_url.lock().map_err(|_| {
PipeError::Protocol("callback backend target url lock poisoned".to_string())
})? = Some(url.to_string());
}
}
Ok(CommandOutput {
seq,
success: success.success,
data: success.data,
aom_snapshot: success.aom_snapshot,
timing: success.timing,
})
}
BrowserCallbackResponse::Error(error) => Err(PipeError::Protocol(format!(
"callback host browser action failed: {} ({})",
error.message, error.details
))),
}
}
fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.mac_policy.privileged_surface_metadata()
}
fn supports_eval(&self) -> bool {
self.mac_policy.supports_pipe_action(&Action::Eval)
}
}
fn required_string(params: &Value, key: &str) -> Result<String, PipeError> {
params
.get(key)
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.map(ToString::to_string)
.ok_or_else(|| PipeError::Protocol(format!("{key} is required")))
}
fn build_get_text_js(source_url: &str, selector: &str) -> String {
let escaped_source_url = escape_js_single_quoted(source_url);
let escaped_selector = escape_js_single_quoted(selector);
let callback = GET_TEXT_CALLBACK_NAME;
let events_url = escape_js_single_quoted(&events_endpoint_url(source_url));
// Three delivery paths for getting the result back to the callback host:
//
// 1. callBackJsToCpp (API #40) — browser-native IPC that routes the
// callback function to the helper page.
// 2. XMLHttpRequest POST to callback host — localhost (127.0.0.1) is
// exempt from mixed-content restrictions in Chromium.
// 3. navigator.sendBeacon fallback — same localhost exemption.
//
// The XHR / sendBeacon paths POST the event DIRECTLY in the format the
// callback host expects (callback="sgclawOnGetText", payload={text:...})
// so normalize_callback_result can process it via Path A.
format!(
"(function(){{try{{\
var el=document.querySelector('{escaped_selector}');\
var t=el?((el.innerText||el.textContent||'').trim()):'';\
try{{callBackJsToCpp('{escaped_source_url}@_@'+window.location.href+'@_@{callback}@_@sgBrowserExcuteJsCodeByDomain@_@'+t)}}catch(_){{}}\
var j=JSON.stringify({{type:'callback',callback:'{callback}',request_url:'{escaped_source_url}',payload:{{text:t}}}});\
try{{var r=new XMLHttpRequest();r.open('POST','{events_url}',true);r.setRequestHeader('Content-Type','application/json');r.send(j)}}catch(_){{}}\
try{{navigator.sendBeacon('{events_url}',new Blob([j],{{type:'application/json'}}))}}catch(_){{}}\
}}catch(e){{}}}})()"
)
}
fn build_eval_js(source_url: &str, script: &str) -> String {
let escaped_source_url = escape_js_single_quoted(source_url);
let callback = EVAL_CALLBACK_NAME;
let events_url = escape_js_single_quoted(&events_endpoint_url(source_url));
format!(
"(function(){{try{{var v=(function(){{return {script}}})();\
var t=(typeof v==='string')?v:JSON.stringify(v);\
try{{callBackJsToCpp('{escaped_source_url}@_@'+window.location.href+'@_@{callback}@_@sgBrowserExcuteJsCodeByDomain@_@'+(t??''))}}catch(_){{}}\
var j=JSON.stringify({{type:'callback',callback:'{callback}',request_url:'{escaped_source_url}',payload:{{value:(t??'')}}}});\
try{{var r=new XMLHttpRequest();r.open('POST','{events_url}',true);r.setRequestHeader('Content-Type','application/json');r.send(j)}}catch(_){{}}\
try{{navigator.sendBeacon('{events_url}',new Blob([j],{{type:'application/json'}}))}}catch(_){{}}\
}}catch(e){{}}}})()"
)
}
/// Derive the callback host events endpoint URL from the helper page URL.
/// e.g. "http://127.0.0.1:62819/sgclaw/browser-helper.html"
/// → "http://127.0.0.1:62819/sgclaw/callback/events"
fn events_endpoint_url(helper_page_url: &str) -> String {
let origin = helper_page_url
.find("://")
.and_then(|scheme_end| {
helper_page_url[scheme_end + 3..]
.find('/')
.map(|path_start| &helper_page_url[..scheme_end + 3 + path_start])
})
.unwrap_or(helper_page_url);
format!("{origin}/sgclaw/callback/events")
}
/// Extract the domain from a URL.
/// e.g. "https://www.zhihu.com/hot" → "www.zhihu.com"
fn extract_domain(url: &str) -> Result<String, PipeError> {
let after_scheme = url
.find("://")
.map(|i| &url[i + 3..])
.unwrap_or(url);
let domain = after_scheme
.split('/')
.next()
.unwrap_or(after_scheme)
.split(':')
.next()
.unwrap_or(after_scheme);
if domain.is_empty() {
return Err(PipeError::Protocol(format!(
"failed to extract domain from URL: {url}"
)));
}
Ok(domain.to_string())
}
fn escape_js_single_quoted(raw: &str) -> String {
raw.replace('\\', "\\\\").replace('\'', "\\'")
}

1105
src/browser/callback_host.rs Normal file

File diff suppressed because it is too large Load Diff

19
src/browser/mod.rs Normal file
View File

@@ -0,0 +1,19 @@
pub mod bridge_backend;
pub mod bridge_contract;
pub mod bridge_transport;
pub mod callback_backend;
mod backend;
pub(crate) mod callback_host;
mod pipe_backend;
pub mod ws_backend;
pub mod ws_probe;
pub mod ws_protocol;
pub use backend::BrowserBackend;
pub use bridge_backend::BridgeBrowserBackend;
pub use callback_backend::{
BrowserCallbackBackend, BrowserCallbackError, BrowserCallbackHost,
BrowserCallbackRequest, BrowserCallbackResponse, BrowserCallbackSuccess,
};
pub use pipe_backend::PipeBrowserBackend;
pub use ws_backend::WsBrowserBackend;

View File

@@ -0,0 +1,55 @@
use std::sync::Arc;
use serde_json::Value;
use crate::browser::BrowserBackend;
use crate::pipe::{Action, BrowserPipeTool, CommandOutput, ExecutionSurfaceMetadata, PipeError, Transport};
use crate::security::MacPolicy;
pub struct PipeBrowserBackend<T: Transport> {
inner: BrowserPipeTool<T>,
}
impl<T: Transport> PipeBrowserBackend<T> {
pub fn new(transport: Arc<T>, mac_policy: MacPolicy, session_key: Vec<u8>) -> Self {
Self {
inner: BrowserPipeTool::new(transport, mac_policy, session_key),
}
}
pub fn from_inner(inner: BrowserPipeTool<T>) -> Self {
Self { inner }
}
pub fn with_response_timeout(mut self, response_timeout: std::time::Duration) -> Self {
self.inner = self.inner.with_response_timeout(response_timeout);
self
}
}
impl<T: Transport> Clone for PipeBrowserBackend<T> {
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
}
}
}
impl<T: Transport> BrowserBackend for PipeBrowserBackend<T> {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError> {
self.inner.invoke(action, params, expected_domain)
}
fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.inner.surface_metadata()
}
fn supports_eval(&self) -> bool {
self.inner.supports_eval()
}
}

158
src/browser/ws_backend.rs Normal file
View File

@@ -0,0 +1,158 @@
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use serde_json::{json, Value};
use crate::browser::{ws_protocol, BrowserBackend};
use crate::pipe::{Action, CommandOutput, ExecutionSurfaceMetadata, PipeError, Timing};
use crate::security::MacPolicy;
pub trait WsClient: Send + Sync {
fn send_text(&self, payload: &str) -> Result<(), PipeError>;
fn recv_text_timeout(&self, timeout: Duration) -> Result<String, PipeError>;
}
pub struct WsBrowserBackend<C: WsClient> {
client: Arc<C>,
mac_policy: MacPolicy,
request_url: Mutex<String>,
next_seq: AtomicU64,
response_timeout: Duration,
in_flight: Mutex<()>,
}
impl<C: WsClient> WsBrowserBackend<C> {
pub fn new(client: Arc<C>, mac_policy: MacPolicy, request_url: impl Into<String>) -> Self {
Self {
client,
mac_policy,
request_url: Mutex::new(request_url.into()),
next_seq: AtomicU64::new(1),
response_timeout: Duration::from_secs(30),
in_flight: Mutex::new(()),
}
}
pub fn with_response_timeout(mut self, response_timeout: Duration) -> Self {
self.response_timeout = response_timeout;
self
}
}
impl<C: WsClient> BrowserBackend for WsBrowserBackend<C> {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError> {
let _guard = self
.in_flight
.lock()
.map_err(|_| PipeError::Protocol("browser ws request lock poisoned".to_string()))?;
self.mac_policy.validate(&action, expected_domain)?;
let seq = self.next_seq.fetch_add(1, Ordering::Relaxed);
let request_id = seq.to_string();
let request_url = self
.request_url
.lock()
.map_err(|_| PipeError::Protocol("browser ws request url lock poisoned".to_string()))?
.clone();
let encoded = ws_protocol::encode_v1_action(
&action,
&params,
&request_url,
Some(request_id.as_str()),
)?;
self.client.send_text(&encoded.payload)?;
let status = Some(recv_status_frame(&*self.client, self.response_timeout)?);
if let Some(status) = status {
let status_code = parse_status_code(&status)?;
if status_code != 0 {
return Err(PipeError::Protocol(format!(
"browser returned non-zero status: {status_code}"
)));
}
}
if action == Action::Navigate {
if let Some(url) = params.get("url").and_then(Value::as_str) {
let mut request_url = self.request_url.lock().map_err(|_| {
PipeError::Protocol("browser ws request url lock poisoned".to_string())
})?;
*request_url = url.to_string();
}
}
if let Some(callback) = encoded.callback {
loop {
let frame = self.client.recv_text_timeout(self.response_timeout)?;
let decoded = ws_protocol::decode_callback_frame(&frame)?;
if decoded.callback_name == callback.callback_name {
return Ok(CommandOutput {
seq,
success: true,
data: json!({ "text": decoded.response_text }),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 0,
exec_ms: 0,
},
});
}
}
}
Ok(CommandOutput {
seq,
success: true,
data: json!({}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 0,
exec_ms: 0,
},
})
}
fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.mac_policy.privileged_surface_metadata()
}
fn supports_eval(&self) -> bool {
self.mac_policy.supports_pipe_action(&Action::Eval)
}
}
fn parse_status_code(raw: &str) -> Result<i64, PipeError> {
raw.trim()
.parse::<i64>()
.map_err(|_| PipeError::Protocol(format!("invalid browser status frame: {raw}")))
}
fn recv_status_frame(client: &dyn WsClient, timeout: Duration) -> Result<String, PipeError> {
loop {
let frame = client.recv_text_timeout(timeout)?;
if is_ignorable_status_prelude(&frame) {
continue;
}
return Ok(frame);
}
}
fn is_ignorable_status_prelude(frame: &str) -> bool {
let trimmed = frame.trim();
if trimmed.starts_with("Welcome!") || trimmed.starts_with("Welcome ") {
return true;
}
serde_json::from_str::<Value>(trimmed)
.ok()
.and_then(|value| value.get("type").and_then(Value::as_str).map(str::to_string))
.is_some_and(|kind| kind == "welcome")
}

307
src/browser/ws_probe.rs Normal file
View File

@@ -0,0 +1,307 @@
use std::net::TcpStream;
use std::time::Duration;
use thiserror::Error;
use tungstenite::stream::MaybeTlsStream;
use tungstenite::{connect, Message, WebSocket};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProbeStep {
pub label: String,
pub payload: String,
pub expect_reply: bool,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProbeOutcome {
Received(Vec<String>),
NoReplyExpected,
TimedOut,
Closed,
ConnectFailed(String),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProbeStepResult {
pub label: String,
pub sent: String,
pub outcome: ProbeOutcome,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProbeCliConfig {
pub ws_url: String,
pub timeout_ms: u64,
pub steps: Vec<ProbeStep>,
}
const DEFAULT_TIMEOUT_MS: u64 = 1500;
const DEFAULT_REGISTER_STEP_LABEL: &str = "register";
const DEFAULT_REGISTER_STEP_PAYLOAD: &str = r#"{"type":"register","role":"web"}"#;
#[derive(Debug, Error)]
pub enum ProbeError {
#[error("io error: {0}")]
Io(#[from] std::io::Error),
#[error("probe timeout while waiting for websocket frame")]
Timeout,
#[error("probe websocket closed")]
Closed,
#[error("probe protocol error: {0}")]
Protocol(String),
#[error("probe argument error: {0}")]
Args(String),
}
pub fn parse_probe_args(args: &[String]) -> Result<ProbeCliConfig, ProbeError> {
let mut ws_url = None;
let mut timeout_ms = None;
let mut steps = Vec::new();
let mut index = 0;
while index < args.len() {
match args[index].as_str() {
"--ws-url" => {
index += 1;
let value = args
.get(index)
.ok_or_else(|| ProbeError::Args("missing value for --ws-url".to_string()))?;
ws_url = Some(value.clone());
}
"--timeout-ms" => {
index += 1;
let value = args.get(index).ok_or_else(|| {
ProbeError::Args("missing value for --timeout-ms".to_string())
})?;
let parsed = value.parse::<u64>().map_err(|_| {
ProbeError::Args(format!("invalid --timeout-ms value: {value}"))
})?;
timeout_ms = Some(parsed);
}
"--step" => {
index += 1;
let value = args
.get(index)
.ok_or_else(|| ProbeError::Args("missing value for --step".to_string()))?;
let (label, payload) = value.split_once("::").ok_or_else(|| {
ProbeError::Args(format!(
"invalid --step value (expected <label>::<payload>): {value}"
))
})?;
if label.is_empty() {
return Err(ProbeError::Args("step label must not be empty".to_string()));
}
if payload.is_empty() {
return Err(ProbeError::Args("step payload must not be empty".to_string()));
}
steps.push(ProbeStep {
label: label.to_string(),
payload: payload.to_string(),
expect_reply: true,
});
}
flag => {
return Err(ProbeError::Args(format!("unknown argument: {flag}")));
}
}
index += 1;
}
let ws_url = ws_url.ok_or_else(|| ProbeError::Args("missing required --ws-url".to_string()))?;
validate_ws_url(&ws_url)?;
let timeout_ms = timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS);
if steps.is_empty() {
steps.push(ProbeStep {
label: DEFAULT_REGISTER_STEP_LABEL.to_string(),
payload: DEFAULT_REGISTER_STEP_PAYLOAD.to_string(),
expect_reply: true,
});
}
Ok(ProbeCliConfig {
ws_url,
timeout_ms,
steps,
})
}
fn validate_ws_url(ws_url: &str) -> Result<(), ProbeError> {
if ws_url.starts_with("ws://") {
return Ok(());
}
Err(ProbeError::Args(format!(
"unsupported --ws-url scheme (only ws:// is supported for this probe): {ws_url}"
)))
}
pub fn run_probe_script(
ws_url: &str,
timeout: Duration,
steps: Vec<ProbeStep>,
) -> Result<Vec<ProbeStepResult>, ProbeError> {
let mut socket = match connect(ws_url) {
Ok((socket, _)) => socket,
Err(err) => {
let message = err.to_string();
return Ok(steps
.into_iter()
.map(|step| ProbeStepResult {
label: step.label,
sent: step.payload,
outcome: ProbeOutcome::ConnectFailed(message.clone()),
})
.collect());
}
};
configure_socket_timeout(&mut socket, timeout)?;
let mut results = Vec::with_capacity(steps.len());
for step in steps {
let ProbeStep {
label,
payload,
expect_reply,
} = step;
let send_outcome = match socket.send(Message::Text(payload.clone().into())) {
Ok(()) => None,
Err(err) => Some(map_websocket_error(err, "browser websocket send")),
};
let outcome = match send_outcome {
Some(ProbeError::Timeout) => ProbeOutcome::TimedOut,
Some(ProbeError::Closed) => ProbeOutcome::Closed,
Some(err) => return Err(err),
None if expect_reply => match read_probe_frames(&mut socket) {
Ok(frames) => ProbeOutcome::Received(frames),
Err(ProbeError::Timeout) => ProbeOutcome::TimedOut,
Err(ProbeError::Closed) => ProbeOutcome::Closed,
Err(err) => return Err(err),
},
None => ProbeOutcome::NoReplyExpected,
};
results.push(ProbeStepResult {
label,
sent: payload,
outcome,
});
}
Ok(results)
}
fn configure_socket_timeout(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
timeout: Duration,
) -> Result<(), ProbeError> {
match websocket.get_mut() {
MaybeTlsStream::Plain(stream) => {
stream.set_read_timeout(Some(timeout))?;
stream.set_write_timeout(Some(timeout))?;
Ok(())
}
_ => Ok(()),
}
}
fn read_probe_frames(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
) -> Result<Vec<String>, ProbeError> {
let first_frame = read_probe_frame(websocket)?;
let mut frames = vec![first_frame];
let Some(original_timeout) = get_plain_read_timeout(websocket)? else {
return Ok(frames);
};
set_plain_read_timeout(websocket, Some(Duration::from_millis(1)))?;
loop {
match read_probe_frame(websocket) {
Ok(frame) => frames.push(frame),
Err(ProbeError::Timeout) | Err(ProbeError::Closed) => break,
Err(err) => {
set_plain_read_timeout(websocket, original_timeout)?;
return Err(err);
}
}
}
set_plain_read_timeout(websocket, original_timeout)?;
Ok(frames)
}
fn get_plain_read_timeout(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
) -> Result<Option<Option<Duration>>, ProbeError> {
match websocket.get_mut() {
MaybeTlsStream::Plain(stream) => Ok(Some(stream.read_timeout()?)),
_ => Ok(None),
}
}
fn set_plain_read_timeout(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
timeout: Option<Duration>,
) -> Result<(), ProbeError> {
match websocket.get_mut() {
MaybeTlsStream::Plain(stream) => {
stream.set_read_timeout(timeout)?;
Ok(())
}
_ => Ok(()),
}
}
fn read_probe_frame(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
) -> Result<String, ProbeError> {
loop {
match websocket.read() {
Ok(Message::Text(text)) => return Ok(text.to_string()),
Ok(Message::Close(_)) => return Err(ProbeError::Closed),
Ok(Message::Ping(payload)) => {
websocket
.send(Message::Pong(payload))
.map_err(|err| map_websocket_error(err, "browser websocket pong"))?;
}
Ok(_) => {}
Err(err) => return Err(map_websocket_error(err, "browser websocket read")),
}
}
}
fn map_websocket_error(err: tungstenite::Error, operation: &str) -> ProbeError {
match err {
tungstenite::Error::ConnectionClosed
| tungstenite::Error::AlreadyClosed
| tungstenite::Error::Protocol(tungstenite::error::ProtocolError::ResetWithoutClosingHandshake)
| tungstenite::Error::Protocol(tungstenite::error::ProtocolError::SendAfterClosing) => {
ProbeError::Closed
}
tungstenite::Error::Io(io_err)
if matches!(
io_err.kind(),
std::io::ErrorKind::TimedOut | std::io::ErrorKind::WouldBlock
) =>
{
ProbeError::Timeout
}
tungstenite::Error::Io(io_err)
if matches!(
io_err.kind(),
std::io::ErrorKind::ConnectionAborted
| std::io::ErrorKind::ConnectionReset
| std::io::ErrorKind::BrokenPipe
| std::io::ErrorKind::UnexpectedEof
) =>
{
ProbeError::Closed
}
tungstenite::Error::Io(io_err) => ProbeError::Io(io_err),
other => ProbeError::Protocol(format!("{operation} failed: {other}")),
}
}

306
src/browser/ws_protocol.rs Normal file
View File

@@ -0,0 +1,306 @@
use serde_json::{json, Value};
use crate::pipe::{Action, PipeError};
const CALLBACK_DELIMITER: &str = "@_@";
const CALLBACK_PREFIX: &str = "sgclaw_cb_";
const JS_AREA_HIDE: &str = "hide";
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CallbackCorrelation {
pub request_id: String,
pub callback_name: String,
pub source_url: String,
pub target_url: String,
pub action_url: String,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EncodedWsRequest {
pub payload: String,
pub callback: Option<CallbackCorrelation>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecodedCallback {
pub source_url: String,
pub target_url: String,
pub callback_name: String,
pub action_url: String,
pub response_text: String,
}
pub fn encode_v1_action(
action: &Action,
params: &Value,
request_url: &str,
request_id: Option<&str>,
) -> Result<EncodedWsRequest, PipeError> {
match action {
Action::Navigate => encode_navigate(params, request_url, request_id),
Action::Click => encode_click(params, request_url),
Action::Type => encode_type(params, request_url),
Action::GetText => encode_get_text(params, request_url, request_id),
Action::Eval => encode_eval(params, request_url, request_id),
_ => Err(PipeError::Protocol(format!(
"unsupported browser ws action: {}",
action.as_str()
))),
}
}
pub fn decode_callback_frame(frame: &str) -> Result<DecodedCallback, PipeError> {
let payload: Value = serde_json::from_str(frame)?;
let array = payload.as_array().ok_or_else(|| {
PipeError::Protocol("callback frame must be a JSON array".to_string())
})?;
if array.len() != 3 {
return Err(PipeError::Protocol(
"callback frame must contain [requesturl, function, payload]".to_string(),
));
}
let function_name = array[1].as_str().ok_or_else(|| {
PipeError::Protocol("callback frame function name must be a string".to_string())
})?;
if function_name != "callBackJsToCpp" {
return Err(PipeError::Protocol(
"callback frame must target callBackJsToCpp".to_string(),
));
}
let param = array[2].as_str().ok_or_else(|| {
PipeError::Protocol("callback payload must be a string".to_string())
})?;
let mut parts = param.splitn(5, CALLBACK_DELIMITER);
let source_url = parts.next().unwrap_or_default();
let target_url = parts.next().unwrap_or_default();
let callback_name = parts.next().unwrap_or_default();
let action_url = parts.next().unwrap_or_default();
let response_text = parts.next().unwrap_or_default();
if source_url.is_empty()
|| target_url.is_empty()
|| callback_name.is_empty()
|| action_url.is_empty()
|| response_text.is_empty() && !param.ends_with(CALLBACK_DELIMITER)
{
return Err(PipeError::Protocol(
"malformed callback payload".to_string(),
));
}
Ok(DecodedCallback {
source_url: source_url.to_string(),
target_url: target_url.to_string(),
callback_name: callback_name.to_string(),
action_url: action_url.to_string(),
response_text: response_text.to_string(),
})
}
fn encode_navigate(
params: &Value,
request_url: &str,
request_id: Option<&str>,
) -> Result<EncodedWsRequest, PipeError> {
let url = required_string(params, "url")?;
let callback = callback_metadata(
request_id,
request_url,
&url,
"sgHideBrowserCallAfterLoaded",
)?;
let callback_call = format!(
"callBackJsToCpp(\"{request_url}@_@{url}@_@{callback_name}@_@sgHideBrowserCallAfterLoaded@_@\")",
callback_name = callback.callback_name,
);
Ok(EncodedWsRequest {
payload: serde_json::to_string(&json!([
request_url,
"sgHideBrowserCallAfterLoaded",
url,
callback_call,
]))?,
callback: Some(callback),
})
}
fn encode_click(params: &Value, request_url: &str) -> Result<EncodedWsRequest, PipeError> {
let target_url = target_url(params, request_url)?;
let selector = required_string(params, "selector")?;
let script = format!(
"(function(){{const el=document.querySelector({selector:?});if(!el){{throw new Error(\"selector not found: {selector}\");}}el.click();}})();"
);
encode_js_in_area(request_url, &target_url, &script, None)
}
fn encode_type(params: &Value, request_url: &str) -> Result<EncodedWsRequest, PipeError> {
let target_url = target_url(params, request_url)?;
let selector = required_string(params, "selector")?;
let text = required_string(params, "text")?;
let script = format!(
"(function(){{const el=document.querySelector({selector:?});if(!el){{throw new Error(\"selector not found: {selector}\");}}el.value={text:?};el.dispatchEvent(new Event(\"input\",{{bubbles:true}}));el.dispatchEvent(new Event(\"change\",{{bubbles:true}}));}})();"
);
encode_js_in_area(request_url, &target_url, &script, None)
}
fn encode_get_text(
params: &Value,
request_url: &str,
request_id: Option<&str>,
) -> Result<EncodedWsRequest, PipeError> {
let target_url = target_url(params, request_url)?;
let selector = required_string(params, "selector")?;
let callback = callback_metadata(
request_id,
request_url,
&target_url,
"sgBrowserExcuteJsCodeByArea",
)?;
let script = format!(
"(function(){{const el=document.querySelector({selector:?});if(!el){{throw new Error(\"selector not found: {selector}\");}}const text=el.innerText ?? el.textContent ?? \"\";callBackJsToCpp(\"{request_url}@_@{target_url}@_@{callback_name}@_@sgBrowserExcuteJsCodeByArea@_@\"+String(text));}})();",
callback_name = callback.callback_name
);
encode_js_in_area(request_url, &target_url, &script, Some(callback))
}
fn encode_eval(
params: &Value,
request_url: &str,
request_id: Option<&str>,
) -> Result<EncodedWsRequest, PipeError> {
let target_url = target_url(params, request_url)?;
let source_script = required_string(params, "script")?;
let callback = callback_metadata(
request_id,
request_url,
&target_url,
"sgBrowserExcuteJsCodeByArea",
)?;
let script = format!(
"(function(){{const result=(function(){{{source_script}}})();callBackJsToCpp(\"{request_url}@_@{target_url}@_@{callback_name}@_@sgBrowserExcuteJsCodeByArea@_@\"+String(result));}})();",
callback_name = callback.callback_name
);
encode_js_in_area(request_url, &target_url, &script, Some(callback))
}
fn encode_js_in_area(
request_url: &str,
target_url: &str,
script: &str,
callback: Option<CallbackCorrelation>,
) -> Result<EncodedWsRequest, PipeError> {
Ok(EncodedWsRequest {
payload: serde_json::to_string(&json!([
request_url,
"sgBrowserExcuteJsCodeByArea",
target_url,
script,
JS_AREA_HIDE,
]))?,
callback,
})
}
fn callback_metadata(
request_id: Option<&str>,
request_url: &str,
target_url: &str,
action_url: &str,
) -> Result<CallbackCorrelation, PipeError> {
let request_id = request_id
.map(str::trim)
.filter(|value| !value.is_empty())
.ok_or_else(|| PipeError::Protocol("request_id is required".to_string()))?;
Ok(CallbackCorrelation {
request_id: request_id.to_string(),
callback_name: format!("{CALLBACK_PREFIX}{request_id}"),
source_url: request_url.to_string(),
target_url: target_url.to_string(),
action_url: action_url.to_string(),
})
}
fn target_url(params: &Value, request_url: &str) -> Result<String, PipeError> {
Ok(optional_string(params, "target_url")
.filter(|value| !value.trim().is_empty())
.unwrap_or_else(|| request_url.to_string()))
}
fn required_string(params: &Value, key: &str) -> Result<String, PipeError> {
optional_string(params, key)
.filter(|value| !value.trim().is_empty())
.ok_or_else(|| PipeError::Protocol(format!("{key} is required")))
}
fn optional_string(params: &Value, key: &str) -> Option<String> {
params.get(key)?.as_str().map(ToString::to_string)
}
#[cfg(test)]
mod tests {
use super::{decode_callback_frame, encode_v1_action};
use crate::pipe::Action;
use serde_json::{json, Value};
#[test]
fn get_text_callback_uses_documented_browser_opcode() {
let request = encode_v1_action(
&Action::GetText,
&json!({
"target_url": "https://www.zhihu.com/hot",
"selector": "#content"
}),
"https://www.zhihu.com/hot",
Some("req42"),
)
.unwrap();
let payload: Value = serde_json::from_str(&request.payload).unwrap();
assert_eq!(payload[1], json!("sgBrowserExcuteJsCodeByArea"));
assert_eq!(payload[4], json!("hide"));
assert_eq!(
request.callback.unwrap().action_url,
"sgBrowserExcuteJsCodeByArea"
);
assert!(payload[3].as_str().unwrap().contains(
"callBackJsToCpp(\"https://www.zhihu.com/hot@_@https://www.zhihu.com/hot@_@sgclaw_cb_req42@_@sgBrowserExcuteJsCodeByArea@_@\"+String(text))"
));
}
#[test]
fn eval_callback_uses_documented_browser_opcode() {
let request = encode_v1_action(
&Action::Eval,
&json!({
"target_url": "https://www.zhihu.com/hot",
"script": "2 + 2"
}),
"https://www.zhihu.com/hot",
Some("req-eval"),
)
.unwrap();
let payload: Value = serde_json::from_str(&request.payload).unwrap();
assert_eq!(payload[1], json!("sgBrowserExcuteJsCodeByArea"));
assert_eq!(
request.callback.unwrap().action_url,
"sgBrowserExcuteJsCodeByArea"
);
assert!(payload[3].as_str().unwrap().contains(
"callBackJsToCpp(\"https://www.zhihu.com/hot@_@https://www.zhihu.com/hot@_@sgclaw_cb_req-eval@_@sgBrowserExcuteJsCodeByArea@_@\"+String(result))"
));
}
#[test]
fn decodes_documented_callback_payload() {
let callback = decode_callback_frame(
r#"["https://www.zhihu.com/hot","callBackJsToCpp","https://www.zhihu.com/hot@_@https://www.zhihu.com/hot@_@sgclaw_cb_req42@_@sgBrowserExcuteJsCodeByArea@_@天气"]"#,
)
.unwrap();
assert_eq!(callback.action_url, "sgBrowserExcuteJsCodeByArea");
assert_eq!(callback.response_text, "天气");
}
}

View File

@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_trait::async_trait;
use reqwest::Url;
@@ -8,22 +9,23 @@ use serde_json::{json, Value};
use zeroclaw::skills::{Skill, SkillTool};
use zeroclaw::tools::{Tool, ToolResult};
use crate::pipe::{Action, BrowserPipeTool, Transport};
use crate::browser::BrowserBackend;
use crate::pipe::Action;
pub struct BrowserScriptSkillTool<T: Transport> {
pub struct BrowserScriptSkillTool {
tool_name: String,
tool_description: String,
script_path: PathBuf,
args: HashMap<String, String>,
browser_tool: BrowserPipeTool<T>,
browser_tool: Arc<dyn BrowserBackend>,
}
impl<T: Transport> BrowserScriptSkillTool<T> {
impl BrowserScriptSkillTool {
pub fn new(
skill_name: &str,
tool: &SkillTool,
skill_root: &Path,
browser_tool: BrowserPipeTool<T>,
browser_tool: Arc<dyn BrowserBackend>,
) -> anyhow::Result<Self> {
let script_path = skill_root.join(&tool.command);
let canonical_skill_root = skill_root
@@ -83,7 +85,7 @@ impl<T: Transport> BrowserScriptSkillTool<T> {
}
#[async_trait]
impl<T: Transport + 'static> Tool for BrowserScriptSkillTool<T> {
impl Tool for BrowserScriptSkillTool {
fn name(&self) -> &str {
&self.tool_name
}
@@ -175,12 +177,16 @@ impl<T: Transport + 'static> Tool for BrowserScriptSkillTool<T> {
}
}
pub fn build_browser_script_skill_tools<T: Transport + 'static>(
pub fn build_browser_script_skill_tools(
skills: &[Skill],
browser_tool: BrowserPipeTool<T>,
browser_tool: Arc<dyn BrowserBackend>,
) -> Result<Vec<Box<dyn Tool>>, anyhow::Error> {
let mut tools: Vec<Box<dyn Tool>> = Vec::new();
if !browser_tool.supports_eval() {
return Ok(tools);
}
for skill in skills {
let Some(location) = skill.location.as_ref() else {
continue;

View File

@@ -1,9 +1,12 @@
use std::sync::Arc;
use async_trait::async_trait;
use reqwest::Url;
use serde_json::{json, Map, Value};
use zeroclaw::tools::{Tool, ToolResult};
use crate::pipe::{Action, BrowserPipeTool, ExecutionSurfaceMetadata, Transport};
use crate::browser::BrowserBackend;
use crate::pipe::{Action, ExecutionSurfaceMetadata};
pub const BROWSER_ACTION_TOOL_NAME: &str = "browser_action";
pub const SUPERRPA_BROWSER_TOOL_NAME: &str = "superrpa_browser";
@@ -17,14 +20,14 @@ const MAX_DATA_ARRAY_ITEMS: usize = 12;
const MAX_DATA_OBJECT_FIELDS: usize = 24;
const MAX_DATA_RECURSION_DEPTH: usize = 4;
pub struct ZeroClawBrowserTool<T: Transport> {
browser_tool: BrowserPipeTool<T>,
pub struct ZeroClawBrowserTool {
browser_tool: Arc<dyn BrowserBackend>,
tool_name: &'static str,
description: &'static str,
}
impl<T: Transport> ZeroClawBrowserTool<T> {
pub fn new(browser_tool: BrowserPipeTool<T>) -> Self {
impl ZeroClawBrowserTool {
pub fn new(browser_tool: Arc<dyn BrowserBackend>) -> Self {
Self::named(
browser_tool,
BROWSER_ACTION_TOOL_NAME,
@@ -32,7 +35,7 @@ impl<T: Transport> ZeroClawBrowserTool<T> {
)
}
pub fn new_superrpa(browser_tool: BrowserPipeTool<T>) -> Self {
pub fn new_superrpa(browser_tool: Arc<dyn BrowserBackend>) -> Self {
Self::named(
browser_tool,
SUPERRPA_BROWSER_TOOL_NAME,
@@ -41,7 +44,7 @@ impl<T: Transport> ZeroClawBrowserTool<T> {
}
fn named(
browser_tool: BrowserPipeTool<T>,
browser_tool: Arc<dyn BrowserBackend>,
tool_name: &'static str,
description: &'static str,
) -> Self {
@@ -58,7 +61,7 @@ impl<T: Transport> ZeroClawBrowserTool<T> {
}
#[async_trait]
impl<T: Transport + 'static> Tool for ZeroClawBrowserTool<T> {
impl Tool for ZeroClawBrowserTool {
fn name(&self) -> &str {
self.tool_name
}

View File

@@ -4,10 +4,13 @@ use serde_json::{json, Value};
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
use zeroclaw::tools::{Tool, ToolResult};
use zip::write::SimpleFileOptions;
use zip::{CompressionMethod, ZipWriter};
const OPENXML_OFFICE_TOOL_NAME: &str = "openxml_office";
const DEFAULT_SHEET_NAME: &str = "知乎热榜";
@@ -128,7 +131,9 @@ impl Tool for OpenXmlOfficeTool {
write_payload_json(&payload_path, &normalized_rows)?;
write_request_json(&request_path, &template_path, &payload_path, &output_path)?;
let rendered = run_openxml_cli(&request_path)?;
let rendered = run_openxml_cli(&request_path).or_else(|_| {
render_locally(&template_path, &payload_path, &output_path)
})?;
let artifact_path = rendered["data"]["artifact"]["path"]
.as_str()
.map(str::to_string)
@@ -280,9 +285,14 @@ fn run_openxml_cli(request_path: &Path) -> anyhow::Result<Value> {
.parent()
.map(|path| path.join("openxml_cli").join("Cargo.toml"))
.ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli manifest path"))?;
let binary_name = if cfg!(windows) {
"openxml-cli.exe"
} else {
"openxml-cli"
};
let binary_path = manifest_path
.parent()
.map(|path| path.join("target").join("debug").join("openxml-cli"))
.map(|path| path.join("target").join("debug").join(binary_name))
.ok_or_else(|| anyhow::anyhow!("failed to resolve openxml_cli binary path"))?;
let output = if binary_path.exists() {
@@ -325,6 +335,87 @@ fn run_openxml_cli(request_path: &Path) -> anyhow::Result<Value> {
Ok(serde_json::from_str(&stdout)?)
}
fn render_locally(template_path: &Path, payload_path: &Path, output_path: &Path) -> anyhow::Result<Value> {
let payload: Value = serde_json::from_slice(&fs::read(payload_path)?)?;
let variables = payload["variables"]
.as_object()
.ok_or_else(|| anyhow::anyhow!("payload.variables must be an object"))?;
let worksheet = render_template_xml(&worksheet_xml_from_xlsx(template_path)?, variables);
write_rendered_xlsx(template_path, output_path, "xl/worksheets/sheet1.xml", &worksheet)?;
Ok(json!({
"data": {
"artifact": {
"path": output_path.to_string_lossy().to_string(),
}
}
}))
}
fn worksheet_xml_from_xlsx(path: &Path) -> anyhow::Result<String> {
let file = fs::File::open(path)?;
let mut archive = zip::ZipArchive::new(file)?;
let mut sheet = archive.by_name("xl/worksheets/sheet1.xml")?;
let mut xml = String::new();
std::io::Read::read_to_string(&mut sheet, &mut xml)?;
Ok(xml)
}
fn render_template_xml(
template: &str,
variables: &serde_json::Map<String, Value>,
) -> String {
let mut rendered = template.to_string();
for (key, value) in variables {
let placeholder = format!("{{{{{key}}}}}");
let replacement = value.as_str().unwrap_or_default();
rendered = rendered.replace(&placeholder, &xml_escape(replacement));
}
rendered
}
fn write_rendered_xlsx(
template_path: &Path,
output_path: &Path,
replaced_entry: &str,
replaced_body: &str,
) -> anyhow::Result<()> {
if let Some(parent) = output_path.parent() {
fs::create_dir_all(parent)?;
}
if output_path.exists() {
fs::remove_file(output_path)?;
}
let input = fs::File::open(template_path)?;
let mut archive = zip::ZipArchive::new(input)?;
let output = fs::File::create(output_path)?;
let mut writer = ZipWriter::new(output);
let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
for index in 0..archive.len() {
let mut entry = archive.by_index(index)?;
let name = entry.name().to_string();
writer.start_file(name.as_str(), options)?;
if name == replaced_entry {
writer.write_all(replaced_body.as_bytes())?;
} else {
std::io::copy(&mut entry, &mut writer)?;
}
}
writer.finish()?;
Ok(())
}
fn xml_escape(value: &str) -> String {
value
.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
}
fn value_to_string(value: &Value) -> String {
match value {
Value::String(text) => text.clone(),
@@ -336,46 +427,58 @@ fn value_to_string(value: &Value) -> String {
}
fn write_hotlist_template(path: &Path, row_count: usize) -> anyhow::Result<()> {
let build_root = path
.parent()
.ok_or_else(|| anyhow::anyhow!("template path has no parent"))?
.join("template-build");
fs::create_dir_all(build_root.join("_rels"))?;
fs::create_dir_all(build_root.join("docProps"))?;
fs::create_dir_all(build_root.join("xl/_rels"))?;
fs::create_dir_all(build_root.join("xl/worksheets"))?;
write_zip_file(&path, &[Content {
path: "[Content_Types].xml",
body: content_types_xml().to_string(),
},
Content {
path: "_rels/.rels",
body: root_rels_xml().to_string(),
},
Content {
path: "docProps/app.xml",
body: app_xml().to_string(),
},
Content {
path: "docProps/core.xml",
body: core_xml().to_string(),
},
Content {
path: "xl/workbook.xml",
body: workbook_xml().to_string(),
},
Content {
path: "xl/_rels/workbook.xml.rels",
body: workbook_rels_xml().to_string(),
},
Content {
path: "xl/worksheets/sheet1.xml",
body: worksheet_xml(row_count),
}])?;
Ok(())
}
fs::write(build_root.join("[Content_Types].xml"), content_types_xml())?;
fs::write(build_root.join("_rels/.rels"), root_rels_xml())?;
fs::write(build_root.join("docProps/app.xml"), app_xml())?;
fs::write(build_root.join("docProps/core.xml"), core_xml())?;
fs::write(build_root.join("xl/workbook.xml"), workbook_xml())?;
fs::write(
build_root.join("xl/_rels/workbook.xml.rels"),
workbook_rels_xml(),
)?;
fs::write(
build_root.join("xl/worksheets/sheet1.xml"),
worksheet_xml(row_count),
)?;
struct Content<'a> {
path: &'a str,
body: String,
}
fn write_zip_file(path: &Path, entries: &[Content<'_>]) -> anyhow::Result<()> {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;
}
if path.exists() {
fs::remove_file(path)?;
}
let zip = Command::new("zip")
.current_dir(&build_root)
.args(["-q", "-r", path.to_string_lossy().as_ref(), "."])
.output()?;
if !zip.status.success() {
let stderr = String::from_utf8_lossy(&zip.stderr);
return Err(anyhow::anyhow!(format!(
"failed to create xlsx template: {}",
stderr.trim()
)));
let file = fs::File::create(path)?;
let mut zip = ZipWriter::new(file);
let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
for entry in entries {
zip.start_file(entry.path, options)?;
zip.write_all(entry.body.as_bytes())?;
}
let _ = fs::remove_dir_all(&build_root);
zip.finish()?;
Ok(())
}

View File

@@ -1,5 +1,7 @@
use std::path::Path;
use std::sync::Arc;
use crate::browser::BrowserBackend;
use crate::compat::runtime::CompatTaskContext;
use crate::config::SgClawSettings;
use crate::pipe::{BrowserPipeTool, PipeError, Transport};
@@ -26,6 +28,68 @@ pub fn should_use_primary_orchestration(
crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) && needs_export
}
pub fn execute_task_with_browser_backend(
transport: &dyn crate::agent::AgentEventSink,
browser_backend: Arc<dyn BrowserBackend>,
instruction: &str,
task_context: &CompatTaskContext,
workspace_root: &Path,
settings: &SgClawSettings,
) -> Result<String, PipeError> {
let route = crate::compat::workflow_executor::detect_route(
instruction,
task_context.page_url.as_deref(),
task_context.page_title.as_deref(),
);
if let Some(route) = route.clone() {
if crate::compat::workflow_executor::prefers_direct_execution(&route) {
return crate::compat::workflow_executor::execute_route_with_browser_backend(
transport,
browser_backend.clone(),
workspace_root,
instruction,
task_context,
route,
);
}
}
let primary_result = crate::compat::runtime::execute_task_with_browser_backend(
transport,
browser_backend.clone(),
instruction,
task_context,
workspace_root,
settings,
);
match (route, primary_result) {
(Some(route), Ok(summary))
if crate::compat::workflow_executor::should_fallback_after_summary(
&summary, &route,
) =>
{
crate::compat::workflow_executor::execute_route_with_browser_backend(
transport,
browser_backend,
workspace_root,
instruction,
task_context,
route,
)
}
(_, Ok(summary)) => Ok(summary),
(Some(route), Err(_)) => crate::compat::workflow_executor::execute_route_with_browser_backend(
transport,
browser_backend,
workspace_root,
instruction,
task_context,
route,
),
(None, Err(err)) => Err(err),
}
}
pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
transport: &T,
browser_tool: BrowserPipeTool<T>,

View File

@@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use async_trait::async_trait;
use futures_util::{stream, StreamExt};
@@ -8,6 +9,7 @@ use zeroclaw::config::Config as ZeroClawConfig;
use zeroclaw::providers::traits::{ProviderCapabilities, StreamEvent, StreamOptions, StreamResult};
use zeroclaw::providers::{self, ChatMessage, ChatRequest, ChatResponse, Provider};
use crate::browser::{BrowserBackend, PipeBrowserBackend};
use crate::compat::browser_script_skill_tool::build_browser_script_skill_tools;
use crate::compat::browser_tool_adapter::ZeroClawBrowserTool;
use crate::compat::config_adapter::{
@@ -47,6 +49,32 @@ pub fn execute_task<T: Transport + 'static>(
)
}
pub fn execute_task_with_browser_backend(
transport: &dyn crate::agent::AgentEventSink,
browser_backend: Arc<dyn BrowserBackend>,
instruction: &str,
task_context: &CompatTaskContext,
workspace_root: &Path,
settings: &SgClawSettings,
) -> Result<String, PipeError> {
let config = build_zeroclaw_config_from_sgclaw_settings(workspace_root, settings);
let skills_dir = resolve_skills_dir_from_sgclaw_settings(workspace_root, settings);
let provider = build_provider(&config)?;
let runtime = tokio::runtime::Runtime::new()
.map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?;
runtime.block_on(execute_task_with_provider(
transport,
browser_backend,
provider,
instruction,
task_context,
config,
skills_dir,
settings.clone(),
))
}
pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
transport: &T,
browser_tool: BrowserPipeTool<T>,
@@ -63,7 +91,7 @@ pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
runtime.block_on(execute_task_with_provider(
transport,
browser_tool,
Arc::new(PipeBrowserBackend::from_inner(browser_tool)),
provider,
instruction,
task_context,
@@ -73,9 +101,9 @@ pub fn execute_task_with_sgclaw_settings<T: Transport + 'static>(
))
}
pub async fn execute_task_with_provider<T: Transport + 'static>(
transport: &T,
browser_tool: BrowserPipeTool<T>,
pub async fn execute_task_with_provider(
transport: &dyn crate::agent::AgentEventSink,
browser_backend: Arc<dyn BrowserBackend>,
provider: Box<dyn Provider>,
instruction: &str,
task_context: &CompatTaskContext,
@@ -116,11 +144,13 @@ pub async fn execute_task_with_provider<T: Transport + 'static>(
message: format!("loaded skills: {}", loaded_skill_labels.join(", ")),
})?;
}
let browser_tool_for_scripts = browser_tool.clone();
let browser_tool_for_scripts = browser_backend.clone();
let browser_tool_for_superrpa = browser_backend.clone();
let browser_tool_for_browser_action = browser_backend;
let mut tools: Vec<Box<dyn zeroclaw::tools::Tool>> = if browser_surface_present {
vec![
Box::new(ZeroClawBrowserTool::new_superrpa(browser_tool.clone())),
Box::new(ZeroClawBrowserTool::new(browser_tool)),
Box::new(ZeroClawBrowserTool::new_superrpa(browser_tool_for_superrpa)),
Box::new(ZeroClawBrowserTool::new(browser_tool_for_browser_action)),
]
} else {
Vec::new()

View File

@@ -1,5 +1,6 @@
use std::fs;
use std::path::Path;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
@@ -7,6 +8,7 @@ use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;
use crate::browser::{BrowserBackend, PipeBrowserBackend};
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::runtime::CompatTaskContext;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
@@ -21,8 +23,13 @@ const ZHIHU_CREATOR_URL: &str = "https://www.zhihu.com/creator";
const ZHIHU_EDITOR_URL: &str = "https://zhuanlan.zhihu.com/write";
const HOTLIST_READY_POLL_ATTEMPTS: usize = 10;
const HOTLIST_READY_POLL_INTERVAL: Duration = Duration::from_millis(500);
// Simplified readiness pattern: only checks that *some* heat metric exists
// (e.g. "3440万热度", "2.1亿"). The full rank-title-heat structure is validated
// later by the extraction script. Using a simple pattern avoids problems with
// the multi-line innerText format where rank, title, and heat are on separate
// lines (`.` does not cross newlines by default).
const HOTLIST_TEXT_READY_PATTERN: &str =
r"(?:^|\n)\s*1(?:[.、]|\s)+.+\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)(?:热度)?";
r"\d+(?:\.\d+)?\s*(?:万|亿|k|K|m|M)\s*(?:热度)?";
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
ZhihuHotlistExportXlsx,
@@ -113,9 +120,9 @@ pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bo
)
}
pub fn execute_route<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
pub fn execute_route_with_browser_backend(
transport: &dyn crate::agent::AgentEventSink,
browser_backend: Arc<dyn BrowserBackend>,
workspace_root: &Path,
instruction: &str,
task_context: &CompatTaskContext,
@@ -124,37 +131,61 @@ pub fn execute_route<T: Transport + 'static>(
match route {
WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen => {
let top_n = extract_top_n(instruction);
let items = collect_hotlist_items(transport, browser_tool, top_n, task_context)?;
let items = collect_hotlist_items(transport, browser_backend.as_ref(), top_n, task_context)?;
if items.is_empty() {
return Err(PipeError::Protocol(
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
));
}
match route {
WorkflowRoute::ZhihuHotlistExportXlsx => {
export_xlsx(transport, workspace_root, &items)
}
WorkflowRoute::ZhihuHotlistScreen => {
export_screen(transport, workspace_root, &items)
}
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
_ => unreachable!("handled by outer match"),
}
}
WorkflowRoute::ZhihuArticleEntry => {
execute_zhihu_article_entry_route(transport, browser_tool)
}
WorkflowRoute::ZhihuArticleDraft => {
execute_zhihu_article_route(transport, browser_tool, instruction, task_context, false)
}
WorkflowRoute::ZhihuArticlePublish => {
execute_zhihu_article_route(transport, browser_tool, instruction, task_context, true)
execute_zhihu_article_entry_route(transport, browser_backend.as_ref())
}
WorkflowRoute::ZhihuArticleDraft => execute_zhihu_article_route(
transport,
browser_backend.as_ref(),
instruction,
task_context,
false,
),
WorkflowRoute::ZhihuArticlePublish => execute_zhihu_article_route(
transport,
browser_backend.as_ref(),
instruction,
task_context,
true,
),
}
}
fn collect_hotlist_items<T: Transport + 'static>(
pub fn execute_route<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
workspace_root: &Path,
instruction: &str,
task_context: &CompatTaskContext,
route: WorkflowRoute,
) -> Result<String, PipeError> {
let browser_backend: Arc<dyn BrowserBackend> =
Arc::new(PipeBrowserBackend::from_inner(browser_tool.clone()));
execute_route_with_browser_backend(
transport,
browser_backend,
workspace_root,
instruction,
task_context,
route,
)
}
fn collect_hotlist_items(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
top_n: usize,
task_context: &CompatTaskContext,
) -> Result<Vec<HotlistItem>, PipeError> {
@@ -185,9 +216,9 @@ fn collect_hotlist_items<T: Transport + 'static>(
parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data))
}
fn ensure_hotlist_page_ready<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn ensure_hotlist_page_ready(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
top_n: usize,
task_context: &CompatTaskContext,
) -> Result<Option<Vec<HotlistItem>>, PipeError> {
@@ -227,9 +258,9 @@ fn ensure_hotlist_page_ready<T: Transport + 'static>(
Err(last_error.unwrap_or_else(|| PipeError::Protocol("知乎热榜页面未就绪".to_string())))
}
fn probe_hotlist_extractor<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn probe_hotlist_extractor(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
top_n: usize,
) -> Result<Option<Vec<HotlistItem>>, PipeError> {
transport.send(&AgentMessage::LogEntry {
@@ -242,19 +273,32 @@ fn probe_hotlist_extractor<T: Transport + 'static>(
ZHIHU_DOMAIN,
)?;
if !response.success {
eprintln!("probe_hotlist_extractor: eval not successful data={}", response.data);
return Ok(None);
}
match parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data)) {
let eval_text = response.data.get("text").unwrap_or(&response.data);
let eval_preview: String = eval_text
.as_str()
.unwrap_or_default()
.chars()
.take(300)
.collect();
eprintln!(
"probe_hotlist_extractor: eval_len={} preview={eval_preview:?}",
eval_text.as_str().unwrap_or_default().len()
);
match parse_hotlist_items_payload(eval_text) {
Ok(items) if !items.is_empty() => Ok(Some(items)),
Ok(_) => Ok(None),
Err(_) => Ok(None),
}
}
fn navigate_hotlist_page<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn navigate_hotlist_page(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
) -> Result<(), PipeError> {
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
@@ -275,16 +319,34 @@ fn navigate_hotlist_page<T: Transport + 'static>(
}
}
fn poll_for_hotlist_readiness<T: Transport + 'static>(
browser_tool: &BrowserPipeTool<T>,
) -> Result<bool, PipeError> {
fn poll_for_hotlist_readiness(browser_tool: &dyn BrowserBackend) -> Result<bool, PipeError> {
let ready_pattern =
Regex::new(HOTLIST_TEXT_READY_PATTERN).expect("hotlist readiness regex must compile");
for attempt in 0..HOTLIST_READY_POLL_ATTEMPTS {
let response =
browser_tool.invoke(Action::GetText, json!({ "selector": "body" }), ZHIHU_DOMAIN)?;
// Tolerate individual GetText failures (e.g. callback timeout) they
// are expected while the page is still loading or the callback delivery
// path is not yet established. Only a PipeClosed error is fatal.
let response = match browser_tool.invoke(
Action::GetText,
json!({ "selector": "body" }),
ZHIHU_DOMAIN,
) {
Ok(resp) => resp,
Err(PipeError::PipeClosed) => return Err(PipeError::PipeClosed),
Err(_) => {
if attempt + 1 < HOTLIST_READY_POLL_ATTEMPTS {
thread::sleep(HOTLIST_READY_POLL_INTERVAL);
}
continue;
}
};
if response.success {
let payload = response.data.get("text").unwrap_or(&response.data);
let preview: String = payload.as_str().unwrap_or_default().chars().take(200).collect();
eprintln!(
"poll_hotlist_readiness[{attempt}]: text_len={} preview={preview:?}",
payload.as_str().unwrap_or_default().len()
);
if hotlist_text_looks_ready(payload, &ready_pattern) {
return Ok(true);
}
@@ -302,8 +364,8 @@ fn hotlist_text_looks_ready(payload: &Value, ready_pattern: &Regex) -> bool {
text.contains("热榜") && ready_pattern.is_match(text)
}
fn export_xlsx<T: Transport>(
transport: &T,
fn export_xlsx(
transport: &dyn crate::agent::AgentEventSink,
workspace_root: &Path,
items: &[HotlistItem],
) -> Result<String, PipeError> {
@@ -341,8 +403,8 @@ fn export_xlsx<T: Transport>(
Ok(format!("已导出知乎热榜 Excel {output_path}"))
}
fn export_screen<T: Transport>(
transport: &T,
fn export_screen(
transport: &dyn crate::agent::AgentEventSink,
workspace_root: &Path,
items: &[HotlistItem],
) -> Result<String, PipeError> {
@@ -376,9 +438,9 @@ fn export_screen<T: Transport>(
Ok(format!("已生成知乎热榜大屏 {output_path}"))
}
fn execute_zhihu_article_route<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn execute_zhihu_article_route(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
instruction: &str,
task_context: &CompatTaskContext,
publish_mode: bool,
@@ -479,9 +541,9 @@ fn execute_zhihu_article_route<T: Transport + 'static>(
}
}
fn execute_zhihu_article_entry_route<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn execute_zhihu_article_entry_route(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
) -> Result<String, PipeError> {
navigate_zhihu_page(transport, browser_tool, ZHIHU_CREATOR_URL)?;
transport.send(&AgentMessage::LogEntry {
@@ -596,9 +658,9 @@ fn extract_top_n(instruction: &str) -> usize {
.unwrap_or(10)
}
fn navigate_zhihu_page<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn navigate_zhihu_page(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
url: &str,
) -> Result<(), PipeError> {
transport.send(&AgentMessage::LogEntry {
@@ -616,8 +678,8 @@ fn navigate_zhihu_page<T: Transport + 'static>(
}
}
fn execute_browser_skill_script<T: Transport + 'static>(
browser_tool: &BrowserPipeTool<T>,
fn execute_browser_skill_script(
browser_tool: &dyn BrowserBackend,
skill_name: &str,
script_name: &str,
args: Value,
@@ -641,9 +703,9 @@ fn execute_browser_skill_script<T: Transport + 'static>(
))
}
fn navigate_to_editor_after_creator_entry<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
fn navigate_to_editor_after_creator_entry(
transport: &dyn crate::agent::AgentEventSink,
browser_tool: &dyn BrowserBackend,
creator_state: &Value,
) -> Result<(), PipeError> {
let status = payload_status(creator_state);
@@ -679,7 +741,7 @@ mod tests {
use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use crate::pipe::{BrowserMessage, Timing};
use crate::pipe::{BrowserMessage, CommandOutput, ExecutionSurfaceMetadata, Timing};
use crate::security::MacPolicy;
struct MockWorkflowTransport {
@@ -743,6 +805,245 @@ mod tests {
}
}
#[derive(Default)]
struct FakeBrowserBackend {
responses: Mutex<VecDeque<Result<CommandOutput, PipeError>>>,
invocations: Mutex<Vec<(Action, Value, String)>>,
}
impl FakeBrowserBackend {
fn new(responses: Vec<Result<CommandOutput, PipeError>>) -> Self {
Self {
responses: Mutex::new(VecDeque::from(responses)),
invocations: Mutex::new(Vec::new()),
}
}
fn invocations(&self) -> Vec<(Action, Value, String)> {
self.invocations.lock().unwrap().clone()
}
}
impl BrowserBackend for FakeBrowserBackend {
fn invoke(
&self,
action: Action,
params: Value,
expected_domain: &str,
) -> Result<CommandOutput, PipeError> {
self.invocations
.lock()
.unwrap()
.push((action, params, expected_domain.to_string()));
self.responses
.lock()
.unwrap()
.pop_front()
.unwrap_or_else(|| Err(PipeError::Timeout))
}
fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
ExecutionSurfaceMetadata::privileged_browser_pipe("fake_backend")
}
}
#[test]
fn execute_route_with_browser_backend_runs_direct_route_with_ws_style_backend() {
let transport = Arc::new(MockWorkflowTransport::new(vec![]));
let backend = Arc::new(FakeBrowserBackend::new(vec![
Ok(CommandOutput {
seq: 1,
success: true,
data: json!({}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
Ok(CommandOutput {
seq: 2,
success: true,
data: json!({"text": "已进入编辑器"}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
Ok(CommandOutput {
seq: 3,
success: true,
data: json!({
"text": {
"status": "editor_ready",
"current_url": "https://zhuanlan.zhihu.com/write"
}
}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
]));
let summary = execute_route_with_browser_backend(
transport.as_ref(),
backend.clone(),
Path::new("."),
"打开知乎写文章页面",
&CompatTaskContext::default(),
WorkflowRoute::ZhihuArticleEntry,
)
.expect("ws-style backend should satisfy direct route execution");
assert_eq!(summary, "已进入知乎文章编辑器。");
assert_eq!(
backend.invocations(),
vec![
(
Action::Navigate,
json!({ "url": ZHIHU_CREATOR_URL }),
ZHIHU_DOMAIN.to_string(),
),
(
Action::Eval,
json!({
"script": load_browser_skill_script(
"zhihu-navigate",
"open_creator_entry.js",
json!({ "desired_target": "article_editor" })
)
.expect("zhihu navigate script should load")
}),
ZHIHU_DOMAIN.to_string(),
),
(
Action::Eval,
json!({
"script": load_browser_skill_script(
"zhihu-write",
"prepare_article_editor.js",
json!({ "desired_mode": "draft" })
)
.expect("zhihu write script should load")
}),
ZHIHU_EDITOR_DOMAIN.to_string(),
),
]
);
}
#[test]
fn execute_route_with_browser_backend_keeps_bridge_style_article_entry_direct_route() {
let transport = Arc::new(MockWorkflowTransport::new(vec![]));
let backend = Arc::new(FakeBrowserBackend::new(vec![
Ok(CommandOutput {
seq: 1,
success: true,
data: json!({}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
Ok(CommandOutput {
seq: 2,
success: true,
data: json!({
"text": {
"status": "creator_entry_clicked",
"current_url": "https://www.zhihu.com/creator",
"next_url": ZHIHU_EDITOR_URL,
}
}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
Ok(CommandOutput {
seq: 3,
success: true,
data: json!({}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
Ok(CommandOutput {
seq: 4,
success: true,
data: json!({
"text": {
"status": "editor_ready",
"current_url": ZHIHU_EDITOR_URL,
}
}),
aom_snapshot: vec![],
timing: Timing {
queue_ms: 1,
exec_ms: 1,
},
}),
]));
let summary = execute_route_with_browser_backend(
transport.as_ref(),
backend.clone(),
Path::new("."),
"打开知乎写文章页面",
&CompatTaskContext::default(),
WorkflowRoute::ZhihuArticleEntry,
)
.expect("bridge-style backend should satisfy direct route execution");
assert_eq!(summary, "已进入知乎文章编辑器。");
assert_eq!(
backend.invocations(),
vec![
(
Action::Navigate,
json!({ "url": ZHIHU_CREATOR_URL }),
ZHIHU_DOMAIN.to_string(),
),
(
Action::Eval,
json!({
"script": load_browser_skill_script(
"zhihu-navigate",
"open_creator_entry.js",
json!({ "desired_target": "article_editor" })
)
.expect("zhihu navigate script should load")
}),
ZHIHU_DOMAIN.to_string(),
),
(
Action::Navigate,
json!({ "url": ZHIHU_EDITOR_URL }),
ZHIHU_EDITOR_DOMAIN.to_string(),
),
(
Action::Eval,
json!({
"script": load_browser_skill_script(
"zhihu-write",
"prepare_article_editor.js",
json!({ "desired_mode": "draft" })
)
.expect("zhihu write script should load")
}),
ZHIHU_EDITOR_DOMAIN.to_string(),
),
]
);
}
#[test]
fn collect_hotlist_items_skips_navigation_when_hot_page_is_already_readable() {
let transport = Arc::new(MockWorkflowTransport::new(vec![
@@ -771,7 +1072,8 @@ mod tests {
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
let browser_backend = PipeBrowserBackend::from_inner(browser_tool);
let items = collect_hotlist_items(transport.as_ref(), &browser_backend, 10, &task_context)
.expect("hotlist collection should succeed");
assert_eq!(items.len(), 2);
@@ -824,7 +1126,8 @@ mod tests {
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
let browser_backend = PipeBrowserBackend::from_inner(browser_tool);
let items = collect_hotlist_items(transport.as_ref(), &browser_backend, 10, &task_context)
.expect("hotlist collection should succeed after readiness polling");
assert_eq!(items.len(), 1);
@@ -892,7 +1195,8 @@ mod tests {
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
let browser_backend = PipeBrowserBackend::from_inner(browser_tool);
let items = collect_hotlist_items(transport.as_ref(), &browser_backend, 10, &task_context)
.expect("hotlist collection should succeed after one navigation retry");
assert_eq!(items.len(), 1);
@@ -958,7 +1262,8 @@ mod tests {
..CompatTaskContext::default()
};
let items = collect_hotlist_items(transport.as_ref(), &browser_tool, 10, &task_context)
let browser_backend = PipeBrowserBackend::from_inner(browser_tool);
let items = collect_hotlist_items(transport.as_ref(), &browser_backend, 10, &task_context)
.expect("hotlist collection should succeed via extractor probe");
assert_eq!(items.len(), 1);

View File

@@ -132,6 +132,8 @@ pub struct SgClawSettings {
pub active_provider: String,
pub browser_backend: BrowserBackend,
pub office_backend: OfficeBackend,
pub browser_ws_url: Option<String>,
pub service_ws_listen_addr: Option<String>,
}
impl SgClawSettings {
@@ -167,6 +169,8 @@ impl SgClawSettings {
None,
None,
None,
None,
None,
)
}
@@ -202,6 +206,8 @@ impl SgClawSettings {
None,
None,
None,
None,
None,
)?))
}
@@ -285,6 +291,8 @@ impl SgClawSettings {
config.active_provider,
browser_backend,
office_backend,
config.browser_ws_url,
config.service_ws_listen_addr,
)
.map_err(|err| err.with_path(path))
}
@@ -301,6 +309,8 @@ impl SgClawSettings {
active_provider: Option<String>,
browser_backend: Option<BrowserBackend>,
office_backend: Option<OfficeBackend>,
browser_ws_url: Option<String>,
service_ws_listen_addr: Option<String>,
) -> Result<Self, ConfigError> {
let providers = if providers.is_empty() {
vec![ProviderSettings::from_legacy_deepseek(
@@ -336,6 +346,8 @@ impl SgClawSettings {
active_provider,
browser_backend: browser_backend.unwrap_or(BrowserBackend::SuperRpa),
office_backend: office_backend.unwrap_or(OfficeBackend::OpenXml),
browser_ws_url: normalize_optional_value(browser_ws_url),
service_ws_listen_addr: normalize_optional_value(service_ws_listen_addr),
})
}
}
@@ -495,6 +507,10 @@ struct RawSgClawSettings {
browser_backend: Option<String>,
#[serde(rename = "officeBackend", alias = "office_backend", default)]
office_backend: Option<String>,
#[serde(rename = "browserWsUrl", alias = "browser_ws_url", default)]
browser_ws_url: Option<String>,
#[serde(rename = "serviceWsListenAddr", alias = "service_ws_listen_addr", default)]
service_ws_listen_addr: Option<String>,
#[serde(default)]
providers: Vec<RawProviderSettings>,
}

View File

@@ -1,10 +1,14 @@
pub mod agent;
pub mod browser;
pub mod compat;
pub mod config;
pub mod llm;
pub mod pipe;
pub mod runtime;
pub mod security;
pub mod service;
pub use browser::ws_probe::{parse_probe_args, run_probe_script, ProbeError, ProbeOutcome};
use std::path::PathBuf;
use std::sync::Arc;

View File

@@ -55,10 +55,22 @@ impl<T: Transport> BrowserPipeTool<T> {
self
}
pub fn response_timeout(&self) -> Duration {
self.response_timeout
}
pub fn mac_policy(&self) -> &MacPolicy {
&self.mac_policy
}
pub fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
self.mac_policy.privileged_surface_metadata()
}
pub fn supports_eval(&self) -> bool {
self.mac_policy.supports_pipe_action(&Action::Eval)
}
pub fn invoke(
&self,
action: Action,
@@ -115,6 +127,21 @@ impl<T: Transport> BrowserPipeTool<T> {
"received duplicate init after handshake".to_string(),
));
}
BrowserMessage::Connect => {
return Err(PipeError::UnexpectedMessage(
"received connect while waiting for response".to_string(),
));
}
BrowserMessage::Start => {
return Err(PipeError::UnexpectedMessage(
"received start while waiting for response".to_string(),
));
}
BrowserMessage::Stop => {
return Err(PipeError::UnexpectedMessage(
"received stop while waiting for response".to_string(),
));
}
BrowserMessage::SubmitTask { .. } => {
return Err(PipeError::UnexpectedMessage(
"received submit_task while waiting for response".to_string(),

View File

@@ -55,6 +55,9 @@ pub enum BrowserMessage {
#[serde(default)]
capabilities: Vec<String>,
},
Connect,
Start,
Stop,
SubmitTask {
instruction: String,
#[serde(default)]
@@ -119,6 +122,9 @@ pub enum AgentMessage {
agent_id: String,
supported_actions: Vec<Action>,
},
StatusChanged {
state: String,
},
LogEntry {
level: String,
message: String,

View File

@@ -39,6 +39,20 @@ impl MacPolicy {
Ok(policy)
}
pub fn supports_pipe_action(&self, action: &Action) -> bool {
let action_name = action.as_str();
!self
.pipe_actions
.blocked
.iter()
.any(|blocked| blocked == action_name)
&& self
.pipe_actions
.allowed
.iter()
.any(|allowed| allowed == action_name)
}
pub fn validate(&self, action: &Action, expected_domain: &str) -> Result<(), SecurityError> {
let action_name = action.as_str();
if self

111
src/service/mod.rs Normal file
View File

@@ -0,0 +1,111 @@
mod protocol;
pub(crate) mod server;
use std::net::TcpListener;
use std::sync::Arc;
use tungstenite::accept;
use crate::agent::AgentRuntimeContext;
use crate::pipe::PipeError;
use crate::security::MacPolicy;
const DEFAULT_BROWSER_WS_URL: &str = "ws://127.0.0.1:12345";
const DEFAULT_SERVICE_WS_LISTEN_ADDR: &str = "127.0.0.1:42321";
pub use protocol::{ClientMessage, ServiceMessage};
pub use server::{serve_client, ServiceEventSink, ServiceSession};
pub(crate) mod browser_ws_client {
pub(crate) use super::server::{initial_request_url_for_submit_task, ServiceWsClient};
}
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ServiceStartupConfig {
pub browser_ws_url: Option<String>,
pub service_ws_listen_addr: Option<String>,
}
pub fn load_startup_config(
runtime_context: &AgentRuntimeContext,
) -> Result<ServiceStartupConfig, PipeError> {
let settings = runtime_context
.load_sgclaw_settings()?
.ok_or_else(|| PipeError::Protocol("missing environment variable: DEEPSEEK_API_KEY".to_string()))?;
Ok(ServiceStartupConfig {
browser_ws_url: Some(
settings
.browser_ws_url
.unwrap_or_else(|| DEFAULT_BROWSER_WS_URL.to_string()),
),
service_ws_listen_addr: Some(
settings
.service_ws_listen_addr
.unwrap_or_else(|| DEFAULT_SERVICE_WS_LISTEN_ADDR.to_string()),
),
})
}
pub fn run() -> Result<(), PipeError> {
let runtime_context = AgentRuntimeContext::from_process_args(std::env::args_os())?;
let startup = load_startup_config(&runtime_context)?;
let service_ws_listen_addr = startup
.service_ws_listen_addr
.as_deref()
.unwrap_or(DEFAULT_SERVICE_WS_LISTEN_ADDR);
let browser_ws_url = startup
.browser_ws_url
.as_deref()
.unwrap_or(DEFAULT_BROWSER_WS_URL);
let listener = TcpListener::bind(service_ws_listen_addr)
.map_err(|err| PipeError::Protocol(format!("failed to bind service listener {service_ws_listen_addr}: {err}")))?;
let mac_policy = load_service_mac_policy()?;
let session = ServiceSession::new();
eprintln!(
"sg_claw ready: service_ws_listen_addr={}, browser_ws_url={}",
service_ws_listen_addr,
browser_ws_url,
);
loop {
let (stream, _) = listener.accept()?;
let websocket = accept(stream)
.map_err(|err| PipeError::Protocol(format!("service websocket accept failed: {err}")))?;
let sink = Arc::new(ServiceEventSink::from_websocket(websocket));
match session.try_attach_client() {
Ok(()) => {
let result = serve_client(
&runtime_context,
&session,
sink.clone(),
browser_ws_url,
&mac_policy,
);
session.detach_client();
match result {
Ok(()) | Err(PipeError::PipeClosed) => {}
Err(err) => return Err(err),
}
}
Err(message) => {
sink.send_service_message(message)?;
}
}
}
}
fn load_service_mac_policy() -> Result<MacPolicy, PipeError> {
let current_exe = std::env::current_exe()?;
let candidate = current_exe
.parent()
.map(|dir| dir.join("resources").join("rules.json"))
.unwrap_or_else(|| std::path::PathBuf::from("resources").join("rules.json"));
let path = if candidate.exists() {
candidate
} else {
std::env::current_dir()?.join("resources").join("rules.json")
};
MacPolicy::load_from_path(&path).map_err(PipeError::from)
}

59
src/service/protocol.rs Normal file
View File

@@ -0,0 +1,59 @@
use serde::{Deserialize, Serialize};
use crate::agent::SubmitTaskRequest;
use crate::pipe::ConversationMessage;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ClientMessage {
Connect,
Start,
Stop,
SubmitTask {
instruction: String,
#[serde(default)]
conversation_id: String,
#[serde(default)]
messages: Vec<ConversationMessage>,
#[serde(default)]
page_url: String,
#[serde(default)]
page_title: String,
},
Ping,
}
impl ClientMessage {
pub fn into_submit_task_request(self) -> Option<SubmitTaskRequest> {
match self {
ClientMessage::SubmitTask {
instruction,
conversation_id,
messages,
page_url,
page_title,
} => Some(SubmitTaskRequest {
instruction,
conversation_id: normalize_optional_field(conversation_id),
messages,
page_url: normalize_optional_field(page_url),
page_title: normalize_optional_field(page_title),
}),
ClientMessage::Connect | ClientMessage::Start | ClientMessage::Stop | ClientMessage::Ping => None,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ServiceMessage {
StatusChanged { state: String },
LogEntry { level: String, message: String },
TaskComplete { success: bool, summary: String },
Busy { message: String },
}
fn normalize_optional_field(value: String) -> Option<String> {
let trimmed = value.trim();
(!trimmed.is_empty()).then(|| trimmed.to_string())
}

907
src/service/server.rs Normal file
View File

@@ -0,0 +1,907 @@
use std::net::TcpStream;
use std::sync::{Arc, Mutex};
use std::time::Duration;
#[cfg(test)]
use reqwest::blocking::Client;
#[cfg(test)]
use serde_json::{json, Map};
#[cfg(test)]
use serde_json::Value;
use tungstenite::stream::MaybeTlsStream;
use tungstenite::{connect, Message, WebSocket};
use crate::agent::{
run_submit_task_with_browser_backend, AgentEventSink, AgentRuntimeContext, SubmitTaskRequest,
};
use crate::browser::callback_host::LiveBrowserCallbackHost;
use crate::browser::ws_backend::WsClient;
#[cfg(test)]
use crate::browser::bridge_contract::{
BridgeBrowserActionError, BridgeBrowserActionReply, BridgeBrowserActionRequest,
BridgeBrowserActionSuccess,
};
#[cfg(test)]
use crate::browser::bridge_transport::BridgeActionTransport;
use crate::browser::{BrowserBackend, BrowserCallbackBackend};
#[cfg(test)]
use crate::browser::BridgeBrowserBackend;
use crate::pipe::{AgentMessage, BrowserMessage, PipeError, Transport};
#[cfg(test)]
use crate::pipe::Timing;
use crate::security::MacPolicy;
use super::{ClientMessage, ServiceMessage};
const BROWSER_RESPONSE_TIMEOUT: Duration = Duration::from_secs(30);
#[cfg(test)]
const DEFAULT_BRIDGE_BASE_URL: &str = "http://localhost:23323";
#[derive(Debug, Default)]
pub struct ServiceSession {
attached: Mutex<bool>,
task_running: Mutex<bool>,
}
impl ServiceSession {
pub fn new() -> Self {
Self {
attached: Mutex::new(false),
task_running: Mutex::new(false),
}
}
pub fn try_attach_client(&self) -> Result<(), ServiceMessage> {
let mut attached = self.attached.lock().unwrap();
if *attached {
return Err(ServiceMessage::Busy {
message: "service already has an attached client".to_string(),
});
}
*attached = true;
Ok(())
}
pub fn detach_client(&self) {
let mut attached = self.attached.lock().unwrap();
*attached = false;
let mut task_running = self.task_running.lock().unwrap();
*task_running = false;
}
pub fn try_start_task(&self) -> Result<(), ServiceMessage> {
let attached = self.attached.lock().unwrap();
if !*attached {
return Err(ServiceMessage::Busy {
message: "service has no attached client".to_string(),
});
}
drop(attached);
let mut task_running = self.task_running.lock().unwrap();
if *task_running {
return Err(ServiceMessage::Busy {
message: "service already has a running task".to_string(),
});
}
*task_running = true;
Ok(())
}
pub fn finish_task(&self) {
let mut task_running = self.task_running.lock().unwrap();
*task_running = false;
}
}
#[derive(Debug)]
pub struct ServiceEventSink {
sent: Mutex<Vec<ServiceMessage>>,
writer: Option<Mutex<WebSocket<TcpStream>>>,
}
impl Default for ServiceEventSink {
fn default() -> Self {
Self {
sent: Mutex::new(Vec::new()),
writer: None,
}
}
}
impl ServiceEventSink {
pub fn from_websocket(websocket: WebSocket<TcpStream>) -> Self {
Self {
sent: Mutex::new(Vec::new()),
writer: Some(Mutex::new(websocket)),
}
}
pub fn send_service_message(&self, message: ServiceMessage) -> Result<(), PipeError> {
self.sent.lock().unwrap().push(message.clone());
if let Some(writer) = &self.writer {
let payload = serde_json::to_string(&message)?;
writer
.lock()
.map_err(|_| PipeError::Protocol("service websocket writer lock poisoned".to_string()))?
.send(Message::Text(payload.into()))
.map_err(|err| PipeError::Protocol(format!("service websocket send failed: {err}")))?;
}
Ok(())
}
pub fn recv_client_message(&self) -> Result<Option<ClientMessage>, PipeError> {
let Some(writer) = &self.writer else {
return Err(PipeError::Protocol(
"service sink has no websocket reader".to_string(),
));
};
loop {
let mut websocket = writer
.lock()
.map_err(|_| PipeError::Protocol("service websocket writer lock poisoned".to_string()))?;
match websocket.read() {
Ok(Message::Text(text)) => return Ok(Some(serde_json::from_str(&text)?)),
Ok(Message::Close(_)) => return Ok(None),
Ok(Message::Ping(payload)) => {
websocket
.send(Message::Pong(payload))
.map_err(|err| PipeError::Protocol(format!("service websocket pong failed: {err}")))?;
}
Ok(_) => {}
Err(tungstenite::Error::ConnectionClosed) | Err(tungstenite::Error::AlreadyClosed) => {
return Ok(None)
}
Err(err) => return Err(map_service_websocket_error(err, "read")),
}
}
}
pub fn sent_messages(&self) -> Vec<ServiceMessage> {
self.sent.lock().unwrap().clone()
}
}
impl AgentEventSink for ServiceEventSink {
fn send(&self, message: &AgentMessage) -> Result<(), PipeError> {
let mapped = match message {
AgentMessage::StatusChanged { state } => ServiceMessage::StatusChanged {
state: state.clone(),
},
AgentMessage::LogEntry { level, message } => ServiceMessage::LogEntry {
level: level.clone(),
message: message.clone(),
},
AgentMessage::TaskComplete { success, summary } => ServiceMessage::TaskComplete {
success: *success,
summary: summary.clone(),
},
_ => {
return Err(PipeError::Protocol(
"unsupported agent message for service sink".to_string(),
))
}
};
self.send_service_message(mapped)
}
}
fn map_service_websocket_error(err: tungstenite::Error, operation: &str) -> PipeError {
match err {
tungstenite::Error::ConnectionClosed
| tungstenite::Error::AlreadyClosed
| tungstenite::Error::Protocol(tungstenite::error::ProtocolError::ResetWithoutClosingHandshake)
| tungstenite::Error::Protocol(tungstenite::error::ProtocolError::SendAfterClosing) => {
PipeError::PipeClosed
}
tungstenite::Error::Io(io_err)
if matches!(
io_err.kind(),
std::io::ErrorKind::TimedOut | std::io::ErrorKind::WouldBlock
) =>
{
PipeError::Timeout
}
tungstenite::Error::Io(io_err)
if matches!(
io_err.kind(),
std::io::ErrorKind::ConnectionAborted
| std::io::ErrorKind::ConnectionReset
| std::io::ErrorKind::BrokenPipe
| std::io::ErrorKind::UnexpectedEof
) =>
{
PipeError::PipeClosed
}
tungstenite::Error::Io(io_err) => {
PipeError::Protocol(format!("service websocket {operation} failed: {io_err}"))
}
other => PipeError::Protocol(format!("service websocket {operation} failed: {other}")),
}
}
fn send_status_changed(sink: &ServiceEventSink, state: &str) -> Result<(), PipeError> {
sink.send(&AgentMessage::StatusChanged {
state: state.to_string(),
})
}
pub fn serve_client(
context: &AgentRuntimeContext,
session: &ServiceSession,
sink: Arc<ServiceEventSink>,
browser_ws_url: &str,
mac_policy: &MacPolicy,
) -> Result<(), PipeError> {
loop {
let Some(message) = sink.recv_client_message()? else {
return Ok(());
};
match message {
ClientMessage::Connect => send_status_changed(sink.as_ref(), "connected")?,
ClientMessage::Start => send_status_changed(sink.as_ref(), "started")?,
ClientMessage::Stop => send_status_changed(sink.as_ref(), "stopped")?,
ClientMessage::SubmitTask {
instruction,
conversation_id,
messages,
page_url,
page_title,
} => {
let request = ClientMessage::SubmitTask {
instruction,
conversation_id,
messages,
page_url: page_url.clone(),
page_title,
}
.into_submit_task_request()
.expect("submit task request");
if let Err(message) = session.try_start_task() {
sink.send_service_message(message)?;
continue;
}
if request.instruction.trim().is_empty() {
let result = sink.send(&AgentMessage::TaskComplete {
success: false,
summary: "请输入任务内容。".to_string(),
});
session.finish_task();
result?;
continue;
}
let result = (|| {
let browser_backend = browser_backend_for_submit(browser_ws_url, mac_policy, &request)?;
run_submit_task_with_browser_backend(
&NoopTransport,
sink.as_ref(),
browser_backend,
context,
request,
)
})();
session.finish_task();
match result {
Ok(()) => {}
Err(PipeError::PipeClosed) => return Err(PipeError::PipeClosed),
Err(err) => {
eprintln!("task execution failed: {err}");
sink.send(&AgentMessage::TaskComplete {
success: false,
summary: format!("任务执行失败: {err}"),
})?;
}
}
}
ClientMessage::Ping => {}
}
}
}
fn browser_backend_for_submit(
browser_ws_url: &str,
mac_policy: &MacPolicy,
request: &SubmitTaskRequest,
) -> Result<Arc<dyn BrowserBackend>, PipeError> {
// Always use BrowserCallbackBackend which opens a real helper page in the
// browser via `sgBrowerserOpenPage`. The helper page acts as a genuine
// browser tab whose URL the browser WS server can route commands to.
//
// WsBrowserBackend is NOT suitable here because:
// 1. It uses a fabricated source URL (e.g. "https://www.zhihu.com") that
// does not correspond to any open tab, so the browser silently drops
// the command.
// 2. It expects a numeric status frame ("0") from the browser WS, but
// the real SuperRPA browser never sends such frames → timeout.
//
// The bootstrap_request_url MUST be the URL of a page that is already open
// in the browser. The browser WS server requires the first element of the
// command array (requestUrl) to match an existing tab; otherwise the
// sgBrowerserOpenPage command is silently ignored.
let bootstrap_request_url = initial_request_url_for_submit_task(request);
let callback_host = Arc::new(LiveBrowserCallbackHost::start_with_browser_ws_url(
browser_ws_url,
&bootstrap_request_url,
Duration::from_secs(15),
BROWSER_RESPONSE_TIMEOUT,
)?);
Ok(Arc::new(BrowserCallbackBackend::new(
callback_host.clone(),
mac_policy.clone(),
callback_host.helper_url().to_string(),
)))
}
pub(crate) fn initial_request_url_for_submit_task(request: &crate::agent::SubmitTaskRequest) -> String {
request
.page_url
.as_deref()
.map(str::trim)
.filter(|value| !value.is_empty())
.map(ToString::to_string)
.or_else(|| derive_request_url_from_instruction(&request.instruction))
.unwrap_or_else(|| "about:blank".to_string())
}
fn derive_request_url_from_instruction(instruction: &str) -> Option<String> {
if crate::compat::workflow_executor::detect_route(instruction, None, None)
.is_some_and(|route| {
matches!(
route,
crate::compat::workflow_executor::WorkflowRoute::ZhihuHotlistExportXlsx
| crate::compat::workflow_executor::WorkflowRoute::ZhihuHotlistScreen
| crate::compat::workflow_executor::WorkflowRoute::ZhihuArticleEntry
)
})
{
return Some("https://www.zhihu.com".to_string());
}
if crate::compat::workflow_executor::detect_route(instruction, None, None)
.is_some_and(|route| {
matches!(
route,
crate::compat::workflow_executor::WorkflowRoute::ZhihuArticleDraft
| crate::compat::workflow_executor::WorkflowRoute::ZhihuArticlePublish
)
})
{
return Some("https://zhuanlan.zhihu.com".to_string());
}
None
}
pub(crate) struct ServiceWsClient {
websocket: Mutex<WebSocket<MaybeTlsStream<TcpStream>>>,
}
impl ServiceWsClient {
pub(crate) fn connect(browser_ws_url: &str) -> Result<Self, PipeError> {
let (mut websocket, _) = connect(browser_ws_url)
.map_err(|err| PipeError::Protocol(format!("browser websocket connect failed: {err}")))?;
configure_browser_ws_timeouts(&mut websocket, BROWSER_RESPONSE_TIMEOUT)?;
websocket
.send(Message::Text(
r#"{"type":"register","role":"web"}"#.to_string().into(),
))
.map_err(|err| map_service_websocket_error(err, "register"))?;
Ok(Self {
websocket: Mutex::new(websocket),
})
}
}
impl WsClient for ServiceWsClient {
fn send_text(&self, payload: &str) -> Result<(), PipeError> {
self.websocket
.lock()
.map_err(|_| PipeError::Protocol("browser websocket lock poisoned".to_string()))?
.send(Message::Text(payload.to_string().into()))
.map_err(|err| map_service_websocket_error(err, "send"))
}
fn recv_text_timeout(&self, timeout: Duration) -> Result<String, PipeError> {
let mut websocket = self
.websocket
.lock()
.map_err(|_| PipeError::Protocol("browser websocket lock poisoned".to_string()))?;
set_plain_browser_ws_read_timeout(&mut websocket, Some(timeout))?;
loop {
match websocket.read() {
Ok(Message::Text(text)) => return Ok(text.to_string()),
Ok(Message::Close(_)) => return Err(PipeError::PipeClosed),
Ok(Message::Ping(payload)) => {
websocket
.send(Message::Pong(payload))
.map_err(|err| map_service_websocket_error(err, "pong"))?;
}
Ok(_) => {}
Err(err) => return Err(map_service_websocket_error(err, "read")),
}
}
}
}
fn configure_browser_ws_timeouts(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
timeout: Duration,
) -> Result<(), PipeError> {
match websocket.get_mut() {
MaybeTlsStream::Plain(stream) => {
stream.set_read_timeout(Some(timeout))?;
stream.set_write_timeout(Some(timeout))?;
Ok(())
}
_ => Ok(()),
}
}
fn set_plain_browser_ws_read_timeout(
websocket: &mut WebSocket<MaybeTlsStream<TcpStream>>,
timeout: Option<Duration>,
) -> Result<(), PipeError> {
match websocket.get_mut() {
MaybeTlsStream::Plain(stream) => {
stream.set_read_timeout(timeout)?;
Ok(())
}
_ => Ok(()),
}
}
struct NoopTransport;
impl Transport for NoopTransport {
fn send(&self, _message: &AgentMessage) -> Result<(), PipeError> {
Ok(())
}
fn recv_timeout(&self, _timeout: Duration) -> Result<BrowserMessage, PipeError> {
Err(PipeError::Timeout)
}
}
#[cfg(test)]
struct ServiceBridgeTransport {
bridge_base_url: String,
response_timeout: Duration,
}
#[cfg(test)]
impl ServiceBridgeTransport {
fn with_timeout(bridge_base_url: String, response_timeout: Duration) -> Self {
Self {
bridge_base_url,
response_timeout,
}
}
fn endpoint_url(&self, action: &str) -> String {
format!("{}/{}", self.bridge_base_url.trim_end_matches('/'), action)
}
}
#[cfg(test)]
impl BridgeActionTransport for ServiceBridgeTransport {
fn execute(
&self,
request: BridgeBrowserActionRequest,
) -> Result<BridgeBrowserActionReply, PipeError> {
let url = self.endpoint_url(&request.action);
let response_timeout = self.response_timeout;
std::thread::spawn(move || execute_bridge_http_request(url, response_timeout, request))
.join()
.map_err(|_| PipeError::Protocol("browser bridge worker thread panicked".to_string()))?
}
}
#[cfg(test)]
fn execute_bridge_http_request(
url: String,
response_timeout: Duration,
request: BridgeBrowserActionRequest,
) -> Result<BridgeBrowserActionReply, PipeError> {
let BridgeBrowserActionRequest {
action,
params,
expected_domain,
} = request;
let payload = json!([action, params, expected_domain]);
let client = Client::builder()
.timeout(response_timeout)
.build()
.unwrap_or_else(|_| Client::new());
let response = client
.post(url)
.json(&payload)
.send()
.map_err(map_bridge_http_error)?;
let status = response.status();
if !status.is_success() {
return Err(PipeError::Protocol(format!(
"browser bridge request failed: HTTP {status}"
)));
}
let value: Value = response.json().map_err(|err| {
PipeError::Protocol(format!("browser bridge response decode failed: {err}"))
})?;
normalize_bridge_action_reply(value)
}
#[cfg(test)]
fn bridge_base_url_from_browser_ws_url(browser_ws_url: &str) -> String {
let trimmed = browser_ws_url.trim();
if trimmed.is_empty() {
return DEFAULT_BRIDGE_BASE_URL.to_string();
}
if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
return trimmed.trim_end_matches('/').to_string();
}
let normalized = trimmed
.strip_prefix("ws://")
.map(|rest| format!("http://{rest}"))
.or_else(|| trimmed.strip_prefix("wss://").map(|rest| format!("https://{rest}")))
.unwrap_or_else(|| trimmed.to_string());
let Ok(parsed) = reqwest::Url::parse(&normalized) else {
return DEFAULT_BRIDGE_BASE_URL.to_string();
};
let host = parsed.host_str().unwrap_or("localhost");
let is_default_browser_ws = parsed.scheme() == "http"
&& parsed.port_or_known_default() == Some(12345)
&& matches!(host, "127.0.0.1" | "localhost");
if is_default_browser_ws {
return DEFAULT_BRIDGE_BASE_URL.to_string();
}
let mut base = format!("{}://{}", parsed.scheme(), host);
if let Some(port) = parsed.port() {
base.push(':');
base.push_str(&port.to_string());
}
base
}
#[cfg(test)]
fn map_bridge_http_error(err: reqwest::Error) -> PipeError {
if err.is_timeout() {
PipeError::Timeout
} else {
PipeError::Protocol(format!("browser bridge request failed: {err}"))
}
}
#[cfg(test)]
fn normalize_bridge_action_reply(value: Value) -> Result<BridgeBrowserActionReply, PipeError> {
if let Ok(reply) = serde_json::from_value::<BridgeBrowserActionReply>(value.clone()) {
return Ok(reply);
}
match value {
Value::Number(number) if number.as_i64() == Some(0) => Ok(bridge_success_reply(serde_json::json!({}))),
Value::String(text) if text.trim() == "0" => Ok(bridge_success_reply(serde_json::json!({}))),
Value::Object(object) => normalize_bridge_action_reply_object(object),
other => Err(PipeError::Protocol(format!(
"invalid browser bridge reply: {other}"
))),
}
}
#[cfg(test)]
fn normalize_bridge_action_reply_object(
object: Map<String, Value>,
) -> Result<BridgeBrowserActionReply, PipeError> {
if let Some(success) = object.get("success").and_then(Value::as_bool) {
return Ok(if success {
bridge_success_reply(success_data_from_object(&object))
} else {
bridge_error_reply(error_message_from_object(&object), error_details_from_object(&object))
});
}
if let Some(code) = object.get("code").and_then(Value::as_i64) {
return Ok(if code == 0 {
bridge_success_reply(success_data_from_object(&object))
} else {
bridge_error_reply(
error_message_from_object(&object),
object_to_value(object.clone()),
)
});
}
if object.contains_key("data") || object.contains_key("result") || object.contains_key("text") {
return Ok(bridge_success_reply(success_data_from_object(&object)));
}
Err(PipeError::Protocol(format!(
"invalid browser bridge reply: {}",
object_to_value(object)
)))
}
#[cfg(test)]
fn bridge_success_reply(data: Value) -> BridgeBrowserActionReply {
BridgeBrowserActionReply::Success(BridgeBrowserActionSuccess {
data,
aom_snapshot: vec![],
timing: Timing {
queue_ms: 0,
exec_ms: 0,
},
})
}
#[cfg(test)]
fn bridge_error_reply(message: String, details: Value) -> BridgeBrowserActionReply {
BridgeBrowserActionReply::Error(BridgeBrowserActionError { message, details })
}
#[cfg(test)]
fn success_data_from_object(object: &Map<String, Value>) -> Value {
object
.get("data")
.cloned()
.or_else(|| object.get("result").cloned())
.or_else(|| object.get("text").cloned().map(|text| json!({ "text": text })))
.unwrap_or_else(|| json!({}))
}
#[cfg(test)]
fn error_message_from_object(object: &Map<String, Value>) -> String {
object
.get("message")
.and_then(Value::as_str)
.or_else(|| object.get("error").and_then(Value::as_str))
.map(ToString::to_string)
.unwrap_or_else(|| "browser bridge action failed".to_string())
}
#[cfg(test)]
fn error_details_from_object(object: &Map<String, Value>) -> Value {
object
.get("details")
.cloned()
.unwrap_or_else(|| object_to_value(object.clone()))
}
#[cfg(test)]
fn object_to_value(object: Map<String, Value>) -> Value {
Value::Object(object)
}
#[cfg(test)]
fn read_http_json_body(stream: &mut impl std::io::Read) -> Value {
let mut buffer = Vec::new();
let mut headers_end = None;
while headers_end.is_none() {
let mut chunk = [0_u8; 1024];
let bytes = std::io::Read::read(stream, &mut chunk).unwrap();
assert!(bytes > 0, "unexpected EOF while reading headers");
buffer.extend_from_slice(&chunk[..bytes]);
headers_end = buffer.windows(4).position(|window| window == b"\r\n\r\n");
}
let headers_end = headers_end.unwrap() + 4;
let headers = String::from_utf8(buffer[..headers_end].to_vec()).unwrap();
let content_length = headers
.lines()
.find_map(|line| {
let (name, value) = line.split_once(':')?;
name.eq_ignore_ascii_case("content-length")
.then(|| value.trim().parse::<usize>().unwrap())
})
.unwrap_or(0);
while buffer.len() < headers_end + content_length {
let mut chunk = vec![0_u8; content_length];
let bytes = std::io::Read::read(stream, &mut chunk).unwrap();
assert!(bytes > 0, "unexpected EOF while reading body");
buffer.extend_from_slice(&chunk[..bytes]);
}
serde_json::from_slice(&buffer[headers_end..headers_end + content_length]).unwrap()
}
#[cfg(test)]
fn write_http_json_response(stream: &mut impl std::io::Write, status: &str, body: &Value) {
let payload = body.to_string();
let response = format!(
"HTTP/1.1 {status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
payload.len(),
payload
);
std::io::Write::write_all(stream, response.as_bytes()).unwrap();
std::io::Write::flush(stream).unwrap();
}
#[cfg(test)]
mod tests {
use super::*;
use std::net::TcpListener;
use std::sync::{mpsc, Arc};
use std::thread;
use serde_json::json;
use crate::agent::SubmitTaskRequest;
use crate::browser::BrowserBackend;
use crate::pipe::Action;
fn service_test_policy() -> MacPolicy {
MacPolicy::from_json_str(
r#"{
"version": "1.0",
"domains": { "allowed": ["www.baidu.com", "www.zhihu.com"] },
"pipe_actions": {
"allowed": ["navigate", "getText", "eval"],
"blocked": []
}
}"#,
)
.unwrap()
}
#[test]
fn initial_request_url_prefers_submit_task_page_url() {
let request = SubmitTaskRequest {
instruction: "打开知乎热榜".to_string(),
page_url: Some(" https://www.zhihu.com/ ".to_string()),
..SubmitTaskRequest::default()
};
assert_eq!(
initial_request_url_for_submit_task(&request),
"https://www.zhihu.com/"
);
}
#[test]
fn initial_request_url_falls_back_to_zhihu_origin_for_hotlist_routes() {
let request = SubmitTaskRequest {
instruction: "打开知乎热榜获取前10条数据并导出 Excel".to_string(),
..SubmitTaskRequest::default()
};
assert_eq!(
initial_request_url_for_submit_task(&request),
"https://www.zhihu.com"
);
}
#[test]
fn bridge_base_url_defaults_local_browser_ws_endpoint_to_http_bridge() {
assert_eq!(
bridge_base_url_from_browser_ws_url("ws://127.0.0.1:12345"),
"http://localhost:23323"
);
assert_eq!(
bridge_base_url_from_browser_ws_url("ws://localhost:12345"),
"http://localhost:23323"
);
}
#[test]
fn bridge_base_url_maps_non_default_ws_endpoint_to_http_origin() {
assert_eq!(
bridge_base_url_from_browser_ws_url("ws://127.0.0.1:40123"),
"http://127.0.0.1:40123"
);
}
#[test]
fn service_bridge_transport_posts_semantic_request_and_maps_success_reply() {
let listener = TcpListener::bind("127.0.0.1:0").unwrap();
let address = listener.local_addr().unwrap();
let base_url = format!("http://{address}");
let (request_tx, request_rx) = mpsc::channel();
let server = thread::spawn(move || {
let (mut stream, _) = listener.accept().unwrap();
let request = read_http_json_body(&mut stream);
request_tx.send(request).unwrap();
write_http_json_response(
&mut stream,
"200 OK",
&json!({
"success": true,
"data": { "text": "天气" }
}),
);
});
let backend = BridgeBrowserBackend::new(
Arc::new(ServiceBridgeTransport::with_timeout(
base_url,
Duration::from_secs(1),
)),
service_test_policy(),
);
let output = backend
.invoke(Action::GetText, json!({ "selector": "body" }), "www.zhihu.com")
.expect("bridge transport should normalize success reply");
let request = request_rx.recv_timeout(Duration::from_secs(1)).unwrap();
server.join().unwrap();
assert_eq!(
request,
json!([
"getText",
{ "selector": "body" },
"www.zhihu.com"
])
);
assert!(output.success);
assert_eq!(output.data, json!({ "text": "天气" }));
}
#[test]
fn service_bridge_transport_maps_bridge_error_reply_to_pipe_error() {
let listener = TcpListener::bind("127.0.0.1:0").unwrap();
let address = listener.local_addr().unwrap();
let base_url = format!("http://{address}");
let server = thread::spawn(move || {
let (mut stream, _) = listener.accept().unwrap();
let _request = read_http_json_body(&mut stream);
write_http_json_response(
&mut stream,
"200 OK",
&json!({
"success": false,
"message": "selector not found",
"details": { "selector": "#missing" }
}),
);
});
let backend = BridgeBrowserBackend::new(
Arc::new(ServiceBridgeTransport::with_timeout(
base_url,
Duration::from_secs(1),
)),
service_test_policy(),
);
let error = backend
.invoke(Action::GetText, json!({ "selector": "#missing" }), "www.zhihu.com")
.expect_err("bridge transport should surface semantic bridge failures");
server.join().unwrap();
assert!(matches!(
error,
PipeError::Protocol(message) if message == "bridge action failed: selector not found"
));
}
#[test]
fn service_bridge_transport_maps_http_timeout_to_timeout() {
let transport = ServiceBridgeTransport::with_timeout(
"http://127.0.0.1:1".to_string(),
Duration::from_millis(20),
);
let error = transport
.execute(BridgeBrowserActionRequest::new(
"navigate",
json!({ "url": "https://www.zhihu.com/hot" }),
"www.zhihu.com",
))
.expect_err("unreachable bridge should surface a transport error");
assert!(matches!(error, PipeError::Protocol(_) | PipeError::Timeout));
}
}