From 6068a8228b1a9e1e0012aed8bc970ac82155f0d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=A8=E7=82=8E?= <635735027@qq.com> Date: Mon, 6 Apr 2026 12:09:47 +0800 Subject: [PATCH] feat: realign zhihu browser callback runtime Keep Zhihu browser-attached execution on the callback-host path so direct routes, runtime wiring, and service startup stay aligned for the current websocket browser flow. Co-Authored-By: Claude Sonnet 4.6 --- src/bin/sg_claw_client.rs | 81 +-- src/browser/backend.rs | 8 + src/browser/callback_backend.rs | 555 +++++++++++++++++++- src/browser/callback_host.rs | 338 +++++++++++- src/compat/orchestration.rs | 6 + src/compat/runtime.rs | 38 ++ src/compat/workflow_executor.rs | 880 +++++++++++++++++++++++++++++++- src/runtime/engine.rs | 11 +- src/service/server.rs | 106 ++-- 9 files changed, 1907 insertions(+), 116 deletions(-) diff --git a/src/bin/sg_claw_client.rs b/src/bin/sg_claw_client.rs index 80869b6..f6259b2 100644 --- a/src/bin/sg_claw_client.rs +++ b/src/bin/sg_claw_client.rs @@ -36,43 +36,62 @@ fn run() -> Result<(), String> { .unwrap_or_else(|_| "ws://127.0.0.1:42321".to_string()); let (mut socket, _) = connect(service_url.as_str()).map_err(|err| err.to_string())?; - let mut input = String::new(); - io::stdin() - .lock() - .read_line(&mut input) - .map_err(|err| err.to_string())?; - - let (request, exit_on_status) = parse_request(&input); - - let payload = serde_json::to_string(&request).map_err(|err| err.to_string())?; - socket - .send(Message::Text(payload.into())) - .map_err(|err| err.to_string())?; + let stdin = io::stdin(); loop { - match socket.read().map_err(|err| err.to_string())? { - Message::Text(text) => { - let message: ServiceMessage = - serde_json::from_str(&text).map_err(|err| err.to_string())?; - match message { - ServiceMessage::StatusChanged { state } => { - println!("status: {state}"); - if exit_on_status { - return Ok(()); + eprint!("> "); + let mut input = String::new(); + let bytes_read = stdin + .lock() + .read_line(&mut input) + .map_err(|err| err.to_string())?; + if bytes_read == 0 { + break; // EOF — graceful exit + } + if input.trim().is_empty() { + continue; + } + + let (request, exit_on_status) = parse_request(&input); + + let payload = serde_json::to_string(&request).map_err(|err| err.to_string())?; + socket + .send(Message::Text(payload.into())) + .map_err(|err| err.to_string())?; + + // Inner loop: consume service messages until the task finishes. + loop { + match socket.read().map_err(|err| err.to_string())? { + Message::Text(text) => { + let message: ServiceMessage = + serde_json::from_str(&text).map_err(|err| err.to_string())?; + match message { + ServiceMessage::StatusChanged { state } => { + println!("status: {state}"); + if exit_on_status { + break; + } + } + ServiceMessage::LogEntry { level: _, message } => { + println!("{message}"); + } + ServiceMessage::TaskComplete { success: _, summary } => { + println!("{summary}"); + break; + } + ServiceMessage::Busy { message } => { + eprintln!("busy: {message}"); + break; } } - ServiceMessage::LogEntry { level: _, message } => { - println!("{message}"); - } - ServiceMessage::TaskComplete { success: _, summary } => { - println!("{summary}"); - return Ok(()); - } - ServiceMessage::Busy { message } => return Err(message), } + Message::Close(_) => { + return Err("service disconnected".to_string()); + } + _ => {} } - Message::Close(_) => return Err("service disconnected before task completion".to_string()), - _ => {} } } + + Ok(()) } diff --git a/src/browser/backend.rs b/src/browser/backend.rs index 65afdf8..22a63f3 100644 --- a/src/browser/backend.rs +++ b/src/browser/backend.rs @@ -17,6 +17,10 @@ pub trait BrowserBackend: Send + Sync { fn supports_eval(&self) -> bool { true } + + fn supports_live_input(&self) -> bool { + false + } } impl BrowserBackend for Arc { @@ -36,4 +40,8 @@ impl BrowserBackend for Arc { fn supports_eval(&self) -> bool { self.as_ref().supports_eval() } + + fn supports_live_input(&self) -> bool { + self.as_ref().supports_live_input() + } } diff --git a/src/browser/callback_backend.rs b/src/browser/callback_backend.rs index a7d5229..4c3df4e 100644 --- a/src/browser/callback_backend.rs +++ b/src/browser/callback_backend.rs @@ -8,7 +8,8 @@ use crate::browser::backend::BrowserBackend; use crate::pipe::{Action, CommandOutput, ExecutionSurfaceMetadata, PipeError, Timing}; use crate::security::MacPolicy; -const NAVIGATE_CALLBACK_NAME: &str = "sgclawOnLoaded"; +const CLICK_PROBE_CALLBACK_NAME: &str = "sgclawOnClickProbe"; +const TYPE_PROBE_CALLBACK_NAME: &str = "sgclawOnTypeProbe"; const GET_TEXT_CALLBACK_NAME: &str = "sgclawOnGetText"; const EVAL_CALLBACK_NAME: &str = "sgclawOnEval"; const SHOW_AREA: &str = "show"; @@ -54,6 +55,12 @@ pub struct BrowserCallbackBackend { next_seq: AtomicU64, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CallbackInputMode { + Click, + Type, +} + impl BrowserCallbackBackend { pub fn new( host: Arc, @@ -91,6 +98,8 @@ impl BrowserCallbackBackend { target_url, ])) } + Action::Click => self.build_input_command(action, params, CallbackInputMode::Click), + Action::Type => self.build_input_command(action, params, CallbackInputMode::Type), Action::GetText => { let target_url = self.target_url(action, params)?; let domain = extract_domain(&target_url)?; @@ -129,6 +138,35 @@ impl BrowserCallbackBackend { } } + fn build_input_command( + &self, + action: &Action, + params: &Value, + mode: CallbackInputMode, + ) -> Result { + let target_url = self.target_url(action, params)?; + let domain = extract_domain(&target_url)?; + let selector = optional_string(params, "selector"); + let probe_script = optional_string(params, "probe_script"); + let text = matches!(mode, CallbackInputMode::Type) + .then(|| required_string(params, "text")) + .transpose()?; + let js_code = build_input_probe_js( + mode, + &self.helper_page_url, + selector.as_deref(), + probe_script.as_deref(), + text.as_deref(), + )?; + Ok(json!([ + self.helper_page_url, + "sgBrowserExcuteJsCodeByDomain", + domain, + js_code, + SHOW_AREA, + ])) + } + fn target_url(&self, action: &Action, params: &Value) -> Result { if let Some(target_url) = params .get("target_url") @@ -146,6 +184,117 @@ impl BrowserCallbackBackend { .clone() .ok_or_else(|| PipeError::Protocol(format!("target_url is required for {}", action.as_str()))) } + + fn execute_simulated_click( + &self, + seq: u64, + expected_domain: &str, + success: &BrowserCallbackSuccess, + ) -> Result { + let probe = success + .data + .get("probe") + .ok_or_else(|| PipeError::Protocol("callback click probe payload missing".to_string()))?; + let x = probe + .get("x") + .and_then(Value::as_f64) + .ok_or_else(|| PipeError::Protocol("callback click probe missing x".to_string()))?; + let y = probe + .get("y") + .and_then(Value::as_f64) + .ok_or_else(|| PipeError::Protocol("callback click probe missing y".to_string()))?; + let timing = success.timing.clone(); + match self.host.execute(BrowserCallbackRequest { + seq, + request_url: self.helper_page_url.clone(), + expected_domain: expected_domain.to_string(), + action: Action::Click.as_str().to_string(), + command: json!([ + self.helper_page_url, + "sgBroewserSimulateMouse", + x, + y, + "left", + "", + "" + ]), + }) { + Ok(BrowserCallbackResponse::Error(error)) => Err(PipeError::Protocol(format!( + "callback host browser action failed: {} ({})", + error.message, error.details + ))), + Ok(BrowserCallbackResponse::Success(_)) | Err(PipeError::Timeout) => { + Ok(BrowserCallbackSuccess { + success: true, + data: json!({ + "clicked": true, + "probe": { "x": x, "y": y }, + }), + aom_snapshot: vec![], + timing, + }) + } + Err(error) => Err(error), + } + } + + fn execute_simulated_type( + &self, + seq: u64, + expected_domain: &str, + params: &Value, + success: &BrowserCallbackSuccess, + ) -> Result { + let probe = success + .data + .get("probe") + .ok_or_else(|| PipeError::Protocol("callback type probe payload missing".to_string()))?; + let x = probe + .get("x") + .and_then(Value::as_f64) + .ok_or_else(|| PipeError::Protocol("callback type probe missing x".to_string()))?; + let y = probe + .get("y") + .and_then(Value::as_f64) + .ok_or_else(|| PipeError::Protocol("callback type probe missing y".to_string()))?; + let text = params + .get("text") + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .ok_or_else(|| PipeError::Protocol("text is required".to_string()))?; + let timing = success.timing.clone(); + match self.host.execute(BrowserCallbackRequest { + seq, + request_url: self.helper_page_url.clone(), + expected_domain: expected_domain.to_string(), + action: Action::Type.as_str().to_string(), + command: json!([ + self.helper_page_url, + "sgBroewserSimulateKeyborad", + x, + y, + text + ]), + }) { + Ok(BrowserCallbackResponse::Error(error)) => Err(PipeError::Protocol(format!( + "callback host browser action failed: {} ({})", + error.message, error.details + ))), + Ok(BrowserCallbackResponse::Success(_)) | Err(PipeError::Timeout) => { + Ok(BrowserCallbackSuccess { + success: true, + data: json!({ + "typed": true, + "probe": { "x": x, "y": y, "text": text }, + }), + aom_snapshot: vec![], + timing, + }) + } + Err(error) => Err(error), + } + } } impl BrowserBackend for BrowserCallbackBackend { @@ -168,6 +317,13 @@ impl BrowserBackend for BrowserCallbackBackend { match reply { BrowserCallbackResponse::Success(success) => { + let success = match action { + Action::Click => self.execute_simulated_click(seq, expected_domain, &success)?, + Action::Type => { + self.execute_simulated_type(seq, expected_domain, ¶ms, &success)? + } + _ => success, + }; if matches!(action, Action::Navigate) { if let Some(url) = params .get("url") @@ -202,6 +358,11 @@ impl BrowserBackend for BrowserCallbackBackend { fn supports_eval(&self) -> bool { self.mac_policy.supports_pipe_action(&Action::Eval) } + + fn supports_live_input(&self) -> bool { + self.mac_policy.supports_pipe_action(&Action::Click) + && self.mac_policy.supports_pipe_action(&Action::Type) + } } fn required_string(params: &Value, key: &str) -> Result { @@ -214,6 +375,15 @@ fn required_string(params: &Value, key: &str) -> Result { .ok_or_else(|| PipeError::Protocol(format!("{key} is required"))) } +fn optional_string(params: &Value, key: &str) -> Option { + params + .get(key) + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToString::to_string) +} + fn build_get_text_js(source_url: &str, selector: &str) -> String { let escaped_source_url = escape_js_single_quoted(source_url); let escaped_selector = escape_js_single_quoted(selector); @@ -259,6 +429,62 @@ fn build_eval_js(source_url: &str, script: &str) -> String { ) } +fn build_input_probe_js( + mode: CallbackInputMode, + source_url: &str, + selector: Option<&str>, + probe_script: Option<&str>, + text: Option<&str>, +) -> Result { + let escaped_source_url = escape_js_single_quoted(source_url); + let callback = match mode { + CallbackInputMode::Click => CLICK_PROBE_CALLBACK_NAME, + CallbackInputMode::Type => TYPE_PROBE_CALLBACK_NAME, + }; + let events_url = escape_js_single_quoted(&events_endpoint_url(source_url)); + let payload_expression = match mode { + CallbackInputMode::Click => "JSON.stringify({x:x,y:y})".to_string(), + CallbackInputMode::Type => { + let escaped_text = escape_js_single_quoted(text.unwrap_or_default()); + format!("JSON.stringify({{x:x,y:y,text:'{escaped_text}'}})") + } + }; + let payload_object = match mode { + CallbackInputMode::Click => "{x:x,y:y}".to_string(), + CallbackInputMode::Type => { + let escaped_text = escape_js_single_quoted(text.unwrap_or_default()); + format!("{{x:x,y:y,text:'{escaped_text}'}}") + } + }; + let element_lookup = if let Some(script) = probe_script { + format!("(function(){{{script}}})()") + } else if let Some(selector) = selector { + let escaped_selector = escape_js_single_quoted(selector); + format!("document.querySelector('{escaped_selector}')") + } else { + return Err(PipeError::Protocol( + "selector or probe_script is required".to_string(), + )); + }; + let missing_hint = selector + .map(|value| format!("selector not found: {}", escape_js_single_quoted(value))) + .unwrap_or_else(|| "input probe target not found".to_string()); + + Ok(format!( + "(function(){{try{{\ + var el={element_lookup};\ + if(!el){{throw new Error('{missing_hint}');}}\ + var rect=(typeof el.getBoundingClientRect==='function')?el.getBoundingClientRect():null;\ + var x=rect?(rect.left+(rect.width/2)):0;\ + var y=rect?(rect.top+(rect.height/2)):0;\ + try{{callBackJsToCpp('{escaped_source_url}@_@'+window.location.href+'@_@{callback}@_@sgBrowserExcuteJsCodeByDomain@_@'+String({payload_expression}))}}catch(_){{}}\ + var j=JSON.stringify({{type:'callback',callback:'{callback}',request_url:'{escaped_source_url}',payload:{payload_object}}});\ + try{{var r=new XMLHttpRequest();r.open('POST','{events_url}',true);r.setRequestHeader('Content-Type','application/json');r.send(j)}}catch(_){{}}\ + try{{navigator.sendBeacon('{events_url}',new Blob([j],{{type:'application/json'}}))}}catch(_){{}}\ + }}catch(e){{}}}})()" + )) +} + /// Derive the callback host events endpoint URL from the helper page URL. /// e.g. "http://127.0.0.1:62819/sgclaw/browser-helper.html" /// → "http://127.0.0.1:62819/sgclaw/callback/events" @@ -297,5 +523,330 @@ fn extract_domain(url: &str) -> Result { } fn escape_js_single_quoted(raw: &str) -> String { - raw.replace('\\', "\\\\").replace('\'', "\\'") + raw.replace('\\', "\\\\") + .replace('\'', "\\'") + .replace('\n', "\\n") + .replace('\r', "\\r") + .replace('\0', "\\0") + .replace('\u{2028}', "\\u2028") + .replace('\u{2029}', "\\u2029") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::VecDeque; + + fn test_policy() -> MacPolicy { + MacPolicy::from_json_str( + r#"{ + "version": "1.0", + "domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] }, + "pipe_actions": { + "allowed": ["click", "type", "navigate", "getText", "eval"], + "blocked": [] + } + }"#, + ) + .unwrap() + } + + struct FakeCallbackHost { + requests: Mutex>, + replies: Mutex>>, + } + + impl FakeCallbackHost { + fn new(replies: Vec>) -> Self { + Self { + requests: Mutex::new(Vec::new()), + replies: Mutex::new(VecDeque::from(replies)), + } + } + + fn requests(&self) -> Vec { + self.requests.lock().unwrap().clone() + } + } + + impl BrowserCallbackHost for FakeCallbackHost { + fn execute(&self, request: BrowserCallbackRequest) -> Result { + self.requests.lock().unwrap().push(request); + self.replies + .lock() + .unwrap() + .pop_front() + .unwrap_or_else(|| Err(PipeError::Timeout)) + } + } + + fn success_reply(data: Value) -> Result { + Ok(BrowserCallbackResponse::Success(BrowserCallbackSuccess { + success: true, + data, + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + })) + } + + #[test] + fn callback_backend_click_treats_simulated_mouse_follow_up_as_fire_and_forget() { + let host = Arc::new(FakeCallbackHost::new(vec![success_reply( + json!({ "probe": { "x": 320.5, "y": 240.25 } }), + )])); + let backend = BrowserCallbackBackend::new( + host.clone(), + test_policy(), + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + ); + + let output = backend + .invoke( + Action::Click, + json!({ + "target_url": "https://zhuanlan.zhihu.com/write", + "selector": "button" + }), + "zhuanlan.zhihu.com", + ) + .unwrap(); + + assert!(output.success); + let requests = host.requests(); + assert_eq!(requests.len(), 2); + assert_eq!(requests[1].command, json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateMouse", + 320.5, + 240.25, + "left", + "", + "" + ])); + } + + #[test] + fn callback_backend_click_survives_simulated_mouse_timeout() { + let host = Arc::new(FakeCallbackHost::new(vec![ + success_reply(json!({ "probe": { "x": 320.5, "y": 240.25 } })), + Err(PipeError::Timeout), + ])); + let backend = BrowserCallbackBackend::new( + host.clone(), + test_policy(), + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + ); + + let output = backend + .invoke( + Action::Click, + json!({ + "target_url": "https://zhuanlan.zhihu.com/write", + "selector": "button" + }), + "zhuanlan.zhihu.com", + ) + .expect("simulated mouse timeout should be treated as fire-and-forget success"); + + assert!(output.success); + let requests = host.requests(); + assert_eq!(requests.len(), 2); + } + + #[test] + fn callback_backend_click_uses_domain_probe_then_simulated_mouse_input() { + let host = Arc::new(FakeCallbackHost::new(vec![ + success_reply(json!({ "probe": { "x": 320.5, "y": 240.25 } })), + success_reply(json!({ "clicked": true })), + ])); + let backend = BrowserCallbackBackend::new( + host.clone(), + test_policy(), + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + ); + + let output = backend + .invoke( + Action::Click, + json!({ + "target_url": "https://zhuanlan.zhihu.com/write", + "selector": "button" + }), + "zhuanlan.zhihu.com", + ) + .unwrap(); + + assert!(output.success); + let requests = host.requests(); + assert_eq!(requests.len(), 2); + assert_eq!(requests[0].action, "click"); + assert_eq!(requests[0].command[1], json!("sgBrowserExcuteJsCodeByDomain")); + assert_eq!(requests[0].command[2], json!("zhuanlan.zhihu.com")); + let script = requests[0].command[3].as_str().unwrap(); + assert!(script.contains("document.querySelector('button')")); + assert!(script.contains("sgclawOnClick")); + assert_eq!(requests[1].action, "click"); + assert_eq!(requests[1].command, json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateMouse", + 320.5, + 240.25, + "left", + "", + "" + ])); + } + + #[test] + fn callback_backend_type_treats_simulated_keyboard_follow_up_as_fire_and_forget() { + let host = Arc::new(FakeCallbackHost::new(vec![success_reply( + json!({ "probe": { "x": 160.0, "y": 90.0, "text": "正文" } }), + )])); + let backend = BrowserCallbackBackend::new( + host.clone(), + test_policy(), + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + ); + + let output = backend + .invoke( + Action::Type, + json!({ + "target_url": "https://zhuanlan.zhihu.com/write", + "selector": "div[contenteditable='true']", + "text": "正文" + }), + "zhuanlan.zhihu.com", + ) + .unwrap(); + + assert!(output.success); + let requests = host.requests(); + assert_eq!(requests.len(), 2); + assert_eq!(requests[1].command, json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateKeyborad", + 160.0, + 90.0, + "正文" + ])); + } + + #[test] + fn callback_backend_type_uses_custom_probe_script_when_provided() { + let host = Arc::new(FakeCallbackHost::new(vec![ + success_reply(json!({ "probe": { "x": 160.0, "y": 90.0, "text": "正文" } })), + success_reply(json!({ "typed": true })), + ])); + let backend = BrowserCallbackBackend::new( + host.clone(), + test_policy(), + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + ); + + let output = backend + .invoke( + Action::Type, + json!({ + "target_url": "https://zhuanlan.zhihu.com/write", + "probe_script": "return document.body;", + "text": "正文" + }), + "zhuanlan.zhihu.com", + ) + .unwrap(); + + assert!(output.success); + let requests = host.requests(); + assert_eq!(requests.len(), 2); + let script = requests[0].command[3].as_str().unwrap(); + assert!(script.contains("return document.body;")); + assert!(!script.contains("selector not found: div[contenteditable='true']")); + assert_eq!(requests[1].command, json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateKeyborad", + 160.0, + 90.0, + "正文" + ])); + } + + #[test] + fn callback_backend_type_uses_domain_probe_then_simulated_keyboard_input() { + let host = Arc::new(FakeCallbackHost::new(vec![ + success_reply(json!({ "probe": { "x": 160.0, "y": 90.0, "text": "正文" } })), + success_reply(json!({ "typed": true })), + ])); + let backend = BrowserCallbackBackend::new( + host.clone(), + test_policy(), + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + ); + + let output = backend + .invoke( + Action::Type, + json!({ + "target_url": "https://zhuanlan.zhihu.com/write", + "selector": "div[contenteditable='true']", + "text": "正文" + }), + "zhuanlan.zhihu.com", + ) + .unwrap(); + + assert!(output.success); + let requests = host.requests(); + assert_eq!(requests.len(), 2); + assert_eq!(requests[0].action, "type"); + assert_eq!(requests[0].command[1], json!("sgBrowserExcuteJsCodeByDomain")); + assert_eq!(requests[0].command[2], json!("zhuanlan.zhihu.com")); + let script = requests[0].command[3].as_str().unwrap(); + assert!(script.contains("document.querySelector('div[contenteditable=\\'true\\']')")); + assert!(script.contains("sgclawOnType")); + assert!(!script.contains("el.value=")); + assert_eq!(requests[1].action, "type"); + assert_eq!(requests[1].command, json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateKeyborad", + 160.0, + 90.0, + "正文" + ])); + } + + #[test] + fn escape_js_single_quoted_escapes_newlines_and_control_chars() { + let raw = "第一行\n第二行\r\n第三行"; + let escaped = escape_js_single_quoted(raw); + assert!(!escaped.contains('\n'), "literal newline must be escaped"); + assert!(!escaped.contains('\r'), "literal carriage return must be escaped"); + assert!(escaped.contains("\\n"), "should contain escaped newline"); + assert!(escaped.contains("\\r"), "should contain escaped carriage return"); + assert_eq!(escaped, "第一行\\n第二行\\r\\n第三行"); + } + + #[test] + fn type_probe_script_with_multiline_text_is_valid_js() { + let text_with_newlines = "标题\n\n正文第一段\n正文第二段"; + let js = build_input_probe_js( + CallbackInputMode::Type, + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + Some("div[contenteditable='true']"), + None, + Some(text_with_newlines), + ) + .unwrap(); + // The generated JS must NOT contain literal newlines inside single-quoted strings. + // Split on single quotes and check inner segments. + assert!( + !js.contains("标题\n"), + "literal newline must not appear in the JS probe script" + ); + assert!(js.contains("标题\\n")); + assert!(js.contains("sgclawOnTypeProbe")); + } } diff --git a/src/browser/callback_host.rs b/src/browser/callback_host.rs index 10ce627..437da8a 100644 --- a/src/browser/callback_host.rs +++ b/src/browser/callback_host.rs @@ -27,6 +27,10 @@ const COMMAND_POLL_INTERVAL: Duration = Duration::from_millis(25); const HELPER_POLL_INTERVAL: Duration = Duration::from_millis(50); const HELPER_BOOTSTRAP_ACTION: &str = "sgBrowerserOpenPage"; const NAVIGATE_CALLBACK_NAME: &str = "sgclawOnLoaded"; +const CLICK_PROBE_CALLBACK_NAME: &str = "sgclawOnClickProbe"; +const CLICK_CALLBACK_NAME: &str = "sgclawOnClick"; +const TYPE_PROBE_CALLBACK_NAME: &str = "sgclawOnTypeProbe"; +const TYPE_CALLBACK_NAME: &str = "sgclawOnType"; const GET_TEXT_CALLBACK_NAME: &str = "sgclawOnGetText"; const EVAL_CALLBACK_NAME: &str = "sgclawOnEval"; @@ -196,6 +200,15 @@ impl BrowserCallbackHost { pub(crate) fn acknowledge_in_flight_command(&self) -> Option { self.state.lock().unwrap().in_flight_command.take() } + + /// Clear all pending state so the host can be reused for the next task + /// without reopening the helper page. + pub(crate) fn reset_pending_state(&self) { + let mut state = self.state.lock().unwrap(); + state.pending_results.clear(); + state.pending_commands.clear(); + state.in_flight_command = None; + } } impl LiveBrowserCallbackHost { @@ -241,6 +254,25 @@ impl LiveBrowserCallbackHost { pub(crate) fn helper_url(&self) -> &str { self.host.helper_url() } + + pub(crate) fn reset_pending_state(&self) { + self.host.reset_pending_state(); + } +} + +fn command_is_fire_and_forget(request: &BrowserCallbackRequest) -> bool { + if request.action == "navigate" { + return true; + } + + request + .command + .as_array() + .and_then(|items| items.get(1)) + .and_then(Value::as_str) + .is_some_and(|opcode| { + opcode == "sgBroewserSimulateMouse" || opcode == "sgBroewserSimulateKeyborad" + }) } impl BrowserCallbackExecutor for LiveBrowserCallbackHost { @@ -250,10 +282,11 @@ impl BrowserCallbackExecutor for LiveBrowserCallbackHost { self.host.enqueue_command(command_from_request(&request.command)?); // Navigate uses sgBrowerserOpenPage which opens a new tab without a JS - // callback. We only wait long enough for the helper page to pick up the - // command via its 250 ms poll interval and forward it over WebSocket. - // The caller (workflow executor) polls for page readiness separately. - let is_fire_and_forget = request.action == "navigate"; + // callback. Simulated mouse/keyboard follow-up commands also do not emit + // a helper-page callback; the caller validates their effect with a later + // eval/get-text step. We only wait long enough for the helper page poller + // to ACK and forward those commands. + let is_fire_and_forget = command_is_fire_and_forget(&request); let timeout = if is_fire_and_forget { Duration::from_millis(1500) } else { @@ -635,6 +668,33 @@ fn normalize_callback_result( timing: elapsed_timing(elapsed), })) } + "click" if result.callback == CLICK_PROBE_CALLBACK_NAME => { + let x = result.payload.get("x").and_then(Value::as_f64)?; + let y = result.payload.get("y").and_then(Value::as_f64)?; + Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { + success: true, + data: json!({ + "probe": { "x": x, "y": y }, + "callback": CLICK_CALLBACK_NAME, + }), + aom_snapshot: vec![], + timing: elapsed_timing(elapsed), + })) + } + "type" if result.callback == TYPE_PROBE_CALLBACK_NAME => { + let x = result.payload.get("x").and_then(Value::as_f64)?; + let y = result.payload.get("y").and_then(Value::as_f64)?; + let text = result.payload.get("text").and_then(Value::as_str).unwrap_or_default(); + Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { + success: true, + data: json!({ + "probe": { "x": x, "y": y, "text": text }, + "callback": TYPE_CALLBACK_NAME, + }), + aom_snapshot: vec![], + timing: elapsed_timing(elapsed), + })) + } // Path A: The browser's native callBackJsToCpp routes the callback to // the helper page and calls sgclawOnGetText / sgclawOnEval directly. // The helper page POSTs to the events endpoint with the callback name @@ -661,7 +721,7 @@ fn normalize_callback_result( // callBackJsToCpp function with the @_@ delimited string. The helper // page parses it and POSTs to the events endpoint with callback: // "callBackJsToCpp" and payload: { raw: "..." }. - "getText" | "eval" if result.callback == "callBackJsToCpp" => { + "getText" | "eval" | "click" | "type" if result.callback == "callBackJsToCpp" => { let raw = result.payload.get("raw").and_then(Value::as_str)?; let parsed = match parse_callback_js_payload(raw) { Ok(parsed) => parsed, @@ -676,12 +736,46 @@ fn normalize_callback_result( if parsed.callback != expected_callback { return None; } - Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { - success: true, - data: json!({ "text": parsed.response_text }), - aom_snapshot: vec![], - timing: elapsed_timing(elapsed), - })) + match request.action.as_str() { + "click" => { + let probe: Value = serde_json::from_str(&parsed.response_text).ok()?; + let x = probe.get("x").and_then(Value::as_f64)?; + let y = probe.get("y").and_then(Value::as_f64)?; + Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { + success: true, + data: json!({ + "probe": { "x": x, "y": y }, + "callback": CLICK_CALLBACK_NAME, + }), + aom_snapshot: vec![], + timing: elapsed_timing(elapsed), + })) + } + "type" => { + let probe: Value = serde_json::from_str(&parsed.response_text).ok()?; + let x = probe.get("x").and_then(Value::as_f64)?; + let y = probe.get("y").and_then(Value::as_f64)?; + let text = probe.get("text").and_then(Value::as_str).unwrap_or_default(); + Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { + success: true, + data: json!({ + "probe": { "x": x, "y": y, "text": text }, + "callback": TYPE_CALLBACK_NAME, + }), + aom_snapshot: vec![], + timing: elapsed_timing(elapsed), + })) + } + _ => { + // getText / eval — return raw text + Some(BrowserCallbackResponse::Success(BrowserCallbackSuccess { + success: true, + data: json!({ "text": parsed.response_text }), + aom_snapshot: vec![], + timing: elapsed_timing(elapsed), + })) + } + } } _ => None, } @@ -713,6 +807,8 @@ fn parse_callback_js_payload(raw: &str) -> Result Result<&'static str, PipeError> { match action { "navigate" => Ok(NAVIGATE_CALLBACK_NAME), + "click" => Ok(CLICK_PROBE_CALLBACK_NAME), + "type" => Ok(TYPE_PROBE_CALLBACK_NAME), "getText" => Ok(GET_TEXT_CALLBACK_NAME), "eval" => Ok(EVAL_CALLBACK_NAME), other => Err(PipeError::Protocol(format!( @@ -731,12 +827,28 @@ fn elapsed_timing(elapsed: Duration) -> Timing { fn build_helper_page_html(loopback_origin: &str, helper_url: &str, browser_ws_url: &str) -> String { format!( r#" - - - - sgClaw Browser Helper - +sgClaw · Runtime Console + +

sgClaw · Runtime Console

Browser Automation Agent

Connecting…
+
Commands
0
Callbacks
0
Uptime
0s
+
Current
Initializing…
+
+
sgClaw v0.1 · Browser Callback Host
@@ -945,6 +1106,40 @@ mod tests { ); } + #[test] + fn live_callback_host_treats_simulated_mouse_command_as_fire_and_forget() { + use crate::browser::callback_backend::{ + BrowserCallbackHost as BrowserCallbackExecutor, BrowserCallbackRequest, + }; + use std::sync::atomic::AtomicBool; + + let host = LiveBrowserCallbackHost { + host: Arc::new(BrowserCallbackHost::new()), + shutdown: Arc::new(AtomicBool::new(false)), + server_thread: Mutex::new(None), + command_lock: Mutex::new(()), + result_timeout: Duration::from_millis(10), + }; + + let response = host.execute(BrowserCallbackRequest { + seq: 1, + request_url: "http://127.0.0.1:17888/sgclaw/browser-helper.html".to_string(), + expected_domain: "zhuanlan.zhihu.com".to_string(), + action: "click".to_string(), + command: json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBroewserSimulateMouse", + 320.5, + 240.25, + "left", + "", + "" + ]), + }); + + assert!(response.is_ok(), "simulated mouse follow-up should not wait for a callback"); + } + #[test] fn callback_host_exposes_loopback_helper_url_and_release_helper_html() { let host = BrowserCallbackHost::new(); @@ -959,6 +1154,10 @@ mod tests { assert!(html.contains(r#"JSON.stringify({ type: 'register', role: 'web' })"#)); assert!(html.contains("sgclawReady")); assert!(html.contains("sgclawOnLoaded")); + assert!(html.contains("sgclawOnClickProbe")); + assert!(html.contains("sgclawOnClick")); + assert!(html.contains("sgclawOnTypeProbe")); + assert!(html.contains("sgclawOnType")); assert!(html.contains("sgclawOnGetText")); assert!(html.contains("sgclawOnEval")); assert!(html.contains("/sgclaw/callback/ready")); @@ -1102,4 +1301,107 @@ mod tests { ); assert!(host.acknowledge_in_flight_command().is_none()); } + + // ── Path B callBackJsToCpp normalization tests ──────────────────── + + use super::normalize_callback_result; + use crate::browser::callback_backend::BrowserCallbackRequest; + + fn make_request(action: &str) -> BrowserCallbackRequest { + BrowserCallbackRequest { + seq: 1, + request_url: "http://127.0.0.1:17888/sgclaw/browser-helper.html".to_string(), + expected_domain: "zhuanlan.zhihu.com".to_string(), + action: action.to_string(), + command: json!([ + "http://127.0.0.1:17888/sgclaw/browser-helper.html", + "sgBrowserExcuteJsCodeByDomain", + "zhuanlan.zhihu.com", + "(function(){ /* probe */ })()" + ]), + } + } + + fn make_callback_js_to_cpp_result(raw: &str) -> CallbackResult { + CallbackResult { + callback: "callBackJsToCpp".to_string(), + request_url: "http://127.0.0.1:17888/sgclaw/browser-helper.html".to_string(), + target_url: Some("https://zhuanlan.zhihu.com/write".to_string()), + action: Some("sgBrowserExcuteJsCodeByDomain".to_string()), + payload: json!({ "raw": raw }), + } + } + + #[test] + fn normalize_callback_result_path_b_click_probe() { + let request = make_request("click"); + let raw = "https://zhuanlan.zhihu.com/write@_@https://zhuanlan.zhihu.com/write@_@sgclawOnClickProbe@_@sgBrowserExcuteJsCodeByDomain@_@{\"x\":320.5,\"y\":240.25}"; + let result = make_callback_js_to_cpp_result(raw); + + let response = normalize_callback_result(&request, result, Duration::from_millis(10)); + assert!(response.is_some(), "Path B click should produce a response"); + match response.unwrap() { + super::super::callback_backend::BrowserCallbackResponse::Success(s) => { + let probe = s.data.get("probe").expect("should have probe"); + assert_eq!(probe.get("x").unwrap().as_f64().unwrap(), 320.5); + assert_eq!(probe.get("y").unwrap().as_f64().unwrap(), 240.25); + assert_eq!( + s.data.get("callback").unwrap().as_str().unwrap(), + "sgclawOnClick" + ); + } + other => panic!("expected Success, got {other:?}"), + } + } + + #[test] + fn normalize_callback_result_path_b_type_probe() { + let request = make_request("type"); + let raw = "https://zhuanlan.zhihu.com/write@_@https://zhuanlan.zhihu.com/write@_@sgclawOnTypeProbe@_@sgBrowserExcuteJsCodeByDomain@_@{\"x\":100,\"y\":200,\"text\":\"hello\"}"; + let result = make_callback_js_to_cpp_result(raw); + + let response = normalize_callback_result(&request, result, Duration::from_millis(10)); + assert!(response.is_some(), "Path B type should produce a response"); + match response.unwrap() { + super::super::callback_backend::BrowserCallbackResponse::Success(s) => { + let probe = s.data.get("probe").expect("should have probe"); + assert_eq!(probe.get("x").unwrap().as_f64().unwrap(), 100.0); + assert_eq!(probe.get("y").unwrap().as_f64().unwrap(), 200.0); + assert_eq!(probe.get("text").unwrap().as_str().unwrap(), "hello"); + assert_eq!( + s.data.get("callback").unwrap().as_str().unwrap(), + "sgclawOnType" + ); + } + other => panic!("expected Success, got {other:?}"), + } + } + + #[test] + fn normalize_callback_result_path_b_click_wrong_callback_returns_none() { + let request = make_request("click"); + // callback name is sgclawOnTypeProbe (wrong for click action) + let raw = "https://zhuanlan.zhihu.com/write@_@https://zhuanlan.zhihu.com/write@_@sgclawOnTypeProbe@_@sgBrowserExcuteJsCodeByDomain@_@{\"x\":1,\"y\":2}"; + let result = make_callback_js_to_cpp_result(raw); + + let response = normalize_callback_result(&request, result, Duration::from_millis(10)); + assert!(response.is_none(), "mismatched callback name should return None"); + } + + #[test] + fn normalize_callback_result_path_b_eval_still_works() { + let request = make_request("eval"); + let raw = "https://zhuanlan.zhihu.com/write@_@https://zhuanlan.zhihu.com/write@_@sgclawOnEval@_@sgBrowserExcuteJsCodeByDomain@_@{\"status\":\"ok\"}"; + let result = make_callback_js_to_cpp_result(raw); + + let response = normalize_callback_result(&request, result, Duration::from_millis(10)); + assert!(response.is_some(), "Path B eval should still work"); + match response.unwrap() { + super::super::callback_backend::BrowserCallbackResponse::Success(s) => { + let text = s.data.get("text").unwrap().as_str().unwrap(); + assert_eq!(text, r#"{"status":"ok"}"#); + } + other => panic!("expected Success, got {other:?}"), + } + } } diff --git a/src/compat/orchestration.rs b/src/compat/orchestration.rs index a168062..88675d7 100644 --- a/src/compat/orchestration.rs +++ b/src/compat/orchestration.rs @@ -50,6 +50,7 @@ pub fn execute_task_with_browser_backend( instruction, task_context, route, + settings, ); } } @@ -75,6 +76,7 @@ pub fn execute_task_with_browser_backend( instruction, task_context, route, + settings, ) } (_, Ok(summary)) => Ok(summary), @@ -85,6 +87,7 @@ pub fn execute_task_with_browser_backend( instruction, task_context, route, + settings, ), (None, Err(err)) => Err(err), } @@ -112,6 +115,7 @@ pub fn execute_task_with_sgclaw_settings( instruction, task_context, route, + settings, ); } } @@ -137,6 +141,7 @@ pub fn execute_task_with_sgclaw_settings( instruction, task_context, route, + settings, ) } (_, Ok(summary)) => Ok(summary), @@ -147,6 +152,7 @@ pub fn execute_task_with_sgclaw_settings( instruction, task_context, route, + settings, ), (None, Err(err)) => Err(err), } diff --git a/src/compat/runtime.rs b/src/compat/runtime.rs index b2a449b..b366dc5 100644 --- a/src/compat/runtime.rs +++ b/src/compat/runtime.rs @@ -16,6 +16,7 @@ use crate::compat::config_adapter::{ build_zeroclaw_config_from_sgclaw_settings, resolve_skills_dir_from_sgclaw_settings, }; use crate::compat::event_bridge::log_entry_for_turn_event; +use crate::compat::workflow_executor::parse_generated_article_draft; use crate::compat::openxml_office_tool::OpenXmlOfficeTool; use crate::compat::screen_html_export_tool::ScreenHtmlExportTool; use crate::config::{DeepSeekSettings, OfficeBackend, SgClawSettings}; @@ -101,6 +102,43 @@ pub fn execute_task_with_sgclaw_settings( )) } +pub(crate) fn generate_zhihu_article_draft( + instruction: &str, + topic: &str, + _task_context: &CompatTaskContext, + workspace_root: &Path, + settings: &SgClawSettings, +) -> Result { + let mut generation_settings = settings.clone(); + generation_settings.runtime_profile = crate::runtime::RuntimeProfile::GeneralAssistant; + + let config = build_zeroclaw_config_from_sgclaw_settings(workspace_root, &generation_settings); + let provider = build_provider(&config)?; + let runtime = tokio::runtime::Runtime::new() + .map_err(|err| PipeError::Protocol(format!("failed to create tokio runtime: {err}")))?; + let generation_prompt = format!( + "为知乎文章生成可直接发布的草稿。用户原始请求:{instruction}\n\n主题:{topic}\n\n请严格只输出以下格式,不要添加解释、前言、代码块或其他内容:\n标题:<简洁具体的中文标题>\n正文:<适合知乎发布的中文正文,使用自然段>" + ); + + let generated = runtime.block_on(async move { + provider + .chat_with_system( + Some("You write concise Chinese Zhihu article drafts. Return only the requested title/body format."), + &generation_prompt, + config.default_model.as_deref().unwrap_or("deepseek-chat"), + config.default_temperature, + ) + .await + .map_err(map_anyhow_to_pipe_error) + })?; + + parse_generated_article_draft(&generated).ok_or_else(|| { + PipeError::Protocol(format!( + "generated Zhihu article draft did not match 标题/正文 format: {generated}" + )) + }) +} + pub async fn execute_task_with_provider( transport: &dyn crate::agent::AgentEventSink, browser_backend: Arc, diff --git a/src/compat/workflow_executor.rs b/src/compat/workflow_executor.rs index a7d8b5f..d4c0d31 100644 --- a/src/compat/workflow_executor.rs +++ b/src/compat/workflow_executor.rs @@ -12,6 +12,7 @@ use crate::browser::{BrowserBackend, PipeBrowserBackend}; use crate::compat::openxml_office_tool::OpenXmlOfficeTool; use crate::compat::runtime::CompatTaskContext; use crate::compat::screen_html_export_tool::ScreenHtmlExportTool; +use crate::config::SgClawSettings; use crate::pipe::{ Action, AgentMessage, BrowserPipeTool, ConversationMessage, PipeError, Transport, }; @@ -37,6 +38,7 @@ pub enum WorkflowRoute { ZhihuArticleEntry, ZhihuArticleDraft, ZhihuArticlePublish, + ZhihuArticleAutoPublishGenerated, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -47,7 +49,7 @@ struct HotlistItem { } #[derive(Debug, Clone, PartialEq, Eq)] -struct ArticleDraft { +pub(crate) struct ArticleDraft { title: String, body: String, } @@ -72,6 +74,9 @@ pub fn detect_route( return Some(WorkflowRoute::ZhihuHotlistExportXlsx); } } + if task_requests_zhihu_generated_article_publish(instruction, page_url, page_title) { + return Some(WorkflowRoute::ZhihuArticleAutoPublishGenerated); + } if task_requests_zhihu_article_entry(instruction, page_url, page_title) { return Some(WorkflowRoute::ZhihuArticleEntry); } @@ -92,6 +97,7 @@ pub fn prefers_direct_execution(route: &WorkflowRoute) -> bool { | WorkflowRoute::ZhihuArticleEntry | WorkflowRoute::ZhihuArticleDraft | WorkflowRoute::ZhihuArticlePublish + | WorkflowRoute::ZhihuArticleAutoPublishGenerated ) } @@ -117,6 +123,7 @@ pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bo | WorkflowRoute::ZhihuArticleEntry | WorkflowRoute::ZhihuArticleDraft | WorkflowRoute::ZhihuArticlePublish + | WorkflowRoute::ZhihuArticleAutoPublishGenerated ) } @@ -127,6 +134,7 @@ pub fn execute_route_with_browser_backend( instruction: &str, task_context: &CompatTaskContext, route: WorkflowRoute, + settings: &SgClawSettings, ) -> Result { match route { WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen => { @@ -152,6 +160,8 @@ pub fn execute_route_with_browser_backend( instruction, task_context, false, + false, + None, ), WorkflowRoute::ZhihuArticlePublish => execute_zhihu_article_route( transport, @@ -159,7 +169,19 @@ pub fn execute_route_with_browser_backend( instruction, task_context, true, + false, + None, ), + WorkflowRoute::ZhihuArticleAutoPublishGenerated => { + execute_generated_zhihu_article_publish_route( + transport, + browser_backend.as_ref(), + instruction, + task_context, + workspace_root, + settings, + ) + } } } @@ -170,6 +192,7 @@ pub fn execute_route( instruction: &str, task_context: &CompatTaskContext, route: WorkflowRoute, + settings: &SgClawSettings, ) -> Result { let browser_backend: Arc = Arc::new(PipeBrowserBackend::from_inner(browser_tool.clone())); @@ -180,6 +203,7 @@ pub fn execute_route( instruction, task_context, route, + settings, ) } @@ -444,15 +468,17 @@ fn execute_zhihu_article_route( instruction: &str, task_context: &CompatTaskContext, publish_mode: bool, + publish_authorized: bool, + article_override: Option, ) -> Result { - let Some(article) = extract_article_draft(instruction, &task_context.messages) else { + let Some(article) = article_override.or_else(|| extract_article_draft(instruction, &task_context.messages)) else { return Ok( "这类知乎文章任务需要同时提供标题和正文后我才能继续确定性写作流程。请按“标题:…\\n正文:…”的格式补充内容。" .to_string(), ); }; - if publish_mode && !has_explicit_publish_confirmation(instruction) { + if publish_mode && !publish_authorized && !has_explicit_publish_confirmation(instruction) { return Ok(build_publish_confirmation_message(&article)); } @@ -506,17 +532,21 @@ fn execute_zhihu_article_route( level: "info".to_string(), message: "call zhihu-write.fill_article_draft".to_string(), })?; - let fill_result = execute_browser_skill_script( - browser_tool, - "zhihu-write", - "fill_article_draft.js", - json!({ - "title": article.title, - "body": article.body, - "publish_mode": publish_mode.to_string(), - }), - ZHIHU_EDITOR_DOMAIN, - )?; + let fill_result = if browser_tool.supports_live_input() { + execute_zhihu_fill_via_live_input(browser_tool, &article, publish_mode)? + } else { + execute_browser_skill_script( + browser_tool, + "zhihu-write", + "fill_article_draft.js", + json!({ + "title": article.title, + "body": article.body, + "publish_mode": publish_mode.to_string(), + }), + ZHIHU_EDITOR_DOMAIN, + )? + }; if is_login_required_payload(&fill_result) { return Ok(build_login_block_message(payload_current_url(&fill_result))); } @@ -541,6 +571,66 @@ fn execute_zhihu_article_route( } } +fn execute_generated_zhihu_article_publish_route( + transport: &dyn crate::agent::AgentEventSink, + browser_tool: &dyn BrowserBackend, + instruction: &str, + task_context: &CompatTaskContext, + workspace_root: &Path, + settings: &SgClawSettings, +) -> Result { + let Some(topic) = extract_generated_article_topic(instruction) else { + return Ok("请按“在知乎自动发表一篇名称为…”或“在知乎自动发布一篇标题为…”的格式提供文章名称。".to_string()); + }; + + let article = crate::compat::runtime::generate_zhihu_article_draft( + instruction, + &topic, + task_context, + workspace_root, + settings, + )?; + + execute_zhihu_article_route( + transport, + browser_tool, + instruction, + task_context, + true, + true, + Some(article), + ) +} + +fn extract_generated_article_topic(instruction: &str) -> Option { + let normalized = normalize_article_draft_input(instruction); + let name_re = Regex::new(r"(?:名称|标题)(?:是|为)\s*([^,,。\n]+)").expect("valid generated zhihu topic regex"); + name_re + .captures(&normalized) + .and_then(|capture| capture.get(1)) + .map(|value| value.as_str().trim().to_string()) + .filter(|value| !value.is_empty()) +} + +fn task_requests_zhihu_generated_article_publish( + instruction: &str, + page_url: Option<&str>, + page_title: Option<&str>, +) -> bool { + if !crate::runtime::is_zhihu_write_task(instruction, page_url, page_title) { + return false; + } + + let normalized = instruction.to_ascii_lowercase(); + let asks_auto_publish = instruction.contains("自动发表") + || instruction.contains("自动发布") + || (normalized.contains("auto publish") && normalized.contains("zhihu")); + let has_topic_only = extract_generated_article_topic(instruction).is_some(); + let already_has_full_draft = parse_article_draft(instruction).is_some(); + + asks_auto_publish && has_topic_only && !already_has_full_draft +} + fn execute_zhihu_article_entry_route( transport: &dyn crate::agent::AgentEventSink, browser_tool: &dyn BrowserBackend, @@ -703,18 +793,222 @@ fn execute_browser_skill_script( )) } +fn live_input_probe_script(selector_candidates: &[&str]) -> String { + let selectors_json = serde_json::to_string(selector_candidates).expect("valid selector candidates"); + format!( + "var selectors={selectors_json};for(var i=0;i0||rect.height>0;if(!visible){{continue;}}return node;}}}}return null;" + ) +} + +fn execute_zhihu_fill_via_live_input( + browser_tool: &dyn BrowserBackend, + article: &ArticleDraft, + publish_mode: bool, +) -> Result { + let title_probe = live_input_probe_script(&[ + "textarea[placeholder*='标题']", + "input[placeholder*='标题']", + "textarea[data-placeholder*='标题']", + "input[data-placeholder*='标题']", + "[role='textbox'][aria-label*='标题']", + "[contenteditable='true'][aria-label*='标题']", + "[contenteditable='true'][data-placeholder*='标题']", + ]); + let body_probe = live_input_probe_script(&[ + "div[contenteditable='true'][role='textbox']", + "div.public-DraftEditor-content[contenteditable='true']", + "[role='textbox'][contenteditable='true']", + "[contenteditable='true'][data-placeholder]", + "div[contenteditable='true']", + ]); + + // ── Step 1: Click title field ────────────────────────────── + eprintln!("live_input: step 1 — click title field"); + browser_tool.invoke( + Action::Click, + json!({ + "target_url": ZHIHU_EDITOR_URL, + "probe_script": title_probe, + }), + ZHIHU_EDITOR_DOMAIN, + )?; + + // ── Step 2: Animated title typing (eval-based) ────────────── + // Instead of fire-and-forget keyboard simulation, we use a single eval + // that types the title character-by-character using setTimeout. This + // produces a visible typing animation for demo purposes and also uses + // the native value setter so React detects the changes. + let title_json = serde_json::to_string(&article.title).unwrap_or_else(|_| "\"\"".into()); + let title_chars = article.title.chars().count(); + let title_chunk = 3usize; + let title_delay = 50u64; + let title_wait = ((title_chars + title_chunk - 1) / title_chunk) as u64 * title_delay + 300; + let title_script = format!( + r#"(function(){{ +var sels=["textarea[placeholder*='标题']","input[placeholder*='标题']", + "textarea[data-placeholder*='标题']","input[data-placeholder*='标题']", + "[role='textbox'][aria-label*='标题']"]; +var node=null; +for(var i=0;i0&&r.height>0){{node=ns[j];break;}}}}if(node)break;}} +if(!node)return JSON.stringify({{status:'error',msg:'title_not_found'}}); +node.focus(); +var text={title_json}; +var proto=node instanceof HTMLTextAreaElement?HTMLTextAreaElement.prototype:HTMLInputElement.prototype; +var desc=Object.getOwnPropertyDescriptor(proto,'value'); +var set=(desc&&desc.set)?function(v){{desc.set.call(node,v);}}:function(v){{node.value=v;}}; +var typed='',cs={title_chunk},chunks=[]; +for(var i=0;i=chunks.length)return;typed+=chunks[idx];set(typed); + node.dispatchEvent(new Event('input',{{bubbles:true}}));idx++; + if(idx0&&r.height>0){{ed=ns[j];break;}}}}if(ed)break;}} +if(!ed)return JSON.stringify({{status:'error',msg:'body_not_found'}}); +ed.focus(); +var text={body_json};var cs={body_chunk};var chunks=[]; +for(var i=0;i=chunks.length)return; + ed.focus(); + var sel=window.getSelection();var range=document.createRange(); + range.selectNodeContents(ed);range.collapse(false); + sel.removeAllRanges();sel.addRange(range); + var dt=new DataTransfer();dt.setData('text/plain',chunks[idx]); + ed.dispatchEvent(new ClipboardEvent('paste',{{bubbles:true,cancelable:true,clipboardData:dt}})); + idx++;if(idx0&&r.height>0;} + var cs=Array.from(document.querySelectorAll("button,[role='button'],a")).filter(vis); + var btn=cs.find(function(n){return ct(n.textContent)==='发布';}) + ||cs.find(function(n){var t=ct(n.textContent);return t.includes('发布')&&!t.includes('设置');}); + if(!btn)return JSON.stringify({status:'publish_button_missing'}); + if(btn.disabled)return JSON.stringify({status:'publish_button_missing',publish_button_disabled:true}); + btn.click(); + var cs2=Array.from(document.querySelectorAll("button,[role='button'],a")).filter(vis); + var cfm=cs2.find(function(n){return ct(n.textContent)==='确认发布';}); + if(cfm&&!cfm.disabled){cfm.click();return JSON.stringify({status:'publish_submitted'});} + return JSON.stringify({status:'publish_clicked'}); + })()"#; + + let response = browser_tool.invoke( + Action::Eval, + json!({ "script": publish_script }), + ZHIHU_EDITOR_DOMAIN, + )?; + if !response.success { + return Err(PipeError::Protocol(format!( + "publish button click failed: {}", response.data + ))); + } + Ok(normalize_payload( + response.data.get("text").unwrap_or(&response.data), + )) +} + fn navigate_to_editor_after_creator_entry( transport: &dyn crate::agent::AgentEventSink, browser_tool: &dyn BrowserBackend, creator_state: &Value, ) -> Result<(), PipeError> { let status = payload_status(creator_state); - if status == Some("editor_ready") { + let current_url = payload_current_url(creator_state).unwrap_or(""); + if status == Some("editor_ready") && current_url.starts_with(ZHIHU_EDITOR_URL) { return Ok(()); } let target_url = payload_next_url(creator_state).unwrap_or(ZHIHU_EDITOR_URL); - if status == Some("creator_entry_clicked") || status == Some("creator_entry_found") { + if status == Some("creator_entry_clicked") + || status == Some("creator_entry_found") + || status == Some("editor_ready") + { transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: format!("navigate {target_url}"), @@ -741,9 +1035,20 @@ mod tests { use std::collections::VecDeque; use std::sync::{Arc, Mutex}; + use crate::config::SgClawSettings; use crate::pipe::{BrowserMessage, CommandOutput, ExecutionSurfaceMetadata, Timing}; use crate::security::MacPolicy; + fn test_settings() -> SgClawSettings { + SgClawSettings::from_legacy_deepseek_fields( + "test-key".to_string(), + "http://127.0.0.1:9".to_string(), + "deepseek-chat".to_string(), + None, + ) + .unwrap() + } + struct MockWorkflowTransport { sent: Mutex>, responses: Mutex>, @@ -809,6 +1114,7 @@ mod tests { struct FakeBrowserBackend { responses: Mutex>>, invocations: Mutex>, + supports_live_input: bool, } impl FakeBrowserBackend { @@ -816,9 +1122,15 @@ mod tests { Self { responses: Mutex::new(VecDeque::from(responses)), invocations: Mutex::new(Vec::new()), + supports_live_input: false, } } + fn with_live_input(mut self) -> Self { + self.supports_live_input = true; + self + } + fn invocations(&self) -> Vec<(Action, Value, String)> { self.invocations.lock().unwrap().clone() } @@ -845,6 +1157,10 @@ mod tests { fn surface_metadata(&self) -> ExecutionSurfaceMetadata { ExecutionSurfaceMetadata::privileged_browser_pipe("fake_backend") } + + fn supports_live_input(&self) -> bool { + self.supports_live_input + } } #[test] @@ -895,6 +1211,7 @@ mod tests { "打开知乎写文章页面", &CompatTaskContext::default(), WorkflowRoute::ZhihuArticleEntry, + &test_settings(), ) .expect("ws-style backend should satisfy direct route execution"); @@ -935,6 +1252,532 @@ mod tests { ); } + #[test] + fn execute_route_with_browser_backend_navigates_to_editor_when_creator_script_misreports_ready_on_www() { + let transport = Arc::new(MockWorkflowTransport::new(vec![])); + let backend = Arc::new(FakeBrowserBackend::new(vec![ + Ok(CommandOutput { + seq: 1, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 2, + success: true, + data: json!({ + "text": { + "status": "editor_ready", + "current_url": "https://www.zhihu.com/", + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 3, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 4, + success: true, + data: json!({ + "text": { + "status": "editor_ready", + "current_url": ZHIHU_EDITOR_URL, + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + ])); + + let summary = execute_route_with_browser_backend( + transport.as_ref(), + backend.clone(), + Path::new("."), + "打开知乎写文章页面", + &CompatTaskContext::default(), + WorkflowRoute::ZhihuArticleEntry, + &test_settings(), + ) + .expect("route should recover by navigating to real editor url"); + + assert_eq!(summary, "已进入知乎文章编辑器。"); + assert_eq!( + backend.invocations(), + vec![ + ( + Action::Navigate, + json!({ "url": ZHIHU_CREATOR_URL }), + ZHIHU_DOMAIN.to_string(), + ), + ( + Action::Eval, + json!({ + "script": load_browser_skill_script( + "zhihu-navigate", + "open_creator_entry.js", + json!({ "desired_target": "article_editor" }) + ) + .expect("zhihu navigate script should load") + }), + ZHIHU_DOMAIN.to_string(), + ), + ( + Action::Navigate, + json!({ "url": ZHIHU_EDITOR_URL }), + ZHIHU_EDITOR_DOMAIN.to_string(), + ), + ( + Action::Eval, + json!({ + "script": load_browser_skill_script( + "zhihu-write", + "prepare_article_editor.js", + json!({ "desired_mode": "draft" }) + ) + .expect("zhihu write script should load") + }), + ZHIHU_EDITOR_DOMAIN.to_string(), + ), + ] + ); + } + + #[test] + fn execute_route_with_browser_backend_keeps_eval_for_fill_script_without_live_input_support() { + let transport = Arc::new(MockWorkflowTransport::new(vec![])); + let backend = Arc::new(FakeBrowserBackend::new(vec![ + Ok(CommandOutput { + seq: 1, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 2, + success: true, + data: json!({ + "text": { + "status": "creator_entry_clicked", + "current_url": "https://www.zhihu.com/creator", + "next_url": ZHIHU_EDITOR_URL, + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 3, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 4, + success: true, + data: json!({ + "text": { + "status": "editor_ready", + "current_url": ZHIHU_EDITOR_URL, + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 5, + success: true, + data: json!({ + "text": { + "status": "draft_ready", + "current_url": ZHIHU_EDITOR_URL, + "title": "测试标题" + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + ])); + + let summary = execute_zhihu_article_route( + transport.as_ref(), + backend.as_ref(), + "标题:测试标题\n正文:第一段内容", + &CompatTaskContext::default(), + false, + false, + None, + ) + .expect("route should succeed"); + + assert_eq!(summary, "已进入知乎文章编辑器并写入草稿《测试标题》"); + let invocations = backend.invocations(); + assert_eq!(invocations.len(), 5); + assert_eq!(invocations[4].0, Action::Eval); + } + + #[test] + fn execute_route_with_browser_backend_uses_live_input_probes_for_zhihu_fill_when_supported() { + let transport = Arc::new(MockWorkflowTransport::new(vec![])); + let backend = Arc::new(FakeBrowserBackend::new(vec![ + Ok(CommandOutput { + seq: 1, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 2, + success: true, + data: json!({ + "text": { + "status": "creator_entry_clicked", + "current_url": "https://www.zhihu.com/creator", + "next_url": ZHIHU_EDITOR_URL, + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 3, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 4, + success: true, + data: json!({ + "text": { + "status": "editor_ready", + "current_url": ZHIHU_EDITOR_URL, + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 5, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 6, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 7, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 8, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + Ok(CommandOutput { + seq: 9, + success: true, + data: json!({ + "text": { + "status": "draft_ready", + "current_url": ZHIHU_EDITOR_URL, + "title": "测试标题" + } + }), + aom_snapshot: vec![], + timing: Timing { + queue_ms: 1, + exec_ms: 1, + }, + }), + ]).with_live_input()); + + let summary = execute_zhihu_article_route( + transport.as_ref(), + backend.as_ref(), + "标题:测试标题\n正文:第一段内容", + &CompatTaskContext::default(), + false, + false, + None, + ) + .expect("route should succeed"); + + assert_eq!(summary, "已进入知乎文章编辑器并写入草稿《测试标题》"); + let invocations = backend.invocations(); + assert_eq!(invocations.len(), 9); + assert_eq!(invocations[4].0, Action::Click); + assert!(invocations[4].1["probe_script"] + .as_str() + .is_some_and(|script| script.contains("placeholder*='标题'"))); + assert_eq!(invocations[5].0, Action::Eval); + assert!(invocations[5].1["script"] + .as_str() + .is_some_and(|s| s.contains("测试标题") && s.contains("placeholder*='标题'"))); + assert_eq!(invocations[6].0, Action::Click); + assert!(invocations[6].1["probe_script"] + .as_str() + .is_some_and(|script| script.contains("contenteditable='true'"))); + assert_eq!(invocations[7].0, Action::Eval); + assert!(invocations[7].1["script"] + .as_str() + .is_some_and(|s| s.contains("第一段内容") && s.contains("ClipboardEvent"))); + assert_eq!(invocations[8].0, Action::Eval); + assert_eq!(invocations[8].1["script"], json!( + load_browser_skill_script( + "zhihu-write", + "fill_article_draft.js", + json!({ + "title": "测试标题", + "body": "第一段内容", + "publish_mode": "false", + "input_mode": "live_input", + }) + ) + .expect("zhihu write fill script should load") + )); + } + + #[test] + fn live_input_zhihu_body_type_preserves_multiline_content() { + let transport = Arc::new(MockWorkflowTransport::new(vec![])); + let backend = Arc::new(FakeBrowserBackend::new(vec![ + Ok(CommandOutput { + seq: 1, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 2, + success: true, + data: json!({ + "text": { + "status": "creator_entry_clicked", + "current_url": "https://www.zhihu.com/creator", + "next_url": ZHIHU_EDITOR_URL, + } + }), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 3, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 4, + success: true, + data: json!({ "text": { "status": "editor_ready", "current_url": ZHIHU_EDITOR_URL } }), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 5, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 5, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 6, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 7, + success: true, + data: json!({}), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + Ok(CommandOutput { + seq: 9, + success: true, + data: json!({ + "text": { + "status": "draft_ready", + "current_url": ZHIHU_EDITOR_URL, + "title": "测试标题", + "body_text": "第一段内容 第二段内容" + } + }), + aom_snapshot: vec![], + timing: Timing { queue_ms: 1, exec_ms: 1 }, + }), + ]).with_live_input()); + + let _ = execute_zhihu_article_route( + transport.as_ref(), + backend.as_ref(), + "标题:测试标题\n正文:第一段内容\n第二段内容", + &CompatTaskContext::default(), + false, + false, + None, + ) + .expect("route should succeed"); + + let invocations = backend.invocations(); + assert_eq!(invocations[7].0, Action::Eval); + assert!(invocations[7].1["script"] + .as_str() + .is_some_and(|s| s.contains("第一段内容\\n第二段内容") && s.contains("ClipboardEvent"))); + } + + #[test] + fn zhihu_fill_script_checks_live_input_before_dom_fill_fallback() { + let script = load_browser_skill_script( + "zhihu-write", + "fill_article_draft.js", + json!({ + "title": "测试标题", + "body": "第一段内容", + "publish_mode": "false", + "input_mode": "live_input", + }), + ) + .expect("zhihu write fill script should load"); + + let live_input_index = script + .find("const liveInputMode = String(args.input_mode || '').toLowerCase() === 'live_input';") + .expect("live_input switch should exist"); + let fill_input_index = script + .find("fillInput(titleInput, String(args.title || ''));") + .expect("title DOM fill should exist"); + let fill_editable_index = script + .find("fillEditable(bodyEditor, String(args.body || ''));") + .expect("body DOM fill should exist"); + + assert!( + live_input_index < fill_input_index, + "live_input check must run before title DOM fill fallback" + ); + assert!( + live_input_index < fill_editable_index, + "live_input check must run before body DOM fill fallback" + ); + } + + #[test] + fn zhihu_fill_script_live_input_uses_editor_content_instead_of_whole_page_text() { + let script = load_browser_skill_script( + "zhihu-write", + "fill_article_draft.js", + json!({ + "title": "测试标题", + "body": "第一段内容", + "publish_mode": "true", + "input_mode": "live_input", + }), + ) + .expect("zhihu write fill script should load"); + + assert!( + script.contains("var draftState = collectDraftState(titleInput, bodyEditor);"), + "live_input mode must validate the actual title/body editor state" + ); + assert!( + !script.contains("const liveBodyText = cleanText(pageText());"), + "live_input mode must not treat whole-page text as editor body content" + ); + let draft_state_index = script + .find("var draftState = collectDraftState(titleInput, bodyEditor);") + .expect("draft state validation should exist"); + let publish_button_index = script + .find("const publishButton = findPreferredButtonByText('发布');") + .expect("publish button lookup should exist"); + assert!( + draft_state_index < publish_button_index, + "live_input mode must validate editor content before attempting publish" + ); + } + #[test] fn execute_route_with_browser_backend_keeps_bridge_style_article_entry_direct_route() { let transport = Arc::new(MockWorkflowTransport::new(vec![])); @@ -999,6 +1842,7 @@ mod tests { "打开知乎写文章页面", &CompatTaskContext::default(), WorkflowRoute::ZhihuArticleEntry, + &test_settings(), ) .expect("bridge-style backend should satisfy direct route execution"); @@ -1416,6 +2260,10 @@ fn extract_article_draft( }) } +pub(crate) fn parse_generated_article_draft(text: &str) -> Option { + parse_article_draft(text) +} + fn parse_article_draft(text: &str) -> Option { let normalized = normalize_article_draft_input(text); let title_re = Regex::new(r"(?m)^标题[::]\s*(.+?)\s*$").expect("valid zhihu title regex"); diff --git a/src/runtime/engine.rs b/src/runtime/engine.rs index aca5821..6a56e62 100644 --- a/src/runtime/engine.rs +++ b/src/runtime/engine.rs @@ -276,13 +276,16 @@ pub fn is_zhihu_hotlist_task( || normalized_url.contains("zhihu.com") || normalized_title.contains("zhihu") || page_title.unwrap_or_default().contains("知乎"); - let is_hotlist = normalized_instruction.contains("hotlist") - || instruction.contains("热榜") - || normalized_url.contains("/hot") + let hotlist_in_instruction = normalized_instruction.contains("hotlist") + || instruction.contains("热榜"); + let hotlist_in_context = normalized_url.contains("/hot") || normalized_title.contains("hotlist") || page_title.unwrap_or_default().contains("热榜"); - is_zhihu && is_hotlist + // "热榜"/"hotlist" directly in the instruction implies Zhihu (the only + // hotlist feature sgClaw supports). Context-only signals (URL/title) + // still require the "知乎" qualifier to avoid false positives. + (is_zhihu && (hotlist_in_instruction || hotlist_in_context)) || hotlist_in_instruction } fn task_needs_office_export(instruction: &str) -> bool { diff --git a/src/service/server.rs b/src/service/server.rs index 622a83d..0e95217 100644 --- a/src/service/server.rs +++ b/src/service/server.rs @@ -12,7 +12,7 @@ use tungstenite::stream::MaybeTlsStream; use tungstenite::{connect, Message, WebSocket}; use crate::agent::{ - run_submit_task_with_browser_backend, AgentEventSink, AgentRuntimeContext, SubmitTaskRequest, + run_submit_task_with_browser_backend, AgentEventSink, AgentRuntimeContext, }; use crate::browser::callback_host::LiveBrowserCallbackHost; use crate::browser::ws_backend::WsClient; @@ -236,6 +236,10 @@ pub fn serve_client( browser_ws_url: &str, mac_policy: &MacPolicy, ) -> Result<(), PipeError> { + // Cache the browser callback host across tasks so the helper page tab is + // opened only once per client session instead of once per task. + let mut cached_host: Option> = None; + loop { let Some(message) = sink.recv_client_message()? else { return Ok(()); @@ -276,16 +280,48 @@ pub fn serve_client( continue; } - let result = (|| { - let browser_backend = browser_backend_for_submit(browser_ws_url, mac_policy, &request)?; - run_submit_task_with_browser_backend( - &NoopTransport, - sink.as_ref(), - browser_backend, - context, - request, - ) - })(); + // Lazily create and cache the browser callback host. On first + // task it opens the helper page; subsequent tasks reuse it. + if cached_host.is_none() { + let bootstrap_url = initial_request_url_for_submit_task(&request); + match LiveBrowserCallbackHost::start_with_browser_ws_url( + browser_ws_url, + &bootstrap_url, + Duration::from_secs(15), + BROWSER_RESPONSE_TIMEOUT, + ) { + Ok(host) => { + cached_host = Some(Arc::new(host)); + } + Err(err) => { + session.finish_task(); + eprintln!("task execution failed: {err}"); + sink.send(&AgentMessage::TaskComplete { + success: false, + summary: format!("任务执行失败: {err}"), + })?; + continue; + } + } + } else { + cached_host.as_ref().unwrap().reset_pending_state(); + } + + let host = cached_host.as_ref().unwrap(); + let browser_backend: Arc = + Arc::new(BrowserCallbackBackend::new( + host.clone(), + mac_policy.clone(), + host.helper_url().to_string(), + )); + + let result = run_submit_task_with_browser_backend( + &NoopTransport, + sink.as_ref(), + browser_backend, + context, + request, + ); session.finish_task(); match result { Ok(()) => {} @@ -304,40 +340,6 @@ pub fn serve_client( } } -fn browser_backend_for_submit( - browser_ws_url: &str, - mac_policy: &MacPolicy, - request: &SubmitTaskRequest, -) -> Result, PipeError> { - // Always use BrowserCallbackBackend which opens a real helper page in the - // browser via `sgBrowerserOpenPage`. The helper page acts as a genuine - // browser tab whose URL the browser WS server can route commands to. - // - // WsBrowserBackend is NOT suitable here because: - // 1. It uses a fabricated source URL (e.g. "https://www.zhihu.com") that - // does not correspond to any open tab, so the browser silently drops - // the command. - // 2. It expects a numeric status frame ("0") from the browser WS, but - // the real SuperRPA browser never sends such frames → timeout. - // - // The bootstrap_request_url MUST be the URL of a page that is already open - // in the browser. The browser WS server requires the first element of the - // command array (requestUrl) to match an existing tab; otherwise the - // sgBrowerserOpenPage command is silently ignored. - let bootstrap_request_url = initial_request_url_for_submit_task(request); - let callback_host = Arc::new(LiveBrowserCallbackHost::start_with_browser_ws_url( - browser_ws_url, - &bootstrap_request_url, - Duration::from_secs(15), - BROWSER_RESPONSE_TIMEOUT, - )?); - Ok(Arc::new(BrowserCallbackBackend::new( - callback_host.clone(), - mac_policy.clone(), - callback_host.helper_url().to_string(), - ))) -} - pub(crate) fn initial_request_url_for_submit_task(request: &crate::agent::SubmitTaskRequest) -> String { request .page_url @@ -357,6 +359,7 @@ fn derive_request_url_from_instruction(instruction: &str) -> Option { crate::compat::workflow_executor::WorkflowRoute::ZhihuHotlistExportXlsx | crate::compat::workflow_executor::WorkflowRoute::ZhihuHotlistScreen | crate::compat::workflow_executor::WorkflowRoute::ZhihuArticleEntry + | crate::compat::workflow_executor::WorkflowRoute::ZhihuArticleAutoPublishGenerated ) }) { @@ -780,6 +783,19 @@ mod tests { ); } + #[test] + fn initial_request_url_falls_back_to_zhihu_origin_for_generated_article_publish_routes() { + let request = SubmitTaskRequest { + instruction: "在知乎自动发表一篇名称为人工智能技能大全".to_string(), + ..SubmitTaskRequest::default() + }; + + assert_eq!( + initial_request_url_for_submit_task(&request), + "https://www.zhihu.com" + ); + } + #[test] fn bridge_base_url_defaults_local_browser_ws_endpoint_to_http_bridge() { assert_eq!(