feat: realign zhihu browser callback runtime

Keep Zhihu browser-attached execution on the callback-host path so direct routes, runtime wiring, and service startup stay aligned for the current websocket browser flow.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
木炎
2026-04-06 12:09:47 +08:00
parent 3e18350320
commit 6068a8228b
9 changed files with 1907 additions and 116 deletions

View File

@@ -8,7 +8,8 @@ use crate::browser::backend::BrowserBackend;
use crate::pipe::{Action, CommandOutput, ExecutionSurfaceMetadata, PipeError, Timing};
use crate::security::MacPolicy;
// Callback names that injected scripts report back under.
// NOTE(review): the navigate/getText/eval names are presumably consumed by the
// corresponding command builders, which are not visible in this hunk — confirm.
const NAVIGATE_CALLBACK_NAME: &str = "sgclawOnLoaded";
// Embedded by `build_input_probe_js` for click probes.
const CLICK_PROBE_CALLBACK_NAME: &str = "sgclawOnClickProbe";
// Embedded by `build_input_probe_js` for type probes.
const TYPE_PROBE_CALLBACK_NAME: &str = "sgclawOnTypeProbe";
const GET_TEXT_CALLBACK_NAME: &str = "sgclawOnGetText";
const EVAL_CALLBACK_NAME: &str = "sgclawOnEval";
// Trailing element of the `sgBrowserExcuteJsCodeByDomain` command built by
// `build_input_command`.
// NOTE(review): presumably selects the visible browser area — confirm against the host API.
const SHOW_AREA: &str = "show";
@@ -54,6 +55,12 @@ pub struct BrowserCallbackBackend {
next_seq: AtomicU64,
}
/// Which kind of live-input probe is being built.
///
/// The mode decides the callback name embedded in the probe script and whether
/// a `text` payload is required and reported alongside the coordinates.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CallbackInputMode {
/// Probe for a mouse click; the payload carries only `x`/`y`.
Click,
/// Probe for keyboard input; the payload carries `x`/`y` plus the text to type.
Type,
}
impl BrowserCallbackBackend {
pub fn new(
host: Arc<dyn BrowserCallbackHost>,
@@ -91,6 +98,8 @@ impl BrowserCallbackBackend {
target_url,
]))
}
Action::Click => self.build_input_command(action, params, CallbackInputMode::Click),
Action::Type => self.build_input_command(action, params, CallbackInputMode::Type),
Action::GetText => {
let target_url = self.target_url(action, params)?;
let domain = extract_domain(&target_url)?;
@@ -129,6 +138,35 @@ impl BrowserCallbackBackend {
}
}
/// Builds the `sgBrowserExcuteJsCodeByDomain` command that runs the input
/// probe script on the page matching the target URL's domain.
///
/// `selector` and `probe_script` are optional parameters (at least one is
/// required by `build_input_probe_js`); `text` is mandatory in `Type` mode
/// and ignored in `Click` mode.
///
/// # Errors
/// Propagates failures from target-URL resolution, domain extraction, the
/// `text` lookup (type mode only) and probe-script generation.
fn build_input_command(
    &self,
    action: &Action,
    params: &Value,
    mode: CallbackInputMode,
) -> Result<Value, PipeError> {
    let page_url = self.target_url(action, params)?;
    let domain = extract_domain(&page_url)?;

    let selector = optional_string(params, "selector");
    let probe_script = optional_string(params, "probe_script");
    // Only typing needs a text payload; a click probe carries coordinates only.
    let text = match mode {
        CallbackInputMode::Type => Some(required_string(params, "text")?),
        CallbackInputMode::Click => None,
    };

    let js_code = build_input_probe_js(
        mode,
        &self.helper_page_url,
        selector.as_deref(),
        probe_script.as_deref(),
        text.as_deref(),
    )?;

    let command = json!([
        self.helper_page_url,
        "sgBrowserExcuteJsCodeByDomain",
        domain,
        js_code,
        SHOW_AREA,
    ]);
    Ok(command)
}
fn target_url(&self, action: &Action, params: &Value) -> Result<String, PipeError> {
if let Some(target_url) = params
.get("target_url")
@@ -146,6 +184,117 @@ impl BrowserCallbackBackend {
.clone()
.ok_or_else(|| PipeError::Protocol(format!("target_url is required for {}", action.as_str())))
}
fn execute_simulated_click(
&self,
seq: u64,
expected_domain: &str,
success: &BrowserCallbackSuccess,
) -> Result<BrowserCallbackSuccess, PipeError> {
let probe = success
.data
.get("probe")
.ok_or_else(|| PipeError::Protocol("callback click probe payload missing".to_string()))?;
let x = probe
.get("x")
.and_then(Value::as_f64)
.ok_or_else(|| PipeError::Protocol("callback click probe missing x".to_string()))?;
let y = probe
.get("y")
.and_then(Value::as_f64)
.ok_or_else(|| PipeError::Protocol("callback click probe missing y".to_string()))?;
let timing = success.timing.clone();
match self.host.execute(BrowserCallbackRequest {
seq,
request_url: self.helper_page_url.clone(),
expected_domain: expected_domain.to_string(),
action: Action::Click.as_str().to_string(),
command: json!([
self.helper_page_url,
"sgBroewserSimulateMouse",
x,
y,
"left",
"",
""
]),
}) {
Ok(BrowserCallbackResponse::Error(error)) => Err(PipeError::Protocol(format!(
"callback host browser action failed: {} ({})",
error.message, error.details
))),
Ok(BrowserCallbackResponse::Success(_)) | Err(PipeError::Timeout) => {
Ok(BrowserCallbackSuccess {
success: true,
data: json!({
"clicked": true,
"probe": { "x": x, "y": y },
}),
aom_snapshot: vec![],
timing,
})
}
Err(error) => Err(error),
}
}
fn execute_simulated_type(
&self,
seq: u64,
expected_domain: &str,
params: &Value,
success: &BrowserCallbackSuccess,
) -> Result<BrowserCallbackSuccess, PipeError> {
let probe = success
.data
.get("probe")
.ok_or_else(|| PipeError::Protocol("callback type probe payload missing".to_string()))?;
let x = probe
.get("x")
.and_then(Value::as_f64)
.ok_or_else(|| PipeError::Protocol("callback type probe missing x".to_string()))?;
let y = probe
.get("y")
.and_then(Value::as_f64)
.ok_or_else(|| PipeError::Protocol("callback type probe missing y".to_string()))?;
let text = params
.get("text")
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.ok_or_else(|| PipeError::Protocol("text is required".to_string()))?;
let timing = success.timing.clone();
match self.host.execute(BrowserCallbackRequest {
seq,
request_url: self.helper_page_url.clone(),
expected_domain: expected_domain.to_string(),
action: Action::Type.as_str().to_string(),
command: json!([
self.helper_page_url,
"sgBroewserSimulateKeyborad",
x,
y,
text
]),
}) {
Ok(BrowserCallbackResponse::Error(error)) => Err(PipeError::Protocol(format!(
"callback host browser action failed: {} ({})",
error.message, error.details
))),
Ok(BrowserCallbackResponse::Success(_)) | Err(PipeError::Timeout) => {
Ok(BrowserCallbackSuccess {
success: true,
data: json!({
"typed": true,
"probe": { "x": x, "y": y, "text": text },
}),
aom_snapshot: vec![],
timing,
})
}
Err(error) => Err(error),
}
}
}
impl BrowserBackend for BrowserCallbackBackend {
@@ -168,6 +317,13 @@ impl BrowserBackend for BrowserCallbackBackend {
match reply {
BrowserCallbackResponse::Success(success) => {
let success = match action {
Action::Click => self.execute_simulated_click(seq, expected_domain, &success)?,
Action::Type => {
self.execute_simulated_type(seq, expected_domain, &params, &success)?
}
_ => success,
};
if matches!(action, Action::Navigate) {
if let Some(url) = params
.get("url")
@@ -202,6 +358,11 @@ impl BrowserBackend for BrowserCallbackBackend {
/// Reports whether the MAC policy permits the `Eval` pipe action.
fn supports_eval(&self) -> bool {
self.mac_policy.supports_pipe_action(&Action::Eval)
}
/// Reports whether live input is available, i.e. the MAC policy permits
/// both the `Click` and the `Type` pipe actions.
fn supports_live_input(&self) -> bool {
    [Action::Click, Action::Type]
        .iter()
        .all(|action| self.mac_policy.supports_pipe_action(action))
}
}
fn required_string(params: &Value, key: &str) -> Result<String, PipeError> {
@@ -214,6 +375,15 @@ fn required_string(params: &Value, key: &str) -> Result<String, PipeError> {
.ok_or_else(|| PipeError::Protocol(format!("{key} is required")))
}
fn optional_string(params: &Value, key: &str) -> Option<String> {
params
.get(key)
.and_then(Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.map(ToString::to_string)
}
fn build_get_text_js(source_url: &str, selector: &str) -> String {
let escaped_source_url = escape_js_single_quoted(source_url);
let escaped_selector = escape_js_single_quoted(selector);
@@ -259,6 +429,62 @@ fn build_eval_js(source_url: &str, script: &str) -> String {
)
}
/// Generates the self-invoking probe script injected into the target page
/// before a simulated click or keystroke.
///
/// The script locates the target element (via `probe_script` when given,
/// otherwise `document.querySelector(selector)`), computes the centre of its
/// bounding rectangle, and reports `{x, y}` (plus `text` in type mode) through
/// three channels: `callBackJsToCpp`, an XHR POST, and `navigator.sendBeacon`
/// to the events endpoint derived from `source_url`. Every step is wrapped in
/// `try/catch`, so a failure inside the page never throws out of the script.
///
/// # Errors
/// Returns `PipeError::Protocol` when neither `selector` nor `probe_script`
/// is supplied.
fn build_input_probe_js(
    mode: CallbackInputMode,
    source_url: &str,
    selector: Option<&str>,
    probe_script: Option<&str>,
    text: Option<&str>,
) -> Result<String, PipeError> {
    let callback = match mode {
        CallbackInputMode::Click => CLICK_PROBE_CALLBACK_NAME,
        CallbackInputMode::Type => TYPE_PROBE_CALLBACK_NAME,
    };
    let escaped_source_url = escape_js_single_quoted(source_url);
    let events_url = escape_js_single_quoted(&events_endpoint_url(source_url));

    // The same object literal is sent raw over HTTP and stringified for the
    // native callback channel.
    let payload_object = match mode {
        CallbackInputMode::Click => "{x:x,y:y}".to_string(),
        CallbackInputMode::Type => format!(
            "{{x:x,y:y,text:'{}'}}",
            escape_js_single_quoted(text.unwrap_or_default())
        ),
    };
    let payload_expression = format!("JSON.stringify({payload_object})");

    // A caller-supplied probe script takes precedence over the selector.
    let element_lookup = match (probe_script, selector) {
        (Some(script), _) => format!("(function(){{{script}}})()"),
        (None, Some(css)) => format!(
            "document.querySelector('{}')",
            escape_js_single_quoted(css)
        ),
        (None, None) => {
            return Err(PipeError::Protocol(
                "selector or probe_script is required".to_string(),
            ))
        }
    };
    let missing_hint = match selector {
        Some(value) => format!("selector not found: {}", escape_js_single_quoted(value)),
        None => "input probe target not found".to_string(),
    };

    let script = format!(
        "(function(){{try{{\
         var el={element_lookup};\
         if(!el){{throw new Error('{missing_hint}');}}\
         var rect=(typeof el.getBoundingClientRect==='function')?el.getBoundingClientRect():null;\
         var x=rect?(rect.left+(rect.width/2)):0;\
         var y=rect?(rect.top+(rect.height/2)):0;\
         try{{callBackJsToCpp('{escaped_source_url}@_@'+window.location.href+'@_@{callback}@_@sgBrowserExcuteJsCodeByDomain@_@'+String({payload_expression}))}}catch(_){{}}\
         var j=JSON.stringify({{type:'callback',callback:'{callback}',request_url:'{escaped_source_url}',payload:{payload_object}}});\
         try{{var r=new XMLHttpRequest();r.open('POST','{events_url}',true);r.setRequestHeader('Content-Type','application/json');r.send(j)}}catch(_){{}}\
         try{{navigator.sendBeacon('{events_url}',new Blob([j],{{type:'application/json'}}))}}catch(_){{}}\
         }}catch(e){{}}}})()"
    );
    Ok(script)
}
/// Derive the callback host events endpoint URL from the helper page URL.
/// e.g. "http://127.0.0.1:62819/sgclaw/browser-helper.html"
/// → "http://127.0.0.1:62819/sgclaw/callback/events"
@@ -297,5 +523,330 @@ fn extract_domain(url: &str) -> Result<String, PipeError> {
}
/// Escapes `raw` so it can be embedded inside a single-quoted JavaScript
/// string literal.
///
/// Backslashes and single quotes are backslash-escaped. The line terminators
/// `\n`, `\r`, U+2028 and U+2029 are rewritten to their escape sequences so
/// the literal stays on one line. NUL is emitted as `\x00` rather than `\0`:
/// a `\0` directly followed by a decimal digit is parsed by JavaScript as a
/// legacy octal escape, which would corrupt the text (and is a syntax error
/// in strict mode).
///
/// The input is walked once, so the output is built with a single allocation
/// instead of one intermediate `String` per replaced character class.
fn escape_js_single_quoted(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\0' => escaped.push_str("\\x00"),
            '\u{2028}' => escaped.push_str("\\u2028"),
            '\u{2029}' => escaped.push_str("\\u2029"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::VecDeque;

    /// Helper page URL handed to every backend under test.
    const HELPER_PAGE_URL: &str = "http://127.0.0.1:17888/sgclaw/browser-helper.html";

    /// Policy that allows the Zhihu domains and every pipe action used by these tests.
    fn test_policy() -> MacPolicy {
        MacPolicy::from_json_str(
            r#"{
"version": "1.0",
"domains": { "allowed": ["www.zhihu.com", "zhuanlan.zhihu.com"] },
"pipe_actions": {
"allowed": ["click", "type", "navigate", "getText", "eval"],
"blocked": []
}
}"#,
        )
        .unwrap()
    }

    /// Callback host double: records every request and answers with the queued
    /// replies in order. Once the queue is drained it answers
    /// `Err(PipeError::Timeout)`.
    struct FakeCallbackHost {
        requests: Mutex<Vec<BrowserCallbackRequest>>,
        replies: Mutex<VecDeque<Result<BrowserCallbackResponse, PipeError>>>,
    }

    impl FakeCallbackHost {
        fn new(replies: Vec<Result<BrowserCallbackResponse, PipeError>>) -> Self {
            Self {
                requests: Mutex::new(Vec::new()),
                replies: Mutex::new(VecDeque::from(replies)),
            }
        }

        /// Snapshot of the requests received so far, in arrival order.
        fn requests(&self) -> Vec<BrowserCallbackRequest> {
            self.requests.lock().unwrap().clone()
        }
    }

    impl BrowserCallbackHost for FakeCallbackHost {
        fn execute(
            &self,
            request: BrowserCallbackRequest,
        ) -> Result<BrowserCallbackResponse, PipeError> {
            self.requests.lock().unwrap().push(request);
            self.replies
                .lock()
                .unwrap()
                .pop_front()
                .unwrap_or_else(|| Err(PipeError::Timeout))
        }
    }

    /// Wraps `data` in a successful host reply with fixed timing.
    fn success_reply(data: Value) -> Result<BrowserCallbackResponse, PipeError> {
        Ok(BrowserCallbackResponse::Success(BrowserCallbackSuccess {
            success: true,
            data,
            aom_snapshot: vec![],
            timing: Timing {
                queue_ms: 1,
                exec_ms: 1,
            },
        }))
    }

    /// Builds a backend wired to `host`, the test policy and the helper page URL.
    fn backend_for(host: &Arc<FakeCallbackHost>) -> BrowserCallbackBackend {
        BrowserCallbackBackend::new(host.clone(), test_policy(), HELPER_PAGE_URL)
    }

    /// Expected follow-up command for a simulated left click at `(x, y)`.
    fn mouse_command(x: f64, y: f64) -> Value {
        json!([HELPER_PAGE_URL, "sgBroewserSimulateMouse", x, y, "left", "", ""])
    }

    /// Expected follow-up command for simulated typing of `text` at `(x, y)`.
    fn keyboard_command(x: f64, y: f64, text: &str) -> Value {
        json!([HELPER_PAGE_URL, "sgBroewserSimulateKeyborad", x, y, text])
    }

    #[test]
    fn callback_backend_click_treats_simulated_mouse_follow_up_as_fire_and_forget() {
        // Only the probe reply is queued; the follow-up hits the fake's timeout default.
        let host = Arc::new(FakeCallbackHost::new(vec![success_reply(
            json!({ "probe": { "x": 320.5, "y": 240.25 } }),
        )]));
        let backend = backend_for(&host);

        let output = backend
            .invoke(
                Action::Click,
                json!({
                    "target_url": "https://zhuanlan.zhihu.com/write",
                    "selector": "button"
                }),
                "zhuanlan.zhihu.com",
            )
            .unwrap();

        assert!(output.success);
        let requests = host.requests();
        assert_eq!(requests.len(), 2);
        assert_eq!(requests[1].command, mouse_command(320.5, 240.25));
    }

    #[test]
    fn callback_backend_click_survives_simulated_mouse_timeout() {
        let host = Arc::new(FakeCallbackHost::new(vec![
            success_reply(json!({ "probe": { "x": 320.5, "y": 240.25 } })),
            Err(PipeError::Timeout),
        ]));
        let backend = backend_for(&host);

        let output = backend
            .invoke(
                Action::Click,
                json!({
                    "target_url": "https://zhuanlan.zhihu.com/write",
                    "selector": "button"
                }),
                "zhuanlan.zhihu.com",
            )
            .expect("simulated mouse timeout should be treated as fire-and-forget success");

        assert!(output.success);
        let requests = host.requests();
        assert_eq!(requests.len(), 2);
    }

    #[test]
    fn callback_backend_click_uses_domain_probe_then_simulated_mouse_input() {
        let host = Arc::new(FakeCallbackHost::new(vec![
            success_reply(json!({ "probe": { "x": 320.5, "y": 240.25 } })),
            success_reply(json!({ "clicked": true })),
        ]));
        let backend = backend_for(&host);

        let output = backend
            .invoke(
                Action::Click,
                json!({
                    "target_url": "https://zhuanlan.zhihu.com/write",
                    "selector": "button"
                }),
                "zhuanlan.zhihu.com",
            )
            .unwrap();

        assert!(output.success);
        let requests = host.requests();
        assert_eq!(requests.len(), 2);

        // First request: the domain-scoped probe script.
        assert_eq!(requests[0].action, "click");
        assert_eq!(requests[0].command[1], json!("sgBrowserExcuteJsCodeByDomain"));
        assert_eq!(requests[0].command[2], json!("zhuanlan.zhihu.com"));
        let script = requests[0].command[3].as_str().unwrap();
        assert!(script.contains("document.querySelector('button')"));
        assert!(script.contains("sgclawOnClick"));

        // Second request: the simulated mouse event at the probed coordinates.
        assert_eq!(requests[1].action, "click");
        assert_eq!(requests[1].command, mouse_command(320.5, 240.25));
    }

    #[test]
    fn callback_backend_type_treats_simulated_keyboard_follow_up_as_fire_and_forget() {
        // Only the probe reply is queued; the follow-up hits the fake's timeout default.
        let host = Arc::new(FakeCallbackHost::new(vec![success_reply(
            json!({ "probe": { "x": 160.0, "y": 90.0, "text": "正文" } }),
        )]));
        let backend = backend_for(&host);

        let output = backend
            .invoke(
                Action::Type,
                json!({
                    "target_url": "https://zhuanlan.zhihu.com/write",
                    "selector": "div[contenteditable='true']",
                    "text": "正文"
                }),
                "zhuanlan.zhihu.com",
            )
            .unwrap();

        assert!(output.success);
        let requests = host.requests();
        assert_eq!(requests.len(), 2);
        assert_eq!(requests[1].command, keyboard_command(160.0, 90.0, "正文"));
    }

    #[test]
    fn callback_backend_type_uses_custom_probe_script_when_provided() {
        let host = Arc::new(FakeCallbackHost::new(vec![
            success_reply(json!({ "probe": { "x": 160.0, "y": 90.0, "text": "正文" } })),
            success_reply(json!({ "typed": true })),
        ]));
        let backend = backend_for(&host);

        let output = backend
            .invoke(
                Action::Type,
                json!({
                    "target_url": "https://zhuanlan.zhihu.com/write",
                    "probe_script": "return document.body;",
                    "text": "正文"
                }),
                "zhuanlan.zhihu.com",
            )
            .unwrap();

        assert!(output.success);
        let requests = host.requests();
        assert_eq!(requests.len(), 2);

        let script = requests[0].command[3].as_str().unwrap();
        assert!(script.contains("return document.body;"));
        // With no selector supplied, the script must neither fall back to a
        // selector lookup nor mention a selector in its not-found hint.
        assert!(!script.contains("document.querySelector("));
        assert!(!script.contains("selector not found"));
        assert!(script.contains("input probe target not found"));

        assert_eq!(requests[1].command, keyboard_command(160.0, 90.0, "正文"));
    }

    #[test]
    fn callback_backend_type_uses_domain_probe_then_simulated_keyboard_input() {
        let host = Arc::new(FakeCallbackHost::new(vec![
            success_reply(json!({ "probe": { "x": 160.0, "y": 90.0, "text": "正文" } })),
            success_reply(json!({ "typed": true })),
        ]));
        let backend = backend_for(&host);

        let output = backend
            .invoke(
                Action::Type,
                json!({
                    "target_url": "https://zhuanlan.zhihu.com/write",
                    "selector": "div[contenteditable='true']",
                    "text": "正文"
                }),
                "zhuanlan.zhihu.com",
            )
            .unwrap();

        assert!(output.success);
        let requests = host.requests();
        assert_eq!(requests.len(), 2);

        // First request: the domain-scoped probe script, which must only probe
        // and never write the text into the element itself.
        assert_eq!(requests[0].action, "type");
        assert_eq!(requests[0].command[1], json!("sgBrowserExcuteJsCodeByDomain"));
        assert_eq!(requests[0].command[2], json!("zhuanlan.zhihu.com"));
        let script = requests[0].command[3].as_str().unwrap();
        assert!(script.contains("document.querySelector('div[contenteditable=\\'true\\']')"));
        assert!(script.contains("sgclawOnType"));
        assert!(!script.contains("el.value="));

        // Second request: the simulated keyboard event at the probed coordinates.
        assert_eq!(requests[1].action, "type");
        assert_eq!(requests[1].command, keyboard_command(160.0, 90.0, "正文"));
    }

    #[test]
    fn escape_js_single_quoted_escapes_newlines_and_control_chars() {
        let raw = "第一行\n第二行\r\n第三行";
        let escaped = escape_js_single_quoted(raw);
        assert!(!escaped.contains('\n'), "literal newline must be escaped");
        assert!(!escaped.contains('\r'), "literal carriage return must be escaped");
        assert!(escaped.contains("\\n"), "should contain escaped newline");
        assert!(escaped.contains("\\r"), "should contain escaped carriage return");
        assert_eq!(escaped, "第一行\\n第二行\\r\\n第三行");
    }

    #[test]
    fn type_probe_script_with_multiline_text_is_valid_js() {
        let text_with_newlines = "标题\n\n正文第一段\n正文第二段";
        let js = build_input_probe_js(
            CallbackInputMode::Type,
            HELPER_PAGE_URL,
            Some("div[contenteditable='true']"),
            None,
            Some(text_with_newlines),
        )
        .unwrap();

        // A raw newline inside a single-quoted JS string is a syntax error, so
        // the text must reach the script with its newlines escaped.
        assert!(
            !js.contains("标题\n"),
            "literal newline must not appear in the JS probe script"
        );
        assert!(js.contains("标题\\n"));
        assert!(js.contains("sgclawOnTypeProbe"));
    }
}