feat: add browser script skill execution

This commit is contained in:
zyl
2026-03-30 02:15:07 +08:00
parent f7e2ff256e
commit d2c9902966
22 changed files with 1775 additions and 249 deletions

View File

@@ -4,7 +4,9 @@ use std::time::{Duration, Instant};
use serde_json::Value;
use crate::pipe::protocol::{Action, AgentMessage, BrowserMessage, SecurityFields, Timing};
use crate::pipe::protocol::{
Action, AgentMessage, BrowserMessage, ExecutionSurfaceMetadata, SecurityFields, Timing,
};
use crate::pipe::{PipeError, Transport};
use crate::security::{sign_command, MacPolicy};
@@ -53,6 +55,10 @@ impl<T: Transport> BrowserPipeTool<T> {
self
}
/// Returns the execution-surface metadata reported by this tool's MAC policy.
///
/// This is a pure delegation to `privileged_surface_metadata()` on the
/// `mac_policy` field; no additional state is consulted here.
pub fn surface_metadata(&self) -> ExecutionSurfaceMetadata {
    let policy = &self.mac_policy;
    policy.privileged_surface_metadata()
}
pub fn invoke(
&self,
action: Action,

View File

@@ -2,7 +2,9 @@ use std::time::Duration;
use uuid::Uuid;
use crate::pipe::protocol::{supported_actions, AgentMessage, BrowserMessage, PROTOCOL_VERSION};
use crate::pipe::protocol::{
supported_actions, AgentMessage, BrowserMessage, ExecutionSurfaceMetadata, PROTOCOL_VERSION,
};
use crate::pipe::{PipeError, Transport};
use crate::security::derive_session_key;
@@ -13,6 +15,19 @@ pub struct HandshakeResult {
pub capabilities: Vec<String>,
}
impl HandshakeResult {
    /// Builds privileged browser-pipe surface metadata from the handshake result.
    ///
    /// Returns `Some` (with the guard `"browser_host_and_mac_policy"`) when
    /// `capabilities` contains the exact string `"browser_action"`, and `None`
    /// otherwise. The metadata is only constructed when the capability is present.
    pub fn browser_surface_metadata(&self) -> Option<ExecutionSurfaceMetadata> {
        let supports_browser_action = self
            .capabilities
            .iter()
            .any(|name| name.as_str() == "browser_action");

        if supports_browser_action {
            Some(ExecutionSurfaceMetadata::privileged_browser_pipe(
                "browser_host_and_mac_policy",
            ))
        } else {
            None
        }
    }
}
pub fn perform_handshake<T: Transport>(
transport: &T,
timeout: Duration,

View File

@@ -5,8 +5,8 @@ pub mod protocol;
pub use browser_tool::{BrowserPipeTool, CommandOutput};
pub use handshake::{perform_handshake, HandshakeResult};
pub use protocol::{
supported_actions, Action, AgentMessage, BrowserMessage, ConversationMessage,
SecurityFields, Timing,
supported_actions, Action, AgentMessage, BrowserContext, BrowserMessage,
ConversationMessage, ExecutionSurfaceKind, ExecutionSurfaceMetadata, SecurityFields, Timing,
};
use std::io::{BufRead, BufReader, Read, Write};

View File

@@ -3,6 +3,49 @@ use serde_json::{json, Value};
pub const PROTOCOL_VERSION: &str = "1.0";
/// Discriminates the kinds of execution surface that metadata can describe.
///
/// Variant names are (de)serialized in snake_case because of the
/// `rename_all = "snake_case"` container attribute.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ExecutionSurfaceKind {
/// The browser pipe surface; serde names this variant `privileged_browser_pipe`.
PrivilegedBrowserPipe,
}
/// A page URL / page title pair.
///
/// `Default` yields two empty strings.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct BrowserContext {
/// URL of the page; may be empty or whitespace-only.
pub page_url: String,
/// Title of the page; may be empty or whitespace-only.
pub page_title: String,
}
impl BrowserContext {
    /// Reports whether the context carries no usable information.
    ///
    /// Returns `true` only when both `page_url` and `page_title` are empty
    /// after trimming surrounding whitespace; a single non-blank field is
    /// enough to make the context non-empty.
    pub fn is_empty(&self) -> bool {
        [self.page_url.as_str(), self.page_title.as_str()]
            .iter()
            .all(|text| text.trim().is_empty())
    }
}
/// Serializable description of an execution surface.
///
/// NOTE(review): the fields are plain data here; how consumers interpret or
/// enforce them is not visible in this file section — confirm against callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExecutionSurfaceMetadata {
/// Which kind of surface this metadata describes.
pub kind: ExecutionSurfaceKind,
/// Whether the surface is flagged as privileged.
pub privileged: bool,
/// Presumably marks whether this surface determines the runtime's identity — TODO confirm.
pub defines_runtime_identity: bool,
/// Guard label supplied by whoever constructs the metadata
/// (e.g. `"browser_host_and_mac_policy"`).
pub guard: String,
/// Falls back to an empty list when the field is missing during deserialization.
/// Presumably the domains this surface may operate on — verify against the policy code.
#[serde(default)]
pub allowed_domains: Vec<String>,
/// Falls back to an empty list when the field is missing during deserialization.
/// Presumably the action names this surface may perform — verify against the policy code.
#[serde(default)]
pub allowed_actions: Vec<String>,
}
impl ExecutionSurfaceMetadata {
    /// Creates metadata for the privileged browser pipe surface.
    ///
    /// The result has `kind` set to `PrivilegedBrowserPipe`, `privileged` set
    /// to `true`, `defines_runtime_identity` set to `false`, and both
    /// allow-lists empty. `guard` is converted into an owned `String` and
    /// stored unchanged.
    pub fn privileged_browser_pipe(guard: impl Into<String>) -> Self {
        let guard: String = guard.into();
        Self {
            guard,
            kind: ExecutionSurfaceKind::PrivilegedBrowserPipe,
            privileged: true,
            defines_runtime_identity: false,
            allowed_domains: vec![],
            allowed_actions: vec![],
        }
    }
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum BrowserMessage {
@@ -34,6 +77,34 @@ pub enum BrowserMessage {
},
}
impl BrowserMessage {
    /// Extracts the page context carried by a `SubmitTask` message.
    ///
    /// Returns `Some` with cloned `page_url` / `page_title` when the message is
    /// a `SubmitTask` and at least one of the two fields is non-blank. Returns
    /// `None` for every other variant and for a `SubmitTask` whose context is
    /// empty according to `BrowserContext::is_empty`.
    pub fn browser_context(&self) -> Option<BrowserContext> {
        if let Self::SubmitTask {
            page_url,
            page_title,
            ..
        } = self
        {
            let candidate = BrowserContext {
                page_url: page_url.clone(),
                page_title: page_title.clone(),
            };
            Some(candidate).filter(|context| !context.is_empty())
        } else {
            None
        }
    }

    /// Returns the surface metadata requested by this message.
    ///
    /// Only `SubmitTask` requests a surface: it yields privileged browser-pipe
    /// metadata with the guard `"browser_host_and_mac_policy"`. All other
    /// variants yield `None`.
    pub fn requested_surface_metadata(&self) -> Option<ExecutionSurfaceMetadata> {
        matches!(self, Self::SubmitTask { .. }).then(|| {
            ExecutionSurfaceMetadata::privileged_browser_pipe("browser_host_and_mac_policy")
        })
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConversationMessage {
pub role: String,
@@ -71,6 +142,7 @@ pub enum Action {
Type,
Navigate,
GetText,
Eval,
GetHtml,
WaitForSelector,
PageScreenshot,
@@ -90,6 +162,7 @@ impl Action {
Action::Type => "type",
Action::Navigate => "navigate",
Action::GetText => "getText",
Action::Eval => "eval",
Action::GetHtml => "getHtml",
Action::WaitForSelector => "waitForSelector",
Action::PageScreenshot => "pageScreenshot",
@@ -122,6 +195,7 @@ pub fn supported_actions() -> Vec<Action> {
Action::Type,
Action::Navigate,
Action::GetText,
Action::Eval,
Action::GetHtml,
Action::WaitForSelector,
Action::PageScreenshot,