use serde_json::{json, Map, Value}; use crate::llm::{ChatMessage, LlmError, LlmProvider, ToolDefinition, ToolFunctionCall}; use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport}; const BROWSER_ACTION_TOOL_NAME: &str = "browser_action"; #[derive(Debug, Clone, PartialEq)] struct BrowserActionCall { action: Action, expected_domain: String, params: Value, } pub fn execute_task_with_provider( transport: &T, browser_tool: &BrowserPipeTool, provider: &P, instruction: &str, ) -> Result { let messages = vec![ ChatMessage { role: "system".to_string(), content: "You are sgClaw. Use browser_action to complete the browser task.".to_string(), }, ChatMessage { role: "user".to_string(), content: instruction.to_string(), }, ]; let tools = vec![browser_action_tool_definition()]; let calls = provider .chat(&messages, &tools) .map_err(map_llm_error_to_pipe_error)?; for call in calls { let browser_call = parse_browser_action_call(call).map_err(|err| PipeError::Protocol(err.to_string()))?; transport.send(&AgentMessage::LogEntry { level: "info".to_string(), message: format!( "{} {}", browser_call.action.as_str(), browser_call.expected_domain ), })?; let result = browser_tool.invoke( browser_call.action, browser_call.params, &browser_call.expected_domain, )?; if !result.success { return Err(PipeError::Protocol(format!( "browser action failed: {}", result.data ))); } } Ok(format!("已通过 Agent 执行任务: {instruction}")) } pub fn browser_action_tool_definition() -> ToolDefinition { ToolDefinition { name: BROWSER_ACTION_TOOL_NAME.to_string(), description: "Execute browser actions in SuperRPA".to_string(), parameters: json!({ "type": "object", "required": ["action", "expected_domain"], "properties": { "action": { "type": "string", "enum": ["click", "type", "navigate", "getText"] }, "expected_domain": { "type": "string" }, "selector": { "type": "string" }, "text": { "type": "string" }, "url": { "type": "string" }, "clear_first": { "type": "boolean" } } }), } } fn parse_browser_action_call(call: ToolFunctionCall) -> Result { if call.name != BROWSER_ACTION_TOOL_NAME { return Err(RuntimeError::UnsupportedTool(call.name)); } let mut args = match call.arguments { Value::Object(args) => args, other => { return Err(RuntimeError::InvalidArguments(format!( "expected object arguments, got {other}" ))) } }; let action_name = take_required_string(&mut args, "action")?; let expected_domain = take_required_string(&mut args, "expected_domain")?; let action = parse_action(&action_name)?; let params = Value::Object(action_params_from_args(args)); Ok(BrowserActionCall { action, expected_domain, params, }) } fn map_llm_error_to_pipe_error(err: LlmError) -> PipeError { PipeError::Protocol(err.to_string()) } fn parse_action(action_name: &str) -> Result { match action_name { "click" => Ok(Action::Click), "type" => Ok(Action::Type), "navigate" => Ok(Action::Navigate), "getText" => Ok(Action::GetText), other => Err(RuntimeError::UnsupportedAction(other.to_string())), } } fn take_required_string( args: &mut Map, key: &'static str, ) -> Result { match args.remove(key) { Some(Value::String(value)) if !value.trim().is_empty() => Ok(value), Some(other) => Err(RuntimeError::InvalidArguments(format!( "{key} must be a non-empty string, got {other}" ))), None => Err(RuntimeError::MissingField(key)), } } fn action_params_from_args(args: Map) -> Map { args } #[derive(Debug, thiserror::Error)] enum RuntimeError { #[error("unsupported tool: {0}")] UnsupportedTool(String), #[error("unsupported action: {0}")] UnsupportedAction(String), #[error("missing required field: {0}")] MissingField(&'static str), #[error("invalid tool arguments: {0}")] InvalidArguments(String), }