feat: add browser script skill execution
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use regex::Regex;
|
||||
@@ -11,8 +11,6 @@ use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
|
||||
|
||||
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
|
||||
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
|
||||
const HOTLIST_ROOT_SELECTORS: [&str; 3] = ["main", "body", "html"];
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum WorkflowRoute {
|
||||
ZhihuHotlistExportXlsx,
|
||||
@@ -87,28 +85,28 @@ fn collect_hotlist_items<T: Transport + 'static>(
|
||||
top_n: usize,
|
||||
) -> Result<Vec<HotlistItem>, PipeError> {
|
||||
navigate_hotlist_with_retry(transport, browser_tool)?;
|
||||
|
||||
for selector in HOTLIST_ROOT_SELECTORS {
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: format!("getText {selector}"),
|
||||
})?;
|
||||
let response = browser_tool.invoke(
|
||||
Action::GetText,
|
||||
json!({ "selector": selector }),
|
||||
ZHIHU_DOMAIN,
|
||||
)?;
|
||||
if !response.success {
|
||||
continue;
|
||||
}
|
||||
let text = response.data["text"].as_str().unwrap_or_default();
|
||||
let items = parse_hotlist_items(text, top_n);
|
||||
if !items.is_empty() {
|
||||
return Ok(items);
|
||||
}
|
||||
transport.send(&AgentMessage::LogEntry {
|
||||
level: "info".to_string(),
|
||||
message: "call zhihu-hotlist.extract_hotlist".to_string(),
|
||||
})?;
|
||||
let response = browser_tool.invoke(
|
||||
Action::Eval,
|
||||
json!({ "script": load_hotlist_extractor_script(top_n)? }),
|
||||
ZHIHU_DOMAIN,
|
||||
)?;
|
||||
if !response.success {
|
||||
return Err(PipeError::Protocol(format!(
|
||||
"知乎热榜采集失败:{}",
|
||||
response
|
||||
.data
|
||||
.get("error")
|
||||
.and_then(|value| value.get("message"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("browser script execution failed")
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Vec::new())
|
||||
parse_hotlist_items_payload(response.data.get("text").unwrap_or(&response.data))
|
||||
}
|
||||
|
||||
fn navigate_hotlist_with_retry<T: Transport + 'static>(
|
||||
@@ -212,130 +210,71 @@ fn export_screen<T: Transport>(
|
||||
Ok(format!("已生成知乎热榜大屏 {output_path}"))
|
||||
}
|
||||
|
||||
fn parse_hotlist_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
|
||||
let mut items = parse_single_line_items(text, top_n);
|
||||
if !items.is_empty() {
|
||||
return items;
|
||||
}
|
||||
|
||||
let lines = normalize_lines(text);
|
||||
let mut seen_ranks = BTreeSet::new();
|
||||
let mut idx = 0usize;
|
||||
|
||||
while idx < lines.len() && items.len() < top_n {
|
||||
let Some(rank) = parse_rank(&lines[idx]) else {
|
||||
idx += 1;
|
||||
continue;
|
||||
};
|
||||
if !seen_ranks.insert(rank) {
|
||||
idx += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut title = None;
|
||||
let mut heat = None;
|
||||
for candidate in lines.iter().skip(idx + 1).take(6) {
|
||||
if parse_rank(candidate).is_some() {
|
||||
break;
|
||||
}
|
||||
if heat.is_none() && looks_like_heat(candidate) {
|
||||
heat = Some(normalize_heat(candidate));
|
||||
continue;
|
||||
}
|
||||
if title.is_none() && !is_noise_line(candidate) {
|
||||
title = Some(candidate.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let (Some(title), Some(heat)) = (title, heat) {
|
||||
items.push(HotlistItem { rank, title, heat });
|
||||
}
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
items.sort_by_key(|item| item.rank);
|
||||
items.truncate(top_n);
|
||||
items
|
||||
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
|
||||
let script_path = Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR")))
|
||||
.join("skill_lib")
|
||||
.join("skills")
|
||||
.join("zhihu-hotlist")
|
||||
.join("scripts")
|
||||
.join("extract_hotlist.js");
|
||||
let script = fs::read_to_string(&script_path).map_err(|err| {
|
||||
PipeError::Protocol(format!(
|
||||
"failed to read zhihu hotlist extractor script {}: {err}",
|
||||
script_path.display()
|
||||
))
|
||||
})?;
|
||||
Ok(format!(
|
||||
"(function() {{\nconst args = {};\n{}\n}})()",
|
||||
json!({ "top_n": top_n.to_string() }),
|
||||
script
|
||||
))
|
||||
}
|
||||
|
||||
fn parse_single_line_items(text: &str, top_n: usize) -> Vec<HotlistItem> {
|
||||
let re = Regex::new(
|
||||
r"(?m)^\s*(\d{1,2})[\.、\s]+(.+?)\s+(\d+(?:\.\d+)?\s*[万亿kKmM]?)\s*(?:热度)?\s*$",
|
||||
)
|
||||
.expect("valid hotlist single-line regex");
|
||||
let mut items = Vec::new();
|
||||
let mut seen_ranks = BTreeSet::new();
|
||||
fn parse_hotlist_items_payload(payload: &Value) -> Result<Vec<HotlistItem>, PipeError> {
|
||||
let normalized_payload = if let Some(text) = payload.as_str() {
|
||||
serde_json::from_str::<Value>(text).unwrap_or_else(|_| Value::String(text.to_string()))
|
||||
} else {
|
||||
payload.clone()
|
||||
};
|
||||
|
||||
for capture in re.captures_iter(text) {
|
||||
let rank = capture
|
||||
.get(1)
|
||||
.and_then(|value| value.as_str().parse::<u64>().ok())
|
||||
.unwrap_or_default();
|
||||
if rank == 0 || !seen_ranks.insert(rank) {
|
||||
let rows = normalized_payload
|
||||
.get("rows")
|
||||
.and_then(Value::as_array)
|
||||
.ok_or_else(|| {
|
||||
PipeError::Protocol("知乎热榜采集失败:浏览器脚本未返回 rows".to_string())
|
||||
})?;
|
||||
|
||||
let mut items = Vec::new();
|
||||
for row in rows {
|
||||
let Some(cells) = row.as_array() else {
|
||||
continue;
|
||||
};
|
||||
if cells.len() != 3 {
|
||||
continue;
|
||||
}
|
||||
let title = capture.get(2).map(|value| value.as_str().trim()).unwrap_or("");
|
||||
let heat = capture.get(3).map(|value| value.as_str().trim()).unwrap_or("");
|
||||
|
||||
let rank = cells[0]
|
||||
.as_u64()
|
||||
.or_else(|| cells[0].as_str().and_then(|value| value.parse::<u64>().ok()))
|
||||
.unwrap_or((items.len() + 1) as u64);
|
||||
let title = cells[1].as_str().unwrap_or_default().trim().to_string();
|
||||
let heat = cells[2].as_str().unwrap_or_default().trim().to_string();
|
||||
if title.is_empty() || heat.is_empty() {
|
||||
continue;
|
||||
}
|
||||
items.push(HotlistItem {
|
||||
rank,
|
||||
title: title.to_string(),
|
||||
heat: normalize_heat(heat),
|
||||
});
|
||||
if items.len() >= top_n {
|
||||
break;
|
||||
}
|
||||
items.push(HotlistItem { rank, title, heat });
|
||||
}
|
||||
|
||||
items
|
||||
}
|
||||
|
||||
/// Splits `text` into lines, dropping lines that contain only whitespace and
/// collapsing every run of whitespace inside the remaining lines to a single
/// ASCII space (leading and trailing whitespace is removed).
fn normalize_lines(text: &str) -> Vec<String> {
    let mut normalized = Vec::new();
    for raw_line in text.lines() {
        let words: Vec<&str> = raw_line.split_whitespace().collect();
        // A line without any non-whitespace token is blank; skip it.
        if words.is_empty() {
            continue;
        }
        normalized.push(words.join(" "));
    }
    normalized
}
|
||||
|
||||
fn parse_rank(line: &str) -> Option<u64> {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if trimmed.chars().all(|ch| ch.is_ascii_digit()) {
|
||||
return trimmed.parse::<u64>().ok().filter(|value| *value > 0);
|
||||
if items.is_empty() {
|
||||
return Err(PipeError::Protocol(
|
||||
"知乎热榜采集失败:浏览器脚本未返回有效热榜条目".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let rank_re = Regex::new(r"^(\d{1,2})[\.、\s]").expect("valid rank regex");
|
||||
rank_re
|
||||
.captures(trimmed)
|
||||
.and_then(|capture| capture.get(1))
|
||||
.and_then(|value| value.as_str().parse::<u64>().ok())
|
||||
.filter(|value| *value > 0)
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
fn looks_like_heat(line: &str) -> bool {
|
||||
let compact = line.replace(' ', "");
|
||||
let heat_re = Regex::new(r"^\d+(?:\.\d+)?(?:万|亿|k|K|m|M)?(?:热度)?$").expect("valid heat regex");
|
||||
heat_re.is_match(compact.as_str())
|
||||
}
|
||||
|
||||
/// Produces the canonical heat string for `line`: every ASCII space is
/// removed, then any trailing `热度` suffix (repeated occurrences included)
/// is stripped.
fn normalize_heat(line: &str) -> String {
    const SUFFIX: &str = "热度";

    let mut compact: String = line.chars().filter(|&ch| ch != ' ').collect();
    // `ends_with` guarantees the cut lands on a character boundary.
    while compact.ends_with(SUFFIX) {
        compact.truncate(compact.len() - SUFFIX.len());
    }
    compact
}
|
||||
|
||||
/// Returns `true` when `line` is exactly one of the fixed page-chrome labels
/// listed below. The comparison is an exact, whole-string match.
fn is_noise_line(line: &str) -> bool {
    const NOISE_LABELS: [&str; 7] = [
        "知乎",
        "知乎热榜",
        "热榜",
        "首页",
        "发现",
        "等你来答",
        "更多内容",
    ];
    NOISE_LABELS.iter().any(|label| *label == line)
}
|
||||
|
||||
fn extract_top_n(instruction: &str) -> usize {
|
||||
let re = Regex::new(r"(?:前|top\s*)(\d{1,2})").expect("valid top-n regex");
|
||||
re.captures(&instruction.to_ascii_lowercase())
|
||||
|
||||
Reference in New Issue
Block a user