sgclaw: stop zhihu publish flow before editor on creator page

This commit is contained in:
zyl
2026-03-30 13:35:50 +08:00
parent bf09de6700
commit cd94904329
3 changed files with 1334 additions and 156 deletions

View File

@@ -5,16 +5,30 @@ use regex::Regex;
use serde_json::{json, Value};
use zeroclaw::tools::Tool;
use crate::compat::runtime::CompatTaskContext;
use crate::compat::openxml_office_tool::OpenXmlOfficeTool;
use crate::compat::screen_html_export_tool::ScreenHtmlExportTool;
use crate::pipe::{Action, AgentMessage, BrowserPipeTool, PipeError, Transport};
use crate::pipe::{
Action,
AgentMessage,
BrowserPipeTool,
ConversationMessage,
PipeError,
Transport,
};
/// Host of the main Zhihu site; passed as the expected domain for page navigation and the creator-entry script.
const ZHIHU_DOMAIN: &str = "www.zhihu.com";
/// Host of the article editor; passed as the expected domain for editor navigation and the `zhihu-write` scripts.
const ZHIHU_EDITOR_DOMAIN: &str = "zhuanlan.zhihu.com";
/// Hot-list page URL.
/// NOTE(review): not referenced in the visible part of this file; presumably used by the hot-list collector — confirm.
const ZHIHU_HOT_URL: &str = "https://www.zhihu.com/hot";
/// Creator-center landing page; both article routes navigate here first.
const ZHIHU_CREATOR_URL: &str = "https://www.zhihu.com/creator";
/// Article editor URL; used as the navigation target when the creator-entry payload carries no `next_url`.
const ZHIHU_EDITOR_URL: &str = "https://zhuanlan.zhihu.com/write";
/// Deterministic Zhihu workflows an instruction can be routed to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkflowRoute {
    /// Collect the hot list and export it as a spreadsheet.
    ZhihuHotlistExportXlsx,
    /// Collect the hot list and render it as a dashboard screen.
    ZhihuHotlistScreen,
    /// Only open the article editor; nothing is written into it.
    ZhihuArticleEntry,
    /// Open the editor and fill in an article draft.
    ZhihuArticleDraft,
    /// Open the editor, fill in the article and run the publish step.
    ZhihuArticlePublish,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -24,25 +38,47 @@ struct HotlistItem {
heat: String,
}
/// Title/body pair of a Zhihu article to be written into the editor.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ArticleDraft {
    /// Article title.
    title: String,
    /// Article body text.
    body: String,
}
pub fn detect_route(
instruction: &str,
page_url: Option<&str>,
page_title: Option<&str>,
) -> Option<WorkflowRoute> {
if !crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
return None;
if crate::runtime::is_zhihu_hotlist_task(instruction, page_url, page_title) {
let normalized = instruction.to_ascii_lowercase();
if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") {
return Some(WorkflowRoute::ZhihuHotlistScreen);
}
if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") {
return Some(WorkflowRoute::ZhihuHotlistExportXlsx);
}
}
let normalized = instruction.to_ascii_lowercase();
if normalized.contains("dashboard") || instruction.contains("大屏") || instruction.contains("新标签页") {
return Some(WorkflowRoute::ZhihuHotlistScreen);
if task_requests_zhihu_article_entry(instruction, page_url, page_title) {
return Some(WorkflowRoute::ZhihuArticleEntry);
}
if normalized.contains("excel") || normalized.contains("xlsx") || instruction.contains("导出") {
return Some(WorkflowRoute::ZhihuHotlistExportXlsx);
if crate::runtime::task_requests_zhihu_article_publish(instruction, page_url, page_title) {
return Some(WorkflowRoute::ZhihuArticlePublish);
}
if crate::runtime::is_zhihu_write_task(instruction, page_url, page_title) {
return Some(WorkflowRoute::ZhihuArticleDraft);
}
None
}
/// Returns `true` for the three article routes (entry, draft, publish) and
/// `false` for the two hot-list routes.
pub fn prefers_direct_execution(route: &WorkflowRoute) -> bool {
    match route {
        WorkflowRoute::ZhihuArticleEntry
        | WorkflowRoute::ZhihuArticleDraft
        | WorkflowRoute::ZhihuArticlePublish => true,
        WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen => false,
    }
}
pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bool {
let normalized = summary.to_ascii_lowercase();
if normalized.contains(".xlsx") || normalized.contains(".html") {
@@ -52,10 +88,19 @@ pub fn should_fallback_after_summary(summary: &str, route: &WorkflowRoute) -> bo
let looks_like_denial = summary.contains("拒绝") ||
normalized.contains("denied") ||
normalized.contains("failed") ||
normalized.contains("protocol error") ||
normalized.contains("maximum tool iterations") ||
summary.contains("失败") ||
summary.contains("无法");
looks_like_denial || matches!(route, WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen)
looks_like_denial || matches!(
route,
WorkflowRoute::ZhihuHotlistExportXlsx |
WorkflowRoute::ZhihuHotlistScreen |
WorkflowRoute::ZhihuArticleEntry |
WorkflowRoute::ZhihuArticleDraft |
WorkflowRoute::ZhihuArticlePublish
)
}
pub fn execute_route<T: Transport + 'static>(
@@ -63,19 +108,33 @@ pub fn execute_route<T: Transport + 'static>(
browser_tool: &BrowserPipeTool<T>,
workspace_root: &Path,
instruction: &str,
task_context: &CompatTaskContext,
route: WorkflowRoute,
) -> Result<String, PipeError> {
let top_n = extract_top_n(instruction);
let items = collect_hotlist_items(transport, browser_tool, top_n)?;
if items.is_empty() {
return Err(PipeError::Protocol(
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
));
}
match route {
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
WorkflowRoute::ZhihuHotlistExportXlsx | WorkflowRoute::ZhihuHotlistScreen => {
let top_n = extract_top_n(instruction);
let items = collect_hotlist_items(transport, browser_tool, top_n)?;
if items.is_empty() {
return Err(PipeError::Protocol(
"知乎热榜采集失败:未能从页面文本中解析到热榜条目".to_string(),
));
}
match route {
WorkflowRoute::ZhihuHotlistExportXlsx => export_xlsx(transport, workspace_root, &items),
WorkflowRoute::ZhihuHotlistScreen => export_screen(transport, workspace_root, &items),
_ => unreachable!("handled by outer match"),
}
}
WorkflowRoute::ZhihuArticleEntry => {
execute_zhihu_article_entry_route(transport, browser_tool)
}
WorkflowRoute::ZhihuArticleDraft => {
execute_zhihu_article_route(transport, browser_tool, instruction, task_context, false)
}
WorkflowRoute::ZhihuArticlePublish => {
execute_zhihu_article_route(transport, browser_tool, instruction, task_context, true)
}
}
}
@@ -210,26 +269,153 @@ fn export_screen<T: Transport>(
Ok(format!("已生成知乎热榜大屏 {output_path}"))
}
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
let script_path = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR")))
.join("skill_lib")
.join("skills")
.join("zhihu-hotlist")
.join("scripts")
.join("extract_hotlist.js");
let script = fs::read_to_string(&script_path).map_err(|err| {
PipeError::Protocol(format!(
"failed to read zhihu hotlist extractor script {}: {err}",
script_path.display()
))
fn execute_zhihu_article_route<T: Transport + 'static>(
transport: &T,
browser_tool: &BrowserPipeTool<T>,
instruction: &str,
task_context: &CompatTaskContext,
publish_mode: bool,
) -> Result<String, PipeError> {
let Some(article) = extract_article_draft(instruction, &task_context.messages) else {
return Ok(
"这类知乎文章任务需要同时提供标题和正文后我才能继续确定性写作流程。请按“标题:…\\n正文…”的格式补充内容。"
.to_string(),
);
};
if publish_mode && !has_explicit_publish_confirmation(instruction) {
return Ok(build_publish_confirmation_message(&article));
}
navigate_zhihu_page(transport, browser_tool, ZHIHU_CREATOR_URL)?;
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-navigate.open_creator_entry".to_string(),
})?;
Ok(format!(
"(function() {{\nconst args = {};\n{}\n}})()",
let creator_state = execute_browser_skill_script(
browser_tool,
"zhihu-navigate",
"open_creator_entry.js",
json!({ "desired_target": "article_editor" }),
ZHIHU_DOMAIN,
)?;
if is_login_required_payload(&creator_state) {
return Ok(build_login_block_message(payload_current_url(&creator_state)));
}
if payload_status(&creator_state) == Some("creator_home") {
return Ok(build_creator_entry_missing_message(payload_current_url(
&creator_state,
)));
}
navigate_to_editor_after_creator_entry(transport, browser_tool, &creator_state)?;
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-write.prepare_article_editor".to_string(),
})?;
let editor_state = execute_browser_skill_script(
browser_tool,
"zhihu-write",
"prepare_article_editor.js",
json!({ "desired_mode": if publish_mode { "publish" } else { "draft" } }),
ZHIHU_EDITOR_DOMAIN,
)?;
if is_login_required_payload(&editor_state) {
return Ok(build_login_block_message(payload_current_url(&editor_state)));
}
if payload_status(&editor_state) != Some("editor_ready") {
return Ok(build_editor_unavailable_message(payload_current_url(&editor_state)));
}
transport.send(&AgentMessage::LogEntry {
level: "info".to_string(),
message: "call zhihu-write.fill_article_draft".to_string(),
})?;
let fill_result = execute_browser_skill_script(
browser_tool,
"zhihu-write",
"fill_article_draft.js",
json!({
"title": article.title,
"body": article.body,
"publish_mode": publish_mode.to_string(),
}),
ZHIHU_EDITOR_DOMAIN,
)?;
if is_login_required_payload(&fill_result) {
return Ok(build_login_block_message(payload_current_url(&fill_result)));
}
match payload_status(&fill_result) {
Some("draft_ready") => Ok(format!("已进入知乎文章编辑器并写入草稿《{}", article.title)),
Some("publish_clicked") | Some("publish_submitted") => {
Ok(format!("已提交知乎文章发布流程《{}", article.title))
}
Some("publish_button_missing") => Err(PipeError::Protocol(
"知乎文章流程失败:未找到发布按钮".to_string(),
)),
Some("editor_not_ready") => Err(PipeError::Protocol(
"知乎文章流程失败:编辑器尚未准备就绪".to_string(),
)),
_ => Err(PipeError::Protocol(format!(
"知乎文章流程失败:浏览器脚本返回了未知状态 {fill_result}"
))),
}
}
/// Opens the Zhihu article editor without writing anything into it.
///
/// Goes through the creator center, follows the article entry, then asks the
/// `zhihu-write` skill to prepare the editor in draft mode.
///
/// Returns `Ok` with a user-facing message in every handled outcome (editor
/// ready, login required, entry missing, editor unavailable). Returns
/// `PipeError` only when navigation, logging or a browser script fails.
fn execute_zhihu_article_entry_route<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
) -> Result<String, PipeError> {
    // Builds an info-level log entry for the transport.
    let info = |text: &str| AgentMessage::LogEntry {
        level: "info".to_string(),
        message: text.to_string(),
    };

    navigate_zhihu_page(transport, browser_tool, ZHIHU_CREATOR_URL)?;
    transport.send(&info("call zhihu-navigate.open_creator_entry"))?;
    let creator = execute_browser_skill_script(
        browser_tool,
        "zhihu-navigate",
        "open_creator_entry.js",
        json!({ "desired_target": "article_editor" }),
        ZHIHU_DOMAIN,
    )?;
    if is_login_required_payload(&creator) {
        let url = payload_current_url(&creator);
        return Ok(build_login_block_message(url));
    }
    if let Some("creator_home") = payload_status(&creator) {
        let url = payload_current_url(&creator);
        return Ok(build_creator_entry_missing_message(url));
    }

    navigate_to_editor_after_creator_entry(transport, browser_tool, &creator)?;
    transport.send(&info("call zhihu-write.prepare_article_editor"))?;
    let editor = execute_browser_skill_script(
        browser_tool,
        "zhihu-write",
        "prepare_article_editor.js",
        json!({ "desired_mode": "draft" }),
        ZHIHU_EDITOR_DOMAIN,
    )?;
    if is_login_required_payload(&editor) {
        let url = payload_current_url(&editor);
        return Ok(build_login_block_message(url));
    }

    let message = match payload_status(&editor) {
        Some("editor_ready") => "已进入知乎文章编辑器。".to_string(),
        _ => build_editor_unavailable_message(payload_current_url(&editor)),
    };
    Ok(message)
}
fn load_hotlist_extractor_script(top_n: usize) -> Result<String, PipeError> {
load_browser_skill_script(
"zhihu-hotlist",
"extract_hotlist.js",
json!({ "top_n": top_n.to_string() }),
script
))
)
}
fn parse_hotlist_items_payload(payload: &Value) -> Result<Vec<HotlistItem>, PipeError> {
@@ -283,3 +469,262 @@ fn extract_top_n(instruction: &str) -> usize {
.filter(|value| *value > 0)
.unwrap_or(10)
}
/// Logs and performs a browser navigation to `url`, expecting the main Zhihu domain.
///
/// # Errors
/// Propagates failures from the transport or the browser tool, and returns
/// `PipeError::Protocol` when the browser reports an unsuccessful navigation.
fn navigate_zhihu_page<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    url: &str,
) -> Result<(), PipeError> {
    let log_entry = AgentMessage::LogEntry {
        level: "info".to_string(),
        message: format!("navigate {url}"),
    };
    transport.send(&log_entry)?;

    let outcome = browser_tool.invoke(Action::Navigate, json!({ "url": url }), ZHIHU_DOMAIN)?;
    if !outcome.success {
        return Err(PipeError::Protocol(format!("navigate failed: {}", outcome.data)));
    }
    Ok(())
}
/// Loads a skill script, evaluates it in the browser and returns its payload.
///
/// The payload is the response's `text` field when present, otherwise the
/// whole response data; either way it is passed through `normalize_payload`.
///
/// # Errors
/// Propagates script-loading and browser-tool failures, and returns
/// `PipeError::Protocol` when the browser reports the evaluation as unsuccessful.
fn execute_browser_skill_script<T: Transport + 'static>(
    browser_tool: &BrowserPipeTool<T>,
    skill_name: &str,
    script_name: &str,
    args: Value,
    expected_domain: &str,
) -> Result<Value, PipeError> {
    let script = load_browser_skill_script(skill_name, script_name, args)?;
    let outcome = browser_tool.invoke(Action::Eval, json!({ "script": script }), expected_domain)?;

    if outcome.success {
        let raw = outcome.data.get("text").unwrap_or(&outcome.data);
        Ok(normalize_payload(raw))
    } else {
        Err(PipeError::Protocol(format!(
            "browser script failed: {}",
            outcome.data
        )))
    }
}
/// Navigates to the article editor when the creator-entry script found or clicked the entry.
///
/// Only the statuses `creator_entry_clicked` and `creator_entry_found` trigger
/// a navigation. Every other status, including `editor_ready`, returns
/// `Ok(())` without touching the browser. The target is the payload's
/// `next_url`, falling back to `ZHIHU_EDITOR_URL`.
///
/// # Errors
/// Propagates transport and browser-tool failures, and returns
/// `PipeError::Protocol` when the navigation is reported as unsuccessful.
fn navigate_to_editor_after_creator_entry<T: Transport + 'static>(
    transport: &T,
    browser_tool: &BrowserPipeTool<T>,
    creator_state: &Value,
) -> Result<(), PipeError> {
    match payload_status(creator_state) {
        Some("creator_entry_clicked") | Some("creator_entry_found") => {}
        _ => return Ok(()),
    }

    let destination = payload_next_url(creator_state).unwrap_or(ZHIHU_EDITOR_URL);
    let log_entry = AgentMessage::LogEntry {
        level: "info".to_string(),
        message: format!("navigate {destination}"),
    };
    transport.send(&log_entry)?;

    let outcome = browser_tool.invoke(
        Action::Navigate,
        json!({ "url": destination }),
        ZHIHU_EDITOR_DOMAIN,
    )?;
    if outcome.success {
        Ok(())
    } else {
        Err(PipeError::Protocol(format!(
            "navigate failed: {}",
            outcome.data
        )))
    }
}
/// Reads a skill script from disk and wraps it in an immediately-invoked function.
///
/// The script is looked up at
/// `<base>/skill_lib/skills/<skill_name>/scripts/<script_name>`, where `<base>`
/// is the parent of the compile-time `CARGO_MANIFEST_DIR` (or that directory
/// itself when it has no parent). `args` is embedded as the script's `args` constant.
///
/// # Errors
/// Returns `PipeError::Protocol` when the script file cannot be read.
fn load_browser_skill_script(
    skill_name: &str,
    script_name: &str,
    args: Value,
) -> Result<String, PipeError> {
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let base_dir = manifest_dir.parent().unwrap_or(manifest_dir);
    let script_path = ["skill_lib", "skills", skill_name, "scripts", script_name]
        .iter()
        .fold(base_dir.to_path_buf(), |path, part| path.join(part));

    let script = match fs::read_to_string(&script_path) {
        Ok(contents) => contents,
        Err(err) => {
            return Err(PipeError::Protocol(format!(
                "failed to read browser script {}: {err}",
                script_path.display()
            )));
        }
    };

    Ok(format!(
        "(function() {{\nconst args = {};\n{}\n}})()",
        args,
        script
    ))
}
/// Turns a script payload into a JSON value.
///
/// A string payload is parsed as JSON. If parsing fails the text is kept as a
/// JSON string. Non-string payloads are cloned unchanged.
fn normalize_payload(payload: &Value) -> Value {
    match payload.as_str() {
        Some(text) => match serde_json::from_str::<Value>(text) {
            Ok(parsed) => parsed,
            Err(_) => Value::String(text.to_string()),
        },
        None => payload.clone(),
    }
}
fn payload_status(payload: &Value) -> Option<&str> {
payload.get("status").and_then(Value::as_str)
}
fn payload_current_url(payload: &Value) -> Option<&str> {
payload.get("current_url").and_then(Value::as_str)
}
fn payload_next_url(payload: &Value) -> Option<&str> {
payload.get("next_url").and_then(Value::as_str)
}
/// Returns `true` when the payload's status is exactly `login_required`.
fn is_login_required_payload(payload: &Value) -> bool {
    matches!(payload_status(payload), Some("login_required"))
}
/// Formats the optional " 当前页面:<url>" suffix shared by the status messages.
///
/// Returns an empty string when the URL is missing or empty.
fn current_page_suffix(current_url: Option<&str>) -> String {
    match current_url {
        Some(url) if !url.is_empty() => format!(" 当前页面:{url}"),
        _ => String::new(),
    }
}

/// Message shown when the browser session is not logged in to Zhihu.
///
/// `current_url`, when present and non-empty, is appended as the current page.
fn build_login_block_message(current_url: Option<&str>) -> String {
    let suffix = current_page_suffix(current_url);
    format!(
        "当前知乎浏览器会话未登录,无法进入创作者中心或发布文章。请先登录知乎后再继续。{suffix}"
    )
}

/// Message shown when the creator flow was entered but no article editor was detected.
///
/// `current_url`, when present and non-empty, is appended as the current page.
fn build_editor_unavailable_message(current_url: Option<&str>) -> String {
    let suffix = current_page_suffix(current_url);
    format!(
        "已进入知乎创作者流程,但当前未检测到文章编辑器。可能原因是页面仍在加载、当前账号暂未开放写作入口,或知乎页面结构发生变化。请确认当前知乎账号已登录且具备发文权限,然后在页面稳定后重试。{suffix}"
    )
}

/// Message shown when the creator center is open but no "write article" entry was found.
///
/// `current_url`, when present and non-empty, is appended as the current page.
fn build_creator_entry_missing_message(current_url: Option<&str>) -> String {
    let suffix = current_page_suffix(current_url);
    format!(
        "已进入知乎创作者中心,但当前未找到“写文章”入口。请确认页面已加载完成,且当前账号具备文章发布入口后再重试。{suffix}"
    )
}
fn build_publish_confirmation_message(article: &ArticleDraft) -> String {
format!(
"我已收到这篇知乎文章的内容,但在当前会话里还没有拿到明确发布确认。\n\n标题:{}\n正文:{}\n\n如果你确定现在要发布,请直接回复“确认发布”。在收到明确确认之前,我不会执行任何发布动作。",
article.title,
article.body
)
}
/// Returns `true` when the instruction contains an explicit, non-negated publish confirmation.
///
/// A confirmation is one of the phrases "确认发布", "确认发表", "现在发布",
/// "立即发布" or "可以发布". An occurrence is ignored when the text directly
/// before it ends with a negation word, so "不可以发布" or "先不要确认发布" do
/// not count. Publishing cannot be undone, so an ambiguous instruction falls
/// back to asking again.
fn has_explicit_publish_confirmation(instruction: &str) -> bool {
    const CONFIRMATION_PHRASES: [&str; 5] =
        ["确认发布", "确认发表", "现在发布", "立即发布", "可以发布"];
    // Words that, placed directly before a confirmation phrase, reverse its meaning.
    const NEGATION_SUFFIXES: [&str; 12] = [
        "不", "不要", "不能", "不用", "不必", "不想", "别", "勿", "未", "没", "没有", "无需",
    ];

    CONFIRMATION_PHRASES.iter().any(|phrase| {
        instruction.match_indices(*phrase).any(|(start, _)| {
            // `start` comes from a match, so it is always a char boundary.
            let before = instruction[..start].trim_end();
            !NEGATION_SUFFIXES
                .iter()
                .any(|negation| before.ends_with(*negation))
        })
    })
}
/// Returns `true` when the instruction only asks to open the Zhihu article entry/editor.
///
/// All of the following must hold:
/// - the task is a Zhihu write task according to `crate::runtime::is_zhihu_write_task`,
/// - the instruction contains an "open"/"go to" style verb,
/// - it mentions a page, an entry, the creator center or writing/posting an article,
/// - it carries no parseable title and body (otherwise it is a writing task).
fn task_requests_zhihu_article_entry(
    instruction: &str,
    page_url: Option<&str>,
    page_title: Option<&str>,
) -> bool {
    if !crate::runtime::is_zhihu_write_task(instruction, page_url, page_title) {
        return false;
    }

    // Every pattern below must be non-empty: `str::contains("")` is always
    // true and would make the corresponding check match every instruction.
    // NOTE(review): if more navigation verbs are wanted, add them here as
    // explicit non-empty entries.
    const ASCII_OPEN_VERBS: [&str; 3] = ["open", "goto", "go to"];
    const CHINESE_OPEN_VERBS: [&str; 2] = ["打开", "进入"];
    const ENTRY_MENTIONS: [&str; 5] = ["页面", "入口", "创作中心", "写文章", "发文章"];

    // ASCII verbs are matched case-insensitively.
    let normalized = instruction.to_ascii_lowercase();
    let asks_to_open = ASCII_OPEN_VERBS
        .iter()
        .any(|verb| normalized.contains(*verb))
        || CHINESE_OPEN_VERBS
            .iter()
            .any(|verb| instruction.contains(*verb));
    let mentions_entry = ENTRY_MENTIONS
        .iter()
        .any(|mention| instruction.contains(*mention));
    let has_article_inputs = parse_article_draft(instruction).is_some();

    asks_to_open && mentions_entry && !has_article_inputs
}
/// Finds an article draft in the instruction or, failing that, in earlier user messages.
///
/// The current instruction is tried first. Otherwise messages with role
/// `"user"` are scanned from newest to oldest and the first one that parses
/// as a draft wins.
fn extract_article_draft(
    instruction: &str,
    messages: &[ConversationMessage],
) -> Option<ArticleDraft> {
    if let Some(draft) = parse_article_draft(instruction) {
        return Some(draft);
    }
    for message in messages.iter().rev() {
        if message.role != "user" {
            continue;
        }
        if let Some(draft) = parse_article_draft(&message.content) {
            return Some(draft);
        }
    }
    None
}
/// Parses a title and body out of free-form text.
///
/// Two notations are recognised for each part, tried in this order:
/// - labelled: `标题:<title>` at the start of a line and `正文:<body>`,
///   with either an ASCII or a full-width colon,
/// - inline: `标题是<title>` / `标题为<title>` and `正文是<body>` / `正文为<body>`.
///
/// The inline title ends at the first ASCII or full-width comma or newline.
/// The body runs to the end of the text; the inline form also drops trailing `。`.
///
/// Returns `None` when either part is missing or empty after trimming.
fn parse_article_draft(text: &str) -> Option<ArticleDraft> {
    let normalized = normalize_article_draft_input(text);

    // `\u{FF1A}` is the full-width colon and `\u{FF0C}` the full-width comma,
    // the punctuation a Chinese input method normally produces. They are
    // written as escapes so the intent survives text normalisation.
    let title_re = Regex::new(r"(?m)^标题[:\u{FF1A}]\s*(.+?)\s*$")
        .expect("valid zhihu title regex");
    let body_re = Regex::new(r"(?s)正文[:\u{FF1A}]\s*(.+)$").expect("valid zhihu body regex");
    let inline_title_re = Regex::new(r"标题(?:是|为)\s*([^,\u{FF0C}\n]+)")
        .expect("valid inline zhihu title regex");
    let inline_body_re = Regex::new(r"(?s)正文(?:是|为)\s*(.+)$")
        .expect("valid inline zhihu body regex");

    let title = title_re
        .captures(&normalized)
        .and_then(|capture| capture.get(1))
        .map(|value| value.as_str().trim().to_string())
        .or_else(|| {
            inline_title_re
                .captures(&normalized)
                .and_then(|capture| capture.get(1))
                .map(|value| value.as_str().trim().to_string())
        })?;
    let body = body_re
        .captures(&normalized)
        .and_then(|capture| capture.get(1))
        .map(|value| value.as_str().trim().to_string())
        .or_else(|| {
            inline_body_re
                .captures(&normalized)
                .and_then(|capture| capture.get(1))
                // The inline form is usually a full sentence; drop its final period(s).
                .map(|value| value.as_str().trim().trim_end_matches('。').to_string())
        })?;

    if title.is_empty() || body.is_empty() {
        return None;
    }
    Some(ArticleDraft { title, body })
}
/// Prepares raw instruction text for draft parsing.
///
/// Trims surrounding whitespace, removes one pair of matching outer quotes
/// (`"…"` or `'…'`) and turns literal `\n` sequences into real newlines.
fn normalize_article_draft_input(text: &str) -> String {
    let trimmed = text.trim();
    let inner = ['"', '\'']
        .iter()
        .find_map(|&quote| {
            trimmed
                .strip_prefix(quote)
                .and_then(|rest| rest.strip_suffix(quote))
        })
        .unwrap_or(trimmed);
    inner.replace("\\n", "\n")
}