// Listing header (not part of the Rust source):
// claw/tests/deterministic_submit_test.rs — 558 lines, 18 KiB, Rust

use std::fs;
use std::panic::AssertUnwindSafe;
use std::path::{Path, PathBuf};
use std::sync::{Mutex, OnceLock};
use chrono::{Datelike, Local};
use sgclaw::compat::deterministic_submit::{
decide_deterministic_submit, decide_deterministic_submit_with_skills_dir,
DeterministicSubmitDecision,
};
use uuid::Uuid;
/// Creates a new directory named `{prefix}-{uuid}` under the system temp
/// directory and returns its path.
///
/// A random v4 UUID suffix keeps directories from separate calls distinct.
/// Panics if the directory cannot be created.
fn temp_root(prefix: &str) -> PathBuf {
    let unique_name = format!("{prefix}-{}", Uuid::new_v4());
    let mut root = std::env::temp_dir();
    root.push(unique_name);
    fs::create_dir_all(&root).unwrap();
    root
}
/// Returns the single process-wide mutex that tests hold while they depend on
/// (or change) the process's current working directory.
///
/// The mutex is created lazily on first use and lives for the whole process.
fn current_dir_lock() -> &'static Mutex<()> {
    static CWD_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
    CWD_LOCK.get_or_init(Mutex::default)
}
fn with_temp_workspace<T>(prefix: &str, test: impl FnOnce(&Path) -> T) -> T {
let _guard = current_dir_lock().lock().unwrap();
let workspace_root = temp_root(prefix);
let original_dir = std::env::current_dir().unwrap();
std::env::set_current_dir(&workspace_root).unwrap();
let result = std::panic::catch_unwind(AssertUnwindSafe(|| test(&workspace_root)));
std::env::set_current_dir(original_dir).unwrap();
match result {
Ok(value) => value,
Err(payload) => std::panic::resume_unwind(payload),
}
}
/// Returns `<workspace_root>/.sgclaw-zeroclaw-workspace/skills`, the skills
/// directory the tests populate inside a temporary workspace.
fn default_skills_root(workspace_root: &Path) -> PathBuf {
    let mut skills_root = workspace_root.to_path_buf();
    skills_root.extend([".sgclaw-zeroclaw-workspace", "skills"]);
    skills_root
}
/// Renders a minimal `SKILL.toml` describing one `browser_script` tool.
///
/// `skill_name` fills the `[skill]` name; `tool_name` fills the tool's name
/// and its `scripts/<tool_name>.js` command path.
fn browser_script_skill_toml(skill_name: &str, tool_name: &str) -> String {
    let skill_section = format!(
        "[skill]\nname = \"{skill_name}\"\ndescription = \"test skill\"\nversion = \"0.1.0\"\n"
    );
    let tools_section = format!(
        "[[tools]]\nname = \"{tool_name}\"\ndescription = \"test tool\"\nkind = \"browser_script\"\ncommand = \"scripts/{tool_name}.js\"\n"
    );
    skill_section + &tools_section
}
/// Renders `values` as a single-line TOML array of basic strings, e.g.
/// `["a", "b"]`. An empty slice renders as `[]`.
///
/// Backslashes, double quotes, and the common control characters (newline,
/// carriage return, tab) are escaped so that the result stays valid TOML even
/// when a value contains them.
fn toml_array(values: &[&str]) -> String {
    /// Escapes one value for use inside a TOML basic (double-quoted) string.
    fn escape_basic_string(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                '\t' => escaped.push_str("\\t"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    // Joining zero items yields an empty string, so the empty slice needs no
    // special case: it naturally renders as "[]".
    let joined = values
        .iter()
        .map(|value| format!("\"{}\"", escape_basic_string(value)))
        .collect::<Vec<_>>()
        .join(", ");
    format!("[{joined}]")
}
/// Renders a `scene.toml` manifest for a `browser_script` report-collection
/// scene.
///
/// The scalar arguments are substituted verbatim into the template; the three
/// keyword slices are rendered as TOML arrays via `toml_array`. The template
/// always declares the `。。。` deterministic suffix, a required `org`
/// parameter (resolved against `references/org-dictionary.json`) and a
/// required `period` parameter.
fn scene_toml(
    scene_id: &str,
    skill_name: &str,
    tool_name: &str,
    expected_domain: &str,
    target_url: &str,
    include_keywords: &[&str],
    exclude_keywords: &[&str],
    page_title_keywords: &[&str],
) -> String {
    // Shadow each keyword slice with its rendered TOML array so the template
    // below can capture every placeholder directly by name.
    let include_keywords = toml_array(include_keywords);
    let exclude_keywords = toml_array(exclude_keywords);
    let page_title_keywords = toml_array(page_title_keywords);
    format!(
        r#"[scene]
id = "{scene_id}"
skill = "{skill_name}"
tool = "{tool_name}"
kind = "browser_script"
version = "0.1.0"
category = "report_collection"
[manifest]
schema_version = "1"
[bootstrap]
expected_domain = "{expected_domain}"
target_url = "{target_url}"
page_title_keywords = {page_title_keywords}
requires_target_page = true
[deterministic]
suffix = "。。。"
include_keywords = {include_keywords}
exclude_keywords = {exclude_keywords}
[[params]]
name = "org"
resolver = "dictionary_entity"
required = true
prompt_missing = "已命中台区线损报表技能,但缺少供电单位。"
prompt_ambiguous = "已命中台区线损报表技能,但供电单位存在歧义,请补充更完整名称。"
[params.resolver_config]
dictionary_ref = "references/org-dictionary.json"
output_label_field = "org_label"
output_code_field = "org_code"
[[params]]
name = "period"
resolver = "month_week_period"
required = true
prompt_missing = "已命中台区线损报表技能,但缺少统计周期。"
prompt_ambiguous = "已命中台区线损报表技能,但统计周期存在歧义,请补充更明确表达。"
[artifact]
type = "report-artifact"
success_status = ["ok", "partial", "empty"]
failure_status = ["blocked", "error"]
"#
    )
}
/// Returns the JSON organisation dictionary fixture: three entries, each with
/// a `label`, a `code`, and a list of `aliases`.
fn org_dictionary_json() -> &'static str {
    const ORG_DICTIONARY: &str = r#"[
{
"label": "国网兰州供电公司",
"code": "62401",
"aliases": ["国网兰州供电公司", "兰州供电公司", "兰州公司"]
},
{
"label": "城关供电分公司",
"code": "6240108",
"aliases": ["城关供电分公司", "城关分公司"]
},
{
"label": "国网天水供电公司",
"code": "62403",
"aliases": ["国网天水供电公司", "天水供电公司", "天水公司"]
}
]"#;
    ORG_DICTIONARY
}
/// Writes a complete scene skill package under `skills_root/<skill_name>`.
///
/// Creates the skill directory together with its `references` subdirectory,
/// then writes, in order: `SKILL.toml`, `scene.toml`, and
/// `references/org-dictionary.json`. Panics on any I/O failure.
fn write_scene_skill(
    skills_root: &Path,
    scene_id: &str,
    skill_name: &str,
    tool_name: &str,
    expected_domain: &str,
    target_url: &str,
    include_keywords: &[&str],
    exclude_keywords: &[&str],
    page_title_keywords: &[&str],
) {
    let skill_root = skills_root.join(skill_name);
    let references_dir = skill_root.join("references");
    // Creating the nested directory also creates the skill root itself.
    fs::create_dir_all(&references_dir).unwrap();

    let package_files = [
        (
            skill_root.join("SKILL.toml"),
            browser_script_skill_toml(skill_name, tool_name),
        ),
        (
            skill_root.join("scene.toml"),
            scene_toml(
                scene_id,
                skill_name,
                tool_name,
                expected_domain,
                target_url,
                include_keywords,
                exclude_keywords,
                page_title_keywords,
            ),
        ),
        (
            references_dir.join("org-dictionary.json"),
            org_dictionary_json().to_string(),
        ),
    ];
    for (path, contents) in package_files {
        fs::write(path, contents).unwrap();
    }
}
/// Asserts that `decision` is a `Prompt` whose summary contains `needle`.
///
/// Panics with the full summary when the text does not match, and with the
/// debug form of the decision when it is not a prompt at all.
fn assert_prompt_contains(decision: DeterministicSubmitDecision, needle: &str) {
    if let DeterministicSubmitDecision::Prompt { summary } = &decision {
        assert!(summary.contains(needle), "unexpected prompt: {summary}");
    } else {
        panic!("expected prompt containing {needle}, got {decision:?}");
    }
}
/// A short line-loss alias with the deterministic suffix must be recognized
/// against the skills shipped in the committed validation bundle: either it
/// executes the expected tool, or it prompts with something other than the
/// "unsupported scene" message.
#[test]
fn deterministic_submit_matches_final_bundle_lineloss_alias() {
    // Escaped text of the prompt fragment that marks an unsupported request.
    let unsupported_marker =
        "\u{786e}\u{5b9a}\u{6027}\u{63d0}\u{4ea4}\u{5f53}\u{524d}\u{53ea}\u{652f}\u{6301}\u{5df2}\u{6ce8}\u{518c}";
    // Escaped instruction: the line-loss alias followed by the `。。。` suffix.
    let instruction = "\u{53f0}\u{533a}\u{7ebf}\u{635f}\u{3002}\u{3002}\u{3002}";

    let mut skills_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    skills_root.extend([
        "dist",
        "sgclaw_102_pseudoprod_validation_bundle_2026-04-20",
        "skills",
    ]);

    match decide_deterministic_submit_with_skills_dir(instruction, None, None, &skills_root) {
        DeterministicSubmitDecision::NotDeterministic => {
            panic!("expected deterministic line-loss alias to be recognized")
        }
        DeterministicSubmitDecision::Execute(plan) => {
            assert_eq!(plan.tool_name, "sweep-030-scene.collect_sweep_030_scene");
        }
        DeterministicSubmitDecision::Prompt { summary } => {
            assert!(
                !summary.contains(unsupported_marker),
                "expected line-loss alias to reach registered scene resolver, got unsupported prompt: {summary}"
            );
        }
    }
}
/// With the committed validation bundle, a monthly line-loss request that
/// names no period must execute with month semantics and default to the
/// calendar month before today (local time).
#[test]
fn deterministic_submit_defaults_final_bundle_lineloss_month_to_page_semantics() {
    let mut skills_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    skills_root.extend([
        "dist",
        "sgclaw_102_pseudoprod_validation_bundle_2026-04-20",
        "skills",
    ]);
    // Escaped instruction: organisation, report keywords, and the `。。。`
    // suffix, with no explicit period.
    let instruction = "\u{5170}\u{5dde}\u{516c}\u{53f8} \u{7ebf}\u{635f}\u{5927}\u{6570}\u{636e} \u{6708}\u{7d2f}\u{8ba1}\u{7ebf}\u{635f}\u{7edf}\u{8ba1}\u{5206}\u{6790}\u{3002}\u{3002}\u{3002}";
    let decision =
        decide_deterministic_submit_with_skills_dir(instruction, None, None, &skills_root);

    // Previous calendar month; January rolls back to December of last year.
    let today = Local::now().date_naive();
    let (year, month) = match today.month() {
        1 => (today.year() - 1, 12),
        current => (today.year(), current - 1),
    };
    let expected_month = format!("{year}-{month:02}");

    match decision {
        DeterministicSubmitDecision::Execute(plan) => {
            assert_eq!(plan.tool_name, "sweep-030-scene.collect_sweep_030_scene");
            assert_eq!(plan.period_mode, "month");
            assert_eq!(plan.period_mode_code, "1");
            assert_eq!(plan.period_value, expected_month);
            assert!(plan.period_payload.contains("fdate"));
        }
        other => panic!("expected execute plan with default month semantics, got {other:?}"),
    }
}
/// A scene registered in the workspace's default skills directory must yield
/// an execute plan whose tool, domain, URL, organisation, and period all come
/// from the registered scene and the instruction.
#[test]
fn deterministic_submit_uses_registry_backed_scene_plan() {
    const TARGET_URL: &str =
        "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor";

    with_temp_workspace("sgclaw-deterministic-scene", |workspace_root| {
        write_scene_skill(
            &default_skills_root(workspace_root),
            "tq-lineloss-report",
            "tq-lineloss-report",
            "collect_lineloss",
            "20.76.57.61",
            TARGET_URL,
            &["线损", "统计分析"],
            &["知乎"],
            &["线损报表"],
        );

        let instruction = "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。";
        match decide_deterministic_submit(instruction, None, None) {
            DeterministicSubmitDecision::Execute(plan) => {
                assert_eq!(plan.tool_name, "tq-lineloss-report.collect_lineloss");
                assert_eq!(plan.expected_domain, "20.76.57.61");
                assert_eq!(plan.target_url, TARGET_URL);
                assert_eq!(plan.org_label, "国网兰州供电公司");
                assert_eq!(plan.org_code, "62401");
                assert_eq!(plan.period_mode, "month");
                assert_eq!(plan.period_mode_code, "1");
                assert_eq!(plan.period_value, "2026-03");
                assert!(plan.period_payload.contains("fdate"));
            }
            other => panic!("expected execute plan, got {other:?}"),
        }
    });
}
/// Only an instruction ending in exactly `。。。` is deterministic.
///
/// The positive case must produce an execute plan; a missing suffix, ASCII
/// dots, an extra `。`, and a trailing space must each be rejected as
/// `NotDeterministic`. Every assertion reports the offending instruction and
/// the decision actually returned, so a failure identifies the exact variant.
#[test]
fn deterministic_submit_requires_exact_suffix() {
    with_temp_workspace("sgclaw-deterministic-suffix", |workspace_root| {
        let skills_root = default_skills_root(workspace_root);
        write_scene_skill(
            &skills_root,
            "tq-lineloss-report",
            "tq-lineloss-report",
            "collect_lineloss",
            "20.76.57.61",
            "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor",
            &["线损", "统计分析"],
            &["知乎"],
            &["线损报表"],
        );

        let accepted = decide_deterministic_submit(
            "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。",
            None,
            None,
        );
        assert!(
            matches!(accepted, DeterministicSubmitDecision::Execute(_)),
            "exact suffix should produce an execute plan, got {accepted:?}"
        );

        // Near-miss suffixes: none, ASCII dots, one `。` too many, trailing space.
        for instruction in [
            "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03",
            "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03...",
            "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。。",
            "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。 ",
        ] {
            let decision = decide_deterministic_submit(instruction, None, None);
            assert!(
                matches!(decision, DeterministicSubmitDecision::NotDeterministic),
                "instruction {instruction:?} must not be deterministic, got {decision:?}"
            );
        }
    });
}
/// When two registered scenes match the same instruction equally, the
/// decision must be a prompt that names both candidates instead of picking one.
#[test]
fn deterministic_submit_fails_closed_on_scene_ambiguity() {
    with_temp_workspace("sgclaw-deterministic-ambiguity", |workspace_root| {
        let skills_root = default_skills_root(workspace_root);
        // Two scenes sharing the same include keyword and no excludes.
        write_scene_skill(
            &skills_root,
            "tq-lineloss-report",
            "tq-lineloss-report",
            "collect_lineloss",
            "20.76.57.61",
            "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor",
            &["统计分析"],
            &[],
            &["线损报表"],
        );
        write_scene_skill(
            &skills_root,
            "other-report",
            "other-report",
            "collect_other",
            "20.76.57.61",
            "http://20.76.57.61:18080/other/report",
            &["统计分析"],
            &[],
            &["其他报表"],
        );

        match decide_deterministic_submit("兰州公司 统计分析。。。", None, None) {
            DeterministicSubmitDecision::Prompt { summary } => {
                // The ambiguity notice plus both candidate scene ids.
                for fragment in ["多个确定性场景", "tq-lineloss-report", "other-report"] {
                    assert!(summary.contains(fragment), "unexpected prompt: {summary}");
                }
            }
            other => panic!("expected ambiguity prompt, got {other:?}"),
        }
    });
}
/// For a scene whose `period` parameter is required, an instruction without a
/// period must produce the "missing period" prompt for both the monthly and
/// the weekly wording.
#[test]
fn deterministic_submit_prompts_for_missing_period_instead_of_defaulting() {
    with_temp_workspace("sgclaw-deterministic-period", |workspace_root| {
        write_scene_skill(
            &default_skills_root(workspace_root),
            "tq-lineloss-report",
            "tq-lineloss-report",
            "collect_lineloss",
            "20.76.57.61",
            "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor",
            &["线损", "统计分析"],
            &["知乎"],
            &["线损报表"],
        );

        for instruction in [
            "兰州公司 月累计 统计分析。。。",
            "兰州公司 周累计 统计分析。。。",
        ] {
            assert_prompt_contains(
                decide_deterministic_submit(instruction, None, None),
                "缺少统计周期",
            );
        }
    });
}
/// When two scenes match on keywords alone, the supplied page URL and title
/// must select the scene whose page-title keywords fit the current page.
#[test]
fn deterministic_submit_uses_page_context_to_break_ties_before_keyword_only_match() {
    const LINELOSS_URL: &str =
        "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor";

    with_temp_workspace("sgclaw-deterministic-page-context", |workspace_root| {
        let skills_root = default_skills_root(workspace_root);
        write_scene_skill(
            &skills_root,
            "tq-lineloss-report",
            "tq-lineloss-report",
            "collect_lineloss",
            "20.76.57.61",
            LINELOSS_URL,
            &["统计分析"],
            &[],
            &["线损报表"],
        );
        write_scene_skill(
            &skills_root,
            "fault-report",
            "fault-report",
            "collect_fault",
            "20.76.57.61",
            "http://20.76.57.61:18080/fault/report",
            &["统计分析"],
            &[],
            &["95598工单"],
        );

        let page_url = Some("http://20.76.57.61:18080/#/lineloss");
        let page_title = Some("台区线损报表");
        match decide_deterministic_submit(
            "兰州公司 月累计 统计分析 2026-03。。。",
            page_url,
            page_title,
        ) {
            DeterministicSubmitDecision::Execute(plan) => {
                assert_eq!(plan.tool_name, "tq-lineloss-report.collect_lineloss");
                assert_eq!(plan.target_url, LINELOSS_URL);
            }
            other => panic!("expected page context to select lineloss scene, got {other:?}"),
        }
    });
}
/// A suffixed instruction that matches no registered scene must be answered
/// with the prompt that points at the registered report-collection scenes.
#[test]
fn deterministic_submit_unsupported_suffix_request_returns_supported_scene_message() {
    with_temp_workspace("sgclaw-deterministic-unsupported", |workspace_root| {
        write_scene_skill(
            &default_skills_root(workspace_root),
            "tq-lineloss-report",
            "tq-lineloss-report",
            "collect_lineloss",
            "20.76.57.61",
            "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor",
            &["线损", "统计分析"],
            &["知乎"],
            &["线损报表"],
        );

        let decision = decide_deterministic_submit("打开知乎热榜。。。", None, None);
        assert_prompt_contains(decision, "已注册的报表采集场景");
    });
}
/// An instruction without the deterministic suffix must stay
/// `NotDeterministic`, even when page context is supplied.
#[test]
fn zhihu_without_suffix_remains_not_deterministic() {
    // `decide_deterministic_submit` is the cwd-relative entry point (other
    // tests register skills under a temporary cwd before calling it), so hold
    // the cwd lock to keep this test from running while another test has the
    // process's working directory switched. A poisoned lock is recovered
    // because it guards no data and an unrelated failure must not fail this
    // test.
    let _guard = current_dir_lock()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    let decision = decide_deterministic_submit(
        "打开知乎热榜",
        Some("https://www.zhihu.com/hot"),
        Some("知乎热榜"),
    );
    assert!(
        matches!(decision, DeterministicSubmitDecision::NotDeterministic),
        "instruction without suffix must not be deterministic, got {decision:?}"
    );
}
/// The sample skill package committed under
/// `examples/generated_scene_platform/skills` must drive a full execute plan
/// (including the `xlsx_report` exporter) for a complete instruction, and the
/// "missing period" prompt when the instruction names no period.
#[test]
fn committed_lineloss_sample_package_drives_deterministic_submit() {
    // Serialize with the tests that switch the process's working directory.
    // If one of them panicked while holding the lock the mutex is poisoned;
    // it guards no data, so recover the guard rather than failing this test
    // because of an unrelated failure.
    let _guard = current_dir_lock()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    let skills_dir = Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("examples")
        .join("generated_scene_platform")
        .join("skills");

    let decision = decide_deterministic_submit_with_skills_dir(
        "兰州公司 台区线损大数据 月累计线损率统计分析 2026-03。。。",
        None,
        None,
        &skills_dir,
    );
    match decision {
        DeterministicSubmitDecision::Execute(plan) => {
            assert_eq!(plan.tool_name, "tq-lineloss-report.collect_lineloss");
            assert_eq!(plan.expected_domain, "20.76.57.61");
            assert_eq!(
                plan.target_url,
                "http://20.76.57.61:18080/gsllys/tqLinelossStatis/tqQualifyRateMonitor"
            );
            assert_eq!(plan.org_label, "国网兰州供电公司");
            assert_eq!(plan.org_code, "62401");
            assert_eq!(plan.period_mode, "month");
            assert_eq!(plan.period_mode_code, "1");
            assert_eq!(plan.period_value, "2026-03");
            assert!(plan.period_payload.contains("fdate"));
            assert_eq!(
                plan.postprocess
                    .as_ref()
                    .map(|postprocess| postprocess.exporter.as_str()),
                Some("xlsx_report")
            );
        }
        other => panic!("expected committed sample package execute plan, got {other:?}"),
    }

    // Same request without a period: the scene must ask for it.
    assert_prompt_contains(
        decide_deterministic_submit_with_skills_dir(
            "兰州公司 台区线损大数据 月累计线损率统计分析。。。",
            None,
            None,
            &skills_dir,
        ),
        "缺少统计周期",
    );
}