400 lines
12 KiB
Rust
400 lines
12 KiB
Rust
use std::path::Path;
|
|
|
|
use serde_json::{Map, Value};
|
|
use thiserror::Error;
|
|
|
|
use crate::compat::deterministic_submit::{
|
|
DeterministicExecutionPlan, DeterministicSubmitDecision,
|
|
};
|
|
use crate::compat::scene_platform::registry::{
|
|
load_scene_registry, SceneRegistryEntry, SceneRegistryError,
|
|
};
|
|
use crate::compat::scene_platform::resolvers::{
|
|
resolve_required_scene_params, ResolverError, SceneParamResolution,
|
|
};
|
|
use crate::scene_contract::PostprocessSection;
|
|
|
|
/// Suffix an instruction must end with to be handled by the deterministic
/// dispatcher: three consecutive U+3002 (IDEOGRAPHIC FULL STOP) characters.
/// Scene manifests must declare this same suffix to be eligible for matching.
const DETERMINISTIC_SUFFIX: &str = "\u{3002}\u{3002}\u{3002}";
|
|
/// Scene id that receives extra diagnostic logging while the registry is
/// loaded and while candidates are matched.
const DIAGNOSTIC_SCENE_ID: &str = "sweep-030-scene";
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct SceneExecutionPlan {
|
|
pub scene_id: String,
|
|
pub instruction: String,
|
|
pub tool_name: String,
|
|
pub expected_domain: String,
|
|
pub target_url: String,
|
|
pub args: Map<String, Value>,
|
|
pub success_statuses: Vec<String>,
|
|
pub failure_statuses: Vec<String>,
|
|
pub postprocess: Option<PostprocessSection>,
|
|
}
|
|
|
|
#[derive(Debug, Error)]
|
|
pub enum SceneDispatchError {
|
|
#[error(transparent)]
|
|
Registry(#[from] SceneRegistryError),
|
|
#[error(transparent)]
|
|
Resolver(#[from] ResolverError),
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct CandidateScene<'a> {
|
|
entry: &'a SceneRegistryEntry,
|
|
score: usize,
|
|
resolution: SceneParamResolution,
|
|
}
|
|
|
|
pub fn plan_deterministic_scene(
|
|
raw_instruction: &str,
|
|
page_url: Option<&str>,
|
|
page_title: Option<&str>,
|
|
skills_dir: &Path,
|
|
) -> Result<DeterministicSubmitDecision, SceneDispatchError> {
|
|
let Some(stripped_instruction) = strip_exact_suffix(raw_instruction) else {
|
|
return Ok(DeterministicSubmitDecision::NotDeterministic);
|
|
};
|
|
|
|
let instruction = stripped_instruction.trim();
|
|
log_deterministic_diag(format!(
|
|
"submit suffix=ok skills_dir={} skills_dir_exists={} instruction={} page_url={} page_title={}",
|
|
skills_dir.display(),
|
|
skills_dir.exists(),
|
|
instruction,
|
|
page_url.unwrap_or_default(),
|
|
page_title.unwrap_or_default()
|
|
));
|
|
if instruction.is_empty() {
|
|
log_deterministic_diag("unsupported: empty deterministic instruction");
|
|
return Ok(unsupported_scene_prompt());
|
|
}
|
|
|
|
let registry = load_scene_registry(skills_dir)?;
|
|
log_registry_diag(®istry);
|
|
if registry.is_empty() {
|
|
log_deterministic_diag("unsupported: scene registry is empty");
|
|
return Ok(unsupported_scene_prompt());
|
|
}
|
|
|
|
let mut candidates = Vec::new();
|
|
for entry in ®istry {
|
|
if entry.manifest.scene.id == DIAGNOSTIC_SCENE_ID {
|
|
log_scene_match_diag(entry, instruction);
|
|
}
|
|
let Some(score) = score_scene(entry, instruction, page_url, page_title) else {
|
|
continue;
|
|
};
|
|
let resolution = resolve_required_scene_params(
|
|
&entry.skill_root,
|
|
instruction,
|
|
&entry.manifest.scene.id,
|
|
&entry.manifest.params,
|
|
)?;
|
|
candidates.push(CandidateScene {
|
|
entry,
|
|
score,
|
|
resolution,
|
|
});
|
|
}
|
|
|
|
if candidates.is_empty() {
|
|
log_deterministic_diag(
|
|
"unsupported: no scene candidate matched include/suffix/exclude rules",
|
|
);
|
|
return Ok(unsupported_scene_prompt());
|
|
}
|
|
|
|
candidates.sort_by(|left, right| {
|
|
right
|
|
.score
|
|
.cmp(&left.score)
|
|
.then_with(|| {
|
|
right
|
|
.resolution
|
|
.resolved_required_count
|
|
.cmp(&left.resolution.resolved_required_count)
|
|
})
|
|
.then_with(|| {
|
|
left.entry
|
|
.manifest
|
|
.scene
|
|
.id
|
|
.cmp(&right.entry.manifest.scene.id)
|
|
})
|
|
});
|
|
|
|
if candidates.len() > 1 {
|
|
let best = &candidates[0];
|
|
let next = &candidates[1];
|
|
if best.score == next.score
|
|
&& best.resolution.resolved_required_count == next.resolution.resolved_required_count
|
|
{
|
|
return Ok(DeterministicSubmitDecision::Prompt {
|
|
summary: format!(
|
|
"已命中多个确定性场景({}、{}),请补充更明确关键词或页面上下文。",
|
|
best.entry.manifest.scene.id, next.entry.manifest.scene.id
|
|
),
|
|
});
|
|
}
|
|
}
|
|
|
|
let selected = candidates.remove(0);
|
|
log_deterministic_diag(format!(
|
|
"selected scene={} tool={} score={} resolved_required_count={}",
|
|
selected.entry.manifest.scene.id,
|
|
selected.entry.manifest.scene.tool,
|
|
selected.score,
|
|
selected.resolution.resolved_required_count
|
|
));
|
|
if let Some(summary) = selected.resolution.prompt {
|
|
log_deterministic_diag(format!(
|
|
"selected scene={} returned resolver prompt",
|
|
selected.entry.manifest.scene.id
|
|
));
|
|
return Ok(DeterministicSubmitDecision::Prompt { summary });
|
|
}
|
|
|
|
Ok(DeterministicSubmitDecision::Execute(to_deterministic_plan(
|
|
instruction,
|
|
build_scene_execution_plan(selected.entry, instruction, selected.resolution.args),
|
|
)))
|
|
}
|
|
|
|
fn build_scene_execution_plan(
|
|
entry: &SceneRegistryEntry,
|
|
instruction: &str,
|
|
mut args: Map<String, Value>,
|
|
) -> SceneExecutionPlan {
|
|
let bootstrap = entry
|
|
.manifest
|
|
.bootstrap
|
|
.as_ref()
|
|
.expect("report scene registry should only contain manifests with bootstrap");
|
|
let artifact = entry
|
|
.manifest
|
|
.artifact
|
|
.as_ref()
|
|
.expect("report scene registry should only contain manifests with artifact");
|
|
args.insert(
|
|
"expected_domain".to_string(),
|
|
Value::String(bootstrap.expected_domain.clone()),
|
|
);
|
|
args.insert(
|
|
"target_url".to_string(),
|
|
Value::String(bootstrap.target_url.clone()),
|
|
);
|
|
|
|
SceneExecutionPlan {
|
|
scene_id: entry.manifest.scene.id.clone(),
|
|
instruction: instruction.to_string(),
|
|
tool_name: format!(
|
|
"{}.{}",
|
|
entry.manifest.scene.skill, entry.manifest.scene.tool
|
|
),
|
|
expected_domain: bootstrap.expected_domain.clone(),
|
|
target_url: bootstrap.target_url.clone(),
|
|
args,
|
|
success_statuses: artifact.success_status.clone(),
|
|
failure_statuses: artifact.failure_status.clone(),
|
|
postprocess: entry.manifest.postprocess.clone(),
|
|
}
|
|
}
|
|
|
|
fn to_deterministic_plan(
|
|
instruction: &str,
|
|
scene_plan: SceneExecutionPlan,
|
|
) -> DeterministicExecutionPlan {
|
|
let org_label = scene_plan
|
|
.args
|
|
.get("org_label")
|
|
.and_then(Value::as_str)
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
let org_code = scene_plan
|
|
.args
|
|
.get("org_code")
|
|
.and_then(Value::as_str)
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
let period_mode = scene_plan
|
|
.args
|
|
.get("period_mode")
|
|
.and_then(Value::as_str)
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
let period_mode_code = scene_plan
|
|
.args
|
|
.get("period_mode_code")
|
|
.and_then(Value::as_str)
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
let period_value = scene_plan
|
|
.args
|
|
.get("period_value")
|
|
.and_then(Value::as_str)
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
let period_payload = scene_plan
|
|
.args
|
|
.get("period_payload")
|
|
.cloned()
|
|
.map(|payload| serde_json::to_string(&payload).unwrap_or_else(|_| "{}".to_string()))
|
|
.unwrap_or_else(|| "{}".to_string());
|
|
|
|
DeterministicExecutionPlan {
|
|
instruction: instruction.to_string(),
|
|
tool_name: scene_plan.tool_name,
|
|
expected_domain: scene_plan.expected_domain,
|
|
target_url: scene_plan.target_url,
|
|
org_label,
|
|
org_code,
|
|
period_mode,
|
|
period_mode_code,
|
|
period_value,
|
|
period_payload,
|
|
postprocess: scene_plan.postprocess,
|
|
}
|
|
}
|
|
|
|
fn score_scene(
|
|
entry: &SceneRegistryEntry,
|
|
instruction: &str,
|
|
page_url: Option<&str>,
|
|
page_title: Option<&str>,
|
|
) -> Option<usize> {
|
|
let deterministic = entry.manifest.deterministic.as_ref()?;
|
|
let bootstrap = entry.manifest.bootstrap.as_ref()?;
|
|
if deterministic.suffix != DETERMINISTIC_SUFFIX {
|
|
return None;
|
|
}
|
|
|
|
let include_hits = deterministic
|
|
.include_keywords
|
|
.iter()
|
|
.filter(|keyword| !keyword.trim().is_empty() && instruction.contains(keyword.as_str()))
|
|
.count();
|
|
if include_hits == 0 {
|
|
return None;
|
|
}
|
|
|
|
if deterministic
|
|
.exclude_keywords
|
|
.iter()
|
|
.any(|keyword| !keyword.trim().is_empty() && instruction.contains(keyword.as_str()))
|
|
{
|
|
return None;
|
|
}
|
|
|
|
let mut score = include_hits * 10;
|
|
|
|
let normalized_url = page_url.unwrap_or_default().to_ascii_lowercase();
|
|
if !normalized_url.is_empty() {
|
|
if normalized_url.contains(
|
|
&bootstrap.expected_domain.to_ascii_lowercase(),
|
|
) {
|
|
score += 100;
|
|
} else if normalized_url.contains(&entry.manifest.scene.id.to_ascii_lowercase()) {
|
|
score += 40;
|
|
}
|
|
}
|
|
|
|
let title = page_title.unwrap_or_default();
|
|
if !title.is_empty()
|
|
&& entry
|
|
.manifest
|
|
.bootstrap
|
|
.as_ref()?
|
|
.page_title_keywords
|
|
.iter()
|
|
.any(|keyword| !keyword.trim().is_empty() && title.contains(keyword.as_str()))
|
|
{
|
|
score += 60;
|
|
}
|
|
|
|
Some(score)
|
|
}
|
|
|
|
fn strip_exact_suffix(raw_instruction: &str) -> Option<&str> {
|
|
let without_suffix = raw_instruction.strip_suffix(DETERMINISTIC_SUFFIX)?;
|
|
if without_suffix.ends_with('\u{3002}') {
|
|
return None;
|
|
}
|
|
Some(without_suffix)
|
|
}
|
|
|
|
fn log_registry_diag(registry: &[SceneRegistryEntry]) {
|
|
let sweep_030 = registry
|
|
.iter()
|
|
.find(|entry| entry.manifest.scene.id == DIAGNOSTIC_SCENE_ID);
|
|
match sweep_030 {
|
|
Some(entry) => log_deterministic_diag(format!(
|
|
"registry loaded count={} diagnostic_scene={} skill_root={} suffix_ok={} include_keywords={:?}",
|
|
registry.len(),
|
|
DIAGNOSTIC_SCENE_ID,
|
|
entry.skill_root.display(),
|
|
entry
|
|
.manifest
|
|
.deterministic
|
|
.as_ref()
|
|
.map(|deterministic| deterministic.suffix == DETERMINISTIC_SUFFIX)
|
|
.unwrap_or(false),
|
|
entry
|
|
.manifest
|
|
.deterministic
|
|
.as_ref()
|
|
.map(|deterministic| deterministic.include_keywords.clone())
|
|
.unwrap_or_default()
|
|
)),
|
|
None => log_deterministic_diag(format!(
|
|
"registry loaded count={} diagnostic_scene={} registered=false",
|
|
registry.len(),
|
|
DIAGNOSTIC_SCENE_ID
|
|
)),
|
|
}
|
|
}
|
|
|
|
fn log_scene_match_diag(entry: &SceneRegistryEntry, instruction: &str) {
|
|
let Some(deterministic) = entry.manifest.deterministic.as_ref() else {
|
|
log_deterministic_diag(format!(
|
|
"diagnostic_scene={} deterministic=false",
|
|
entry.manifest.scene.id
|
|
));
|
|
return;
|
|
};
|
|
let include_hits = deterministic
|
|
.include_keywords
|
|
.iter()
|
|
.filter(|keyword| !keyword.trim().is_empty() && instruction.contains(keyword.as_str()))
|
|
.cloned()
|
|
.collect::<Vec<_>>();
|
|
let exclude_hits = deterministic
|
|
.exclude_keywords
|
|
.iter()
|
|
.filter(|keyword| !keyword.trim().is_empty() && instruction.contains(keyword.as_str()))
|
|
.cloned()
|
|
.collect::<Vec<_>>();
|
|
log_deterministic_diag(format!(
|
|
"diagnostic_scene={} suffix_ok={} suffix_codepoints={} include_hits={:?} exclude_hits={:?}",
|
|
entry.manifest.scene.id,
|
|
deterministic.suffix == DETERMINISTIC_SUFFIX,
|
|
deterministic
|
|
.suffix
|
|
.chars()
|
|
.map(|ch| format!("U+{:04X}", ch as u32))
|
|
.collect::<Vec<_>>()
|
|
.join(","),
|
|
include_hits,
|
|
exclude_hits
|
|
));
|
|
}
|
|
|
|
/// Writes one diagnostic line to stderr, prefixed with `[sgclaw deterministic]`.
fn log_deterministic_diag(message: impl AsRef<str>) {
    let text: &str = message.as_ref();
    eprintln!("[sgclaw deterministic] {text}");
}
|
|
|
|
fn unsupported_scene_prompt() -> DeterministicSubmitDecision {
|
|
DeterministicSubmitDecision::Prompt {
|
|
summary: "确定性提交当前只支持已注册的报表采集场景,请补充已支持的业务请求。".to_string(),
|
|
}
|
|
}
|