feat: add SceneKind::Monitoring and scene_kind_hint param to analyzer

- Add SceneKind::Monitoring enum variant with from_str/as_str helpers
- Add analyze_scene_source_with_hint function accepting optional scene kind hint
- User hint takes priority over meta tag, defaults to ReportCollection
- ReportCollection requires target_url, expected_domain, entry_script
- Monitoring type has optional fields
- Add test cases for hint parameter behavior
- Update non_report fixture with required meta tags for ReportCollection

🤖 Generated with [Qoder](https://qoder.com)
This commit is contained in:
木炎
2026-04-16 23:33:24 +08:00
parent 45b54ab007
commit 67fe17302e
3 changed files with 385 additions and 0 deletions

View File

@@ -0,0 +1,228 @@
use std::fmt;
use std::fs;
use std::path::{Path, PathBuf};
/// Category of scene that the analyzer distinguishes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SceneKind {
    /// Identified by the string `"report_collection"`.
    ReportCollection,
    /// Identified by the string `"monitoring"`.
    Monitoring,
}

impl SceneKind {
    /// Looks up the variant whose [`SceneKind::as_str`] identifier equals `s`.
    ///
    /// The comparison is exact (case-sensitive, no trimming). Returns `None`
    /// when `s` is not one of the known identifiers.
    pub fn from_str(s: &str) -> Option<Self> {
        // Searching through `as_str` keeps both directions of the mapping in
        // one place; a new variant only has to be added to this list.
        [SceneKind::ReportCollection, SceneKind::Monitoring]
            .iter()
            .find(|kind| kind.as_str() == s)
            .cloned()
    }

    /// Returns the textual identifier of this scene kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            SceneKind::ReportCollection => "report_collection",
            SceneKind::Monitoring => "monitoring",
        }
    }
}
/// Kind of tool a scene source is analyzed as.
///
/// The analyzers in this file only ever report `BrowserScript`; the only
/// `sgclaw-tool-kind` meta value they accept is `browser_script`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ToolKind {
/// The single tool kind currently produced by the analyzers.
BrowserScript,
}
/// Bootstrap-related values read from the scene's `index.html` meta tags.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BootstrapAnalysis {
/// Trimmed content of the `sgclaw-target-url` meta tag, if one was found.
pub target_url: Option<String>,
/// Trimmed content of the `sgclaw-expected-domain` meta tag, if one was found.
pub expected_domain: Option<String>,
}
/// Result of analyzing a scene source directory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SceneSourceAnalysis {
/// Scene kind the analyzer settled on.
pub scene_kind: SceneKind,
/// Tool kind of the scene; the analyzers in this file always set `BrowserScript`.
pub tool_kind: ToolKind,
/// Target URL and expected domain read from the meta tags.
pub bootstrap: BootstrapAnalysis,
/// Trimmed content of the `sgclaw-entry-script` meta tag, if one was found.
pub collection_entry_script: Option<String>,
/// The directory that was passed to the analyzer.
pub source_dir: PathBuf,
}
/// Error returned when a scene source cannot be analyzed.
///
/// Carries a single human-readable message, which is exactly what the
/// `Display` implementation prints.
#[derive(Debug)]
pub struct AnalyzeSceneError {
    message: String,
}

impl AnalyzeSceneError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message: String = message.into();
        AnalyzeSceneError { message }
    }
}

impl fmt::Display for AnalyzeSceneError {
    /// Writes the stored message verbatim (formatter width/padding flags are
    /// not applied, because the text is emitted through `write_str`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let AnalyzeSceneError { message } = self;
        f.write_str(message.as_str())
    }
}

impl std::error::Error for AnalyzeSceneError {}
/// Analyzes `index.html` inside `source_dir`, optionally forcing the scene kind.
///
/// The scene kind is resolved in this order:
/// 1. `scene_kind_hint`, when it is `Some`;
/// 2. the `sgclaw-scene-kind` meta tag, when its value is a known identifier;
/// 3. `SceneKind::ReportCollection` otherwise (tag absent or value unknown).
///
/// # Errors
///
/// Returns an [`AnalyzeSceneError`] when
/// - `index.html` cannot be read,
/// - a `sgclaw-tool-kind` meta tag is present with a value other than
///   `browser_script` (an absent tag is accepted), or
/// - the scene kind is `ReportCollection` and any of the target URL, expected
///   domain or entry script meta values is missing or blank.
pub fn analyze_scene_source_with_hint(
    source_dir: &Path,
    scene_kind_hint: Option<SceneKind>,
) -> Result<SceneSourceAnalysis, AnalyzeSceneError> {
    let index_path = source_dir.join("index.html");
    let html = match fs::read_to_string(&index_path) {
        Ok(contents) => contents,
        Err(err) => {
            return Err(AnalyzeSceneError::new(format!(
                "failed to read scene source {}: {err}",
                index_path.display()
            )));
        }
    };

    // Precedence: explicit hint, then meta tag, then the default kind.
    let scene_kind = match scene_kind_hint {
        Some(hint) => hint,
        None => match meta_content(&html, "sgclaw-scene-kind") {
            Some(declared) => {
                SceneKind::from_str(&declared).unwrap_or(SceneKind::ReportCollection)
            }
            None => SceneKind::ReportCollection,
        },
    };

    // A missing tool-kind tag is fine; only an explicit foreign value is rejected.
    match meta_content(&html, "sgclaw-tool-kind") {
        Some(declared) if declared != "browser_script" => {
            return Err(AnalyzeSceneError::new(format!(
                "unsupported tool kind: {}",
                declared
            )));
        }
        _ => {}
    }

    let target_url = meta_content(&html, "sgclaw-target-url");
    let expected_domain = meta_content(&html, "sgclaw-expected-domain");
    let entry_script = meta_content(&html, "sgclaw-entry-script");

    // A field counts as missing when it is absent or whitespace-only.
    let is_blank = |field: &Option<String>| -> bool {
        field
            .as_deref()
            .map_or(true, |text| text.trim().is_empty())
    };

    let missing_required = match scene_kind {
        SceneKind::ReportCollection => {
            is_blank(&target_url) || is_blank(&expected_domain) || is_blank(&entry_script)
        }
        // Every field is optional for monitoring scenes.
        SceneKind::Monitoring => false,
    };
    if missing_required {
        return Err(AnalyzeSceneError::new(
            "report_collection scene requires target_url, expected_domain, and entry_script",
        ));
    }

    let bootstrap = BootstrapAnalysis {
        target_url,
        expected_domain,
    };
    Ok(SceneSourceAnalysis {
        scene_kind,
        tool_kind: ToolKind::BrowserScript,
        bootstrap,
        collection_entry_script: entry_script,
        source_dir: source_dir.to_path_buf(),
    })
}
/// Strict analysis of `index.html` inside `source_dir` (compatibility entry point).
///
/// Unlike `analyze_scene_source_with_hint`, nothing is defaulted here: the
/// page must declare `sgclaw-scene-kind` as `report_collection` and
/// `sgclaw-tool-kind` as `browser_script`, and must provide non-blank target
/// URL, expected domain and entry script meta values.
///
/// # Errors
///
/// Returns an [`AnalyzeSceneError`] when `index.html` cannot be read or when
/// any of the requirements above is not met.
pub fn analyze_scene_source(source_dir: &Path) -> Result<SceneSourceAnalysis, AnalyzeSceneError> {
    let index_path = source_dir.join("index.html");
    let html = match fs::read_to_string(&index_path) {
        Ok(contents) => contents,
        Err(err) => {
            return Err(AnalyzeSceneError::new(format!(
                "failed to read scene source {}: {err}",
                index_path.display()
            )));
        }
    };

    let declared_scene = meta_content(&html, "sgclaw-scene-kind");
    let declared_tool = meta_content(&html, "sgclaw-tool-kind");
    let is_supported = matches!(declared_scene.as_deref(), Some("report_collection"))
        && matches!(declared_tool.as_deref(), Some("browser_script"));
    if !is_supported {
        return Err(AnalyzeSceneError::new(
            "generated scene v1 supports report/collection browser_script only",
        ));
    }

    let target_url = meta_content(&html, "sgclaw-target-url");
    let expected_domain = meta_content(&html, "sgclaw-expected-domain");
    let entry_script = meta_content(&html, "sgclaw-entry-script");

    // A field counts as missing when it is absent or whitespace-only.
    let is_blank = |field: &Option<String>| -> bool {
        field
            .as_deref()
            .map_or(true, |text| text.trim().is_empty())
    };
    if is_blank(&target_url) || is_blank(&expected_domain) || is_blank(&entry_script) {
        return Err(AnalyzeSceneError::new(
            "generated scene source must declare target url, expected domain, and entry script",
        ));
    }

    let bootstrap = BootstrapAnalysis {
        target_url,
        expected_domain,
    };
    Ok(SceneSourceAnalysis {
        scene_kind: SceneKind::ReportCollection,
        tool_kind: ToolKind::BrowserScript,
        bootstrap,
        collection_entry_script: entry_script,
        source_dir: source_dir.to_path_buf(),
    })
}
/// Returns the trimmed `content` attribute of the first `<meta>` tag in `html`
/// whose `name` attribute equals `name`.
///
/// This is a lightweight scan, not an HTML parser: the text is split on `<`,
/// each fragment is cut at its first `>`, and only fragments whose element
/// name is exactly `meta` (ASCII case-insensitive) are inspected. Elements
/// that merely start with those letters, such as `<metadata>`, are skipped.
///
/// Returns `None` when no matching tag exists, or when the first matching tag
/// has no quoted `content` attribute.
fn meta_content(html: &str, name: &str) -> Option<String> {
    // True when the fragment's element name is exactly "meta", i.e. the four
    // letters are followed by whitespace, a self-closing slash, or nothing.
    let is_meta_tag = |tag: &str| -> bool {
        match (tag.get(..4), tag.get(4..)) {
            (Some(head), Some(tail)) => {
                head.eq_ignore_ascii_case("meta")
                    && tail
                        .chars()
                        .next()
                        .map_or(true, |next| next.is_whitespace() || next == '/')
            }
            // Too short, or byte 4 is not a character boundary.
            _ => false,
        }
    };

    html.split('<')
        // Keep only the part of the fragment that lies inside the tag.
        .map(|fragment| fragment.split('>').next().unwrap_or(fragment))
        .filter(|tag| is_meta_tag(tag))
        .find(|tag| attr_value(tag, "name").as_deref() == Some(name))
        .and_then(|tag| attr_value(tag, "content"))
        .map(|value| value.trim().to_string())
}
/// Extracts the quoted value of attribute `attr` from the inside of a tag.
///
/// `tag` is the text between `<` and `>` (for example
/// `meta name="a" content="b"`). The tag is walked attribute by attribute, so
/// a name only matches as a whole attribute name: `data-name="x"` or the text
/// `name=` inside another attribute's quoted value is never mistaken for
/// `name`. Attribute names are compared ASCII case-insensitively and
/// whitespace around `=` is tolerated.
///
/// Returns `None` when the attribute is absent, when its value is not wrapped
/// in single or double quotes, or when a quoted value is never closed.
fn attr_value(tag: &str, attr: &str) -> Option<String> {
    let mut rest = tag;
    loop {
        rest = rest.trim_start();
        if rest.is_empty() {
            return None;
        }

        // A token runs up to the next whitespace or '='.
        let name_len = rest
            .find(|c: char| c.is_whitespace() || c == '=')
            .unwrap_or(rest.len());
        let (token, after_token) = rest.split_at(name_len);

        let after_eq = match after_token.trim_start().strip_prefix('=') {
            Some(after_eq) => after_eq.trim_start(),
            None => {
                // Bare token without a value (element name, boolean attribute, "/").
                rest = after_token;
                continue;
            }
        };

        let wanted = token.eq_ignore_ascii_case(attr);
        let mut chars = after_eq.chars();
        match chars.next() {
            Some(quote) if quote == '"' || quote == '\'' => {
                let body = chars.as_str();
                // An unterminated quote makes the rest of the tag unusable.
                let end = body.find(quote)?;
                if wanted {
                    return Some(body[..end].to_string());
                }
                rest = &body[end + quote.len_utf8()..];
            }
            _ => {
                // Unquoted (or missing) value: not supported for the requested
                // attribute, skipped over for any other attribute.
                if wanted {
                    return None;
                }
                let value_len = after_eq
                    .find(char::is_whitespace)
                    .unwrap_or(after_eq.len());
                rest = &after_eq[value_len..];
            }
        }
    }
}