diff --git a/src/generated_scene/analyzer.rs b/src/generated_scene/analyzer.rs index f694dbd..c16bf67 100644 --- a/src/generated_scene/analyzer.rs +++ b/src/generated_scene/analyzer.rs @@ -109,6 +109,9 @@ pub fn analyze_scene_source_with_hint( let expected_domain = meta_content(&html, "sgclaw-expected-domain"); let entry_script = meta_content(&html, "sgclaw-entry-script"); + // Auto-extract expected_domain from external script URLs if not provided via meta tag + let expected_domain = expected_domain.or_else(|| extract_domain_from_external_scripts(&html)); + // All fields are optional - generator will use placeholders for missing values // This allows third-party scenes without meta tags to be processed @@ -156,3 +159,44 @@ fn attr_value(tag: &str, attr: &str) -> Option { let end = rest.find(quote)?; Some(rest[..end].to_string()) } + +/// Extract domain from the first external script URL found in HTML. +/// +/// Looks for ` + + +
测试页面
+ + diff --git a/tests/scene_generator_test.rs b/tests/scene_generator_test.rs index da7acaf..1d23340 100644 --- a/tests/scene_generator_test.rs +++ b/tests/scene_generator_test.rs @@ -166,3 +166,20 @@ fn generator_emits_monitoring_template() { // 监测类不应该有 org/period resolver assert!(!generated_manifest.contains("resolver = \"dictionary_entity\"")); } + +#[test] +fn analyzer_extracts_domain_from_external_script() { + // external_script fixture has no expected_domain meta tag, + // but has an external script URL that should be auto-extracted + let analysis = analyze_scene_source(Path::new( + "tests/fixtures/generated_scene/external_script", + )) + .unwrap(); + + assert_eq!(analysis.scene_kind, SceneKind::ReportCollection); + // Should auto-extract "25.215.213.128:18080" from script src + assert_eq!( + analysis.bootstrap.expected_domain.as_deref(), + Some("25.215.213.128:18080") + ); +}