From ce072c2ebe2524540bdc0d081dcb1db15e345947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=A8=E7=82=8E?= <635735027@qq.com> Date: Fri, 17 Apr 2026 00:14:05 +0800 Subject: [PATCH] feat: auto-extract expected_domain from external script URLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When HTML has no sgclaw-expected-domain meta tag, analyzer now scans for external script URLs (http:// or https://) and extracts the domain (host:port) as expected_domain. Example: → expected_domain = "25.215.213.128:18080" This reduces manual editing required for third-party scenes. 🤖 Generated with [Qoder][https://qoder.com] --- src/generated_scene/analyzer.rs | 44 +++++++++++++++++++ .../external_script/index.html | 11 +++++ tests/scene_generator_test.rs | 17 +++++++ 3 files changed, 72 insertions(+) create mode 100644 tests/fixtures/generated_scene/external_script/index.html diff --git a/src/generated_scene/analyzer.rs b/src/generated_scene/analyzer.rs index f694dbd..c16bf67 100644 --- a/src/generated_scene/analyzer.rs +++ b/src/generated_scene/analyzer.rs @@ -109,6 +109,9 @@ pub fn analyze_scene_source_with_hint( let expected_domain = meta_content(&html, "sgclaw-expected-domain"); let entry_script = meta_content(&html, "sgclaw-entry-script"); + // Auto-extract expected_domain from external script URLs if not provided via meta tag + let expected_domain = expected_domain.or_else(|| extract_domain_from_external_scripts(&html)); + // All fields are optional - generator will use placeholders for missing values // This allows third-party scenes without meta tags to be processed @@ -156,3 +159,44 @@ fn attr_value(tag: &str, attr: &str) -> Option { let end = rest.find(quote)?; Some(rest[..end].to_string()) } + +/// Extract domain from the first external script URL found in HTML. +/// +/// Looks for ` + + +
测试页面
+ + diff --git a/tests/scene_generator_test.rs b/tests/scene_generator_test.rs index da7acaf..1d23340 100644 --- a/tests/scene_generator_test.rs +++ b/tests/scene_generator_test.rs @@ -166,3 +166,20 @@ fn generator_emits_monitoring_template() { // 监测类不应该有 org/period resolver assert!(!generated_manifest.contains("resolver = \"dictionary_entity\"")); } + +#[test] +fn analyzer_extracts_domain_from_external_script() { + // external_script fixture has no expected_domain meta tag, + // but has an external script URL that should be auto-extracted + let analysis = analyze_scene_source(Path::new( + "tests/fixtures/generated_scene/external_script", + )) + .unwrap(); + + assert_eq!(analysis.scene_kind, SceneKind::ReportCollection); + // Should auto-extract "25.215.213.128:18080" from script src + assert_eq!( + analysis.bootstrap.expected_domain.as_deref(), + Some("25.215.213.128:18080") + ); +}