feat: auto-extract expected_domain from external script URLs

When the HTML has no sgclaw-expected-domain meta tag, the analyzer now
scans for external script URLs (http:// or https://) and extracts the
domain (host:port) as expected_domain.

Example:
  <script src="http://25.215.213.128:18080/a_js/YPTAPI.js"></script>
  → expected_domain = "25.215.213.128:18080"

This reduces the manual editing required for third-party scenes.

🤖 Generated with [Qoder](https://qoder.com)
This commit is contained in:
木炎
2026-04-17 00:14:05 +08:00
parent 464f18c672
commit ce072c2ebe
3 changed files with 72 additions and 0 deletions

View File

@@ -166,3 +166,20 @@ fn generator_emits_monitoring_template() {
// 监测类不应该有 org/period resolver
assert!(!generated_manifest.contains("resolver = \"dictionary_entity\""));
}
/// Checks that the analyzer falls back to an external script URL for
/// `expected_domain` when the fixture carries no expected-domain meta tag.
#[test]
fn analyzer_extracts_domain_from_external_script() {
    // The `external_script` fixture omits the expected_domain meta tag but
    // references an external script URL that should be picked up instead.
    let fixture_dir = Path::new("tests/fixtures/generated_scene/external_script");
    let analysis = analyze_scene_source(fixture_dir).unwrap();

    assert_eq!(analysis.scene_kind, SceneKind::ReportCollection);

    // The host:port portion of the script src is the expected result.
    let extracted_domain = analysis.bootstrap.expected_domain.as_deref();
    assert_eq!(extracted_domain, Some("25.215.213.128:18080"));
}