feat: auto-extract expected_domain from external script URLs
When the HTML has no sgclaw-expected-domain meta tag, the analyzer now scans for external script URLs (http:// or https://) and extracts the domain (host:port) as expected_domain. Example: <script src="http://25.215.213.128:18080/a_js/YPTAPI.js"></script> → expected_domain = "25.215.213.128:18080". This reduces the manual editing required for third-party scenes. 🤖 Generated with [Qoder](https://qoder.com)
This commit is contained in:
@@ -109,6 +109,9 @@ pub fn analyze_scene_source_with_hint(
|
||||
let expected_domain = meta_content(&html, "sgclaw-expected-domain");
|
||||
let entry_script = meta_content(&html, "sgclaw-entry-script");
|
||||
|
||||
// Auto-extract expected_domain from external script URLs if not provided via meta tag
|
||||
let expected_domain = expected_domain.or_else(|| extract_domain_from_external_scripts(&html));
|
||||
|
||||
// All fields are optional - generator will use placeholders for missing values
|
||||
// This allows third-party scenes without meta tags to be processed
|
||||
|
||||
@@ -156,3 +159,44 @@ fn attr_value(tag: &str, attr: &str) -> Option<String> {
|
||||
let end = rest.find(quote)?;
|
||||
Some(rest[..end].to_string())
|
||||
}
|
||||
|
||||
/// Extract domain from the first external script URL found in HTML.
|
||||
///
|
||||
/// Looks for `<script src="http://...">` or `<script src="https://...">` tags
|
||||
/// and extracts the host:port portion as expected_domain.
|
||||
fn extract_domain_from_external_scripts(html: &str) -> Option<String> {
|
||||
for tag in html
|
||||
.split('<')
|
||||
.filter(|fragment| fragment.starts_with("script"))
|
||||
{
|
||||
if let Some(src) = attr_value(tag, "src") {
|
||||
// Check if it's an external URL (http:// or https://)
|
||||
if src.starts_with("http://") || src.starts_with("https://") {
|
||||
// Extract domain (host:port) from URL
|
||||
if let Some(domain) = extract_domain_from_url(&src) {
|
||||
return Some(domain);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Extract the domain (`host` or `host:port`) from an `http://` or `https://` URL.
///
/// Returns `None` when `url` does not start with one of those two schemes
/// (the prefix match is case-sensitive) or when the resulting domain is empty.
///
/// The authority ends at the first `/`, `?` or `#`, so a URL that has a query
/// string or fragment but no path (e.g. `http://host?x=1`) still yields just
/// `host`. A `user:pass@` userinfo prefix, if present, is dropped.
fn extract_domain_from_url(url: &str) -> Option<String> {
    // Remove the protocol prefix; anything else is not an external HTTP(S) URL.
    let rest = url
        .strip_prefix("http://")
        .or_else(|| url.strip_prefix("https://"))?;

    // The authority runs up to the first path, query or fragment delimiter.
    let authority_end = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());
    let authority = &rest[..authority_end];

    // Drop userinfo: the host starts after the last '@' in the authority.
    let domain = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);

    (!domain.is_empty()).then(|| domain.to_string())
}
|
||||
|
||||
Reference in New Issue
Block a user