/// Rounds `byte_idx` down to the nearest UTF-8 char boundary of `s`.
///
/// Slicing a `&str` at a byte offset that falls inside a multi-byte character
/// panics. This helper turns an arbitrary byte offset into one that is always
/// safe to slice at.
///
/// # Arguments
///
/// * `s` - the string the offset refers to.
/// * `byte_idx` - a byte offset into `s`; it may land inside a character or
///   lie past the end of the string.
///
/// # Returns
///
/// * `s.len()` when `byte_idx >= s.len()`.
/// * Otherwise the largest char boundary that is `<= byte_idx` (`0` at worst).
///
/// # Examples
///
/// Clamping a context window around a regex match before slicing, as
/// `extract_endpoints` does:
///
/// ```ignore
/// let context_start = safe_char_boundary(content, url_match.start().saturating_sub(120));
/// let context_end = safe_char_boundary(content, (url_match.end() + 120).min(content.len()));
/// let context = &content[context_start..context_end];
/// ```
fn safe_char_boundary(s: &str, byte_idx: usize) -> usize {
    if byte_idx >= s.len() {
        return s.len();
    }
    // Probe backwards from `byte_idx` instead of decoding the whole string:
    // a UTF-8 scalar value is at most four bytes long, so a boundary is found
    // within four probes, and each `is_char_boundary` call is O(1).
    (0..=byte_idx)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        // Offset 0 is always a boundary, so this fallback is never taken; it
        // only avoids an `unwrap` on a provably-`Some` value.
        .unwrap_or(0)
}