From f2c5ab096e9e648f501d4e1a619116d6e2704e6d Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Wed, 1 Apr 2026 12:00:00 +0800 Subject: [PATCH] Improve parsing of srcset according to whatwg spec (#74) * Improve parsing of srcset according to whatwg spec; e.g. srcset="http://example.com/size400,300/img.jpg 640w" * Trim/normalise the output in order to conform to prior version; solely to keep tests happy * Add test case for embedded commas in a srcset url as well as support for future possible parentheses in descriptor string * Drop 'future proof' test as it causes an error message in test output --- src/snapshot.ts | 86 ++++++++++++++++++++------ test/__snapshots__/integration.ts.snap | 3 +- test/html/with-relative-res.html | 1 + 3 files changed, 69 insertions(+), 21 deletions(-) diff --git a/src/snapshot.ts b/src/snapshot.ts index 94a2b904..29b1f16c 100644 --- a/src/snapshot.ts +++ b/src/snapshot.ts @@ -110,32 +110,78 @@ export function absoluteToStylesheet( ); } +const SRCSET_NOT_SPACES = /^[^ \t\n\r\u000c]+/; // Don't use \s, to avoid matching non-breaking space +const SRCSET_COMMAS_OR_SPACES = /^[, \t\n\r\u000c]+/; function getAbsoluteSrcsetString(doc: Document, attributeValue: string) { + /* + run absoluteToDoc over every url in the srcset + + this is adapted from https://github.com/albell/parse-srcset/ + without the parsing of the descriptors (we return these as-is) + parse-srcset is in turn based on + https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute + */ if (attributeValue.trim() === '') { return attributeValue; } - const srcsetValues = attributeValue.split(','); - // srcset attributes is defined as such: - // srcset = "url size,url1 size1" - const resultingSrcsetString = srcsetValues - .map((srcItem) => { - // removing all but middle spaces - const trimmedSrcItem = srcItem.trimLeft().trimRight(); - const urlAndSize = trimmedSrcItem.split(' '); - // this means we have both 0:url and 1:size - if (urlAndSize.length === 2) { 
- const absUrl = absoluteToDoc(doc, urlAndSize[0]); - return `${absUrl} ${urlAndSize[1]}`; - } else if (urlAndSize.length === 1) { - const absUrl = absoluteToDoc(doc, urlAndSize[0]); - return `${absUrl}`; - } - return ''; - }) - .join(', '); + let pos = 0; - return resultingSrcsetString; + function collectCharacters(regEx: RegExp) { + var chars, + match = regEx.exec(attributeValue.substring(pos)); + if (match) { + chars = match[0]; + pos += chars.length; + return chars; + } + return ''; + } + + let output = []; + while (true) { + collectCharacters(SRCSET_COMMAS_OR_SPACES); + if (pos >= attributeValue.length) { + break; + } + // don't split on commas within urls + let url = collectCharacters(SRCSET_NOT_SPACES); + if (url.slice(-1) === ',') { + // aside: according to spec more than one comma at the end is a parse error, but we ignore that + url = absoluteToDoc(doc, url.substring(0, url.length - 1)) + // the trailing comma splits the srcset, so the interpretation is that + // another url will follow, and the descriptor is empty + output.push(url); + } else { + let descriptorsStr = ''; + url = absoluteToDoc(doc, url) + let inParens = false; + while (true) { + let c = attributeValue.charAt(pos); + if (c === '') { + output.push((url + descriptorsStr).trim()); + break; + } else if (!inParens) { + if (c === ',') { + pos += 1; + output.push((url + descriptorsStr).trim()); + break; // parse the next url + } else if (c === '(') { + inParens = true; + } + } else { + // in parentheses; ignore commas + // (parentheses may be supported by future additions to spec) + if (c === ')') { + inParens = false; + } + } + descriptorsStr += c; + pos += 1; + } + } + } + return output.join(', '); } export function absoluteToDoc(doc: Document, attributeValue: string): string { diff --git a/test/__snapshots__/integration.ts.snap b/test/__snapshots__/integration.ts.snap index 72b317f1..afa7b2a0 100644 --- a/test/__snapshots__/integration.ts.snap +++ b/test/__snapshots__/integration.ts.snap @@ 
-276,7 +276,8 @@ exports[`[html file]: with-relative-res.html 1`] = ` \\"\\" \\"\\" \\"\\" - \\"\\"" + \\"\\" + \\"\\"" `; exports[`[html file]: with-script.html 1`] = ` diff --git a/test/html/with-relative-res.html b/test/html/with-relative-res.html index 7505d5a3..c390dc53 100644 --- a/test/html/with-relative-res.html +++ b/test/html/with-relative-res.html @@ -16,5 +16,6 @@ + \ No newline at end of file