Improve parsing of srcset according to whatwg spec (#74)

* Improve parsing of srcset according to whatwg spec; e.g. srcset="http://example.com/size400,300/img.jpg 640w"

* Trim/normalise the output in order to conform to prior version; solely to keep tests happy

* Add test case for embedded commas in a srcset url, as well as support for possible future parentheses in the descriptor string

* Drop 'future proof' test as it causes an error message in test output
This commit is contained in:
Eoghan Murray
2026-04-01 12:00:00 +08:00
committed by GitHub
parent 628fee9292
commit f2c5ab096e
3 changed files with 69 additions and 21 deletions

View File

@@ -110,32 +110,78 @@ export function absoluteToStylesheet(
);
}
// Matches a leading run of one or more characters that are NOT space, tab, LF, CR or form feed.
const SRCSET_NOT_SPACES = /^[^ \t\n\r\u000c]+/; // Don't use \s, to avoid matching non-breaking space
// Matches a leading run of one or more commas and/or space, tab, LF, CR or form feed characters.
const SRCSET_COMMAS_OR_SPACES = /^[, \t\n\r\u000c]+/;
/**
 * Runs `absoluteToDoc` over every url in a `srcset` attribute value.
 *
 * The value is scanned character by character instead of being split on commas,
 * so a url that itself contains commas (e.g. `/size400,300/img.jpg 640w`) is kept
 * in one piece. This is adapted from https://github.com/albell/parse-srcset/
 * without the parsing of the descriptors (they are returned as-is); parse-srcset
 * is in turn based on
 * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
 *
 * @param doc - document handed to `absoluteToDoc` for each url
 * @param attributeValue - raw value of the `srcset` attribute
 * @returns the rewritten candidates joined with `', '`; a blank input is returned unchanged
 */
function getAbsoluteSrcsetString(doc: Document, attributeValue: string): string {
  if (attributeValue.trim() === '') {
    return attributeValue;
  }

  // Index of the next unread character in attributeValue.
  let pos = 0;

  // Consumes the prefix of the unread input matched by regEx (which must be
  // anchored with ^) and returns it, or returns '' without moving if no match.
  function collectCharacters(regEx: RegExp): string {
    const match = regEx.exec(attributeValue.substring(pos));
    if (match) {
      const chars = match[0];
      pos += chars.length;
      return chars;
    }
    return '';
  }

  const output: string[] = [];

  while (true) {
    // Skip separators (commas and whitespace) preceding the next candidate.
    collectCharacters(SRCSET_COMMAS_OR_SPACES);
    if (pos >= attributeValue.length) {
      break;
    }
    // don't split on commas within urls: the url runs up to the next whitespace
    let url = collectCharacters(SRCSET_NOT_SPACES);
    if (url.slice(-1) === ',') {
      // aside: according to spec more than one comma at the end is a parse error,
      // but we ignore that and strip a single trailing comma only
      url = absoluteToDoc(doc, url.substring(0, url.length - 1));
      // the trailing comma splits the srcset, so the interpretation is that
      // another url will follow, and the descriptor is empty
      output.push(url);
    } else {
      let descriptorsStr = '';
      url = absoluteToDoc(doc, url);
      let inParens = false;
      // Copy the descriptor text verbatim up to the next top-level comma or end of input.
      while (true) {
        const c = attributeValue.charAt(pos);
        if (c === '') {
          // end of input: emit the final candidate
          output.push((url + descriptorsStr).trim());
          break;
        } else if (!inParens) {
          if (c === ',') {
            pos += 1;
            output.push((url + descriptorsStr).trim());
            break; // parse the next url
          } else if (c === '(') {
            inParens = true;
          }
        } else {
          // in parentheses; ignore commas
          // (parentheses may be supported by future additions to spec)
          if (c === ')') {
            inParens = false;
          }
        }
        descriptorsStr += c;
        pos += 1;
      }
    }
  }
  return output.join(', ');
}
export function absoluteToDoc(doc: Document, attributeValue: string): string {

View File

@@ -276,7 +276,8 @@ exports[`[html file]: with-relative-res.html 1`] = `
<img src=\\"http://localhost:3030/a.jpg\\" alt=\\"\\" srcset=\\"\\" />
<img src=\\"http://localhost:3030/a.jpg\\" alt=\\"\\" srcset=\\"http://localhost:3030/a.jpg\\" />
<img src=\\"http://localhost:3030/a.jpg\\" alt=\\"\\" srcset=\\"http://exmple.com/a.jpg\\" />
<img src=\\"http://localhost:3030/a.jpg\\" alt=\\"\\" srcset=\\"http://localhost:3030/a.jpg 3x, http://localhost:3030/a.jpg 45x, http://localhost:3030/b.png\\" /></body></html>"
<img src=\\"http://localhost:3030/a.jpg\\" alt=\\"\\" srcset=\\"http://localhost:3030/a.jpg 3x, http://localhost:3030/a.jpg 45x, http://localhost:3030/b.png\\" />
<img src=\\"http://localhost:3030/a.jpg\\" alt=\\"\\" srcset=\\"http://localhost:3030/300,400/a.jpg 300w, http://localhost:3030/b.png\\" /></body></html>"
`;
exports[`[html file]: with-script.html 1`] = `

View File

@@ -16,5 +16,6 @@
<img src="./a.jpg" alt="" srcset="/a.jpg">
<img src="./a.jpg" alt="" srcset="http://exmple.com/a.jpg ">
<img src="./a.jpg" alt="" srcset="/a.jpg 3x, /a.jpg 45x , /b.png">
<img src="./a.jpg" alt="" srcset="/300,400/a.jpg 300w,b.png">
</body>
</html>