Single style capture (#1437)

Support a contrived/rare case where a <style> element has multiple text node children (this is usually only possible to recreate via a JavaScript append). This PR fixes cases where there are subsequent text mutations to these nodes; previously these would have been lost.

* In this scenario, a new CSS comment may now be inserted into the captured `_cssText` for a <style> element to show where it should be broken up into text elements upon replay: `/* rr_split */`
* The new 'can record and replay style mutations' test is the principal way to test the problematic scenarios, and is a detailed 'catch-all' test with many checks to cover most of the ways things can fail
* There are new tests for splitting/rebuilding the css using the rr_split marker
* The prior 'dynamic stylesheet' route is now the main route for serializing a stylesheet; dynamic stylesheets were missed out in #1533 but that case is now covered with this PR

This PR was originally extracted from #1475, so the initial motivation was to change the approach to stringifying <style> elements so that it happens in a single place. This is also the motivating factor for always serializing <style> elements via the `_cssText` attribute rather than in its childNodes; in #1475 we will be delaying populating `_cssText` for performance and instead recording them as assets.

Thanks to Justin Halsall <Juice10@users.noreply.github.com> & Yun Feng <https://github.com/YunFeng0817> for the detailed review
This commit is contained in:
Eoghan Murray
2024-08-06 13:09:06 +01:00
committed by GitHub
parent 8837fe39aa
commit 5fbb904edb
19 changed files with 1595 additions and 387 deletions

View File

@@ -1,6 +1,8 @@
import { mediaSelectorPlugin, pseudoClassPlugin } from './css';
import {
type serializedNodeWithId,
type serializedElementNodeWithId,
type serializedTextNodeWithId,
NodeType,
type tagMap,
type elementNode,
@@ -78,6 +80,77 @@ export function createCache(): BuildCache {
};
}
/**
 * Reverses splitCssText/markCssSplits: distributes the captured css back
 * onto the text node children of a serialized <style> element.
 * (would live in utils.ts, but it needs `adaptCssForReplay`)
 *
 * @param n - serialized <style> element; the `textContent` of its Text
 * children is overwritten in place
 * @param cssText - captured css, optionally containing `rr_split` marker
 * comments showing where one text node ends and the next begins
 * @param hackCss - when true, each section is passed through
 * `adaptCssForReplay` before being assigned
 * @param cache - build cache forwarded to `adaptCssForReplay`
 */
export function applyCssSplits(
  n: serializedElementNodeWithId,
  cssText: string,
  hackCss: boolean,
  cache: BuildCache,
): void {
  const textChildren = n.childNodes.filter(
    (child): child is serializedTextNodeWithId =>
      child.type === NodeType.Text,
  );

  const sections = cssText.split('/* rr_split */');
  // unexpected: more sections than text nodes to hold them.
  // Fold the trailing two sections together (repeatedly) so no css is dropped
  while (sections.length > 1 && sections.length > textChildren.length) {
    const tail = sections.pop() ?? '';
    const head = sections.pop() ?? '';
    sections.push(head + tail);
  }

  textChildren.forEach((textNode, idx) => {
    const section = sections[idx];
    if (!section) {
      // no (or empty) css for this text node: leave it untouched
      return;
    }
    // the node's id is assigned later, when these child nodes are
    // iterated over in buildNodeWithSN
    textNode.textContent = hackCss
      ? adaptCssForReplay(section, cache)
      : section;
  });
}
/**
 * Rebuilds the text content of a <style> element from its `_cssText` value.
 *
 * Normally a <style> element has a single textNode containing the rules.
 * During serialization we bypass this (using `styleEl.sheet`) to get the rules
 * the browser sees, serialize them to the special `_cssText` attribute, and
 * blank out any text nodes. This function reverses that. It handles:
 *  - no textNode children present (dynamic css, or a <link> element): a
 *    fresh text node holding all of the css is appended to `styleEl`
 *  - one or more textNode children: they are repopulated (split according to
 *    the special `rr_split` marker) in case they are modified by subsequent
 *    mutations
 *
 * @param n - the serialized element the css was captured against
 * @param styleEl - the element being rebuilt; when inlined, a
 * <link type="stylesheet"> also gets rebuilt as a <style>
 * @param cssText - the captured `_cssText` value
 * @param options - `doc` creates the text node, `hackCss` toggles
 * `adaptCssForReplay`, `cache` is forwarded to it
 */
export function buildStyleNode(
  n: serializedElementNodeWithId,
  styleEl: HTMLStyleElement,
  cssText: string,
  options: {
    doc: Document;
    hackCss: boolean;
    cache: BuildCache;
  },
) {
  if (n.childNodes.length > 0) {
    // serialized text children exist: write the css back onto them
    applyCssSplits(n, cssText, options.hackCss, options.cache);
    return;
  }
  // <link> elements and dynamic <style> elements are serialized without any
  // child nodes, so the text node is created here without an ID or presence
  // in the mirror
  const replayCss = options.hackCss
    ? adaptCssForReplay(cssText, options.cache)
    : cssText;
  styleEl.appendChild(options.doc.createTextNode(replayCss));
}
function buildNode(
n: serializedNodeWithId,
options: {
@@ -154,14 +227,13 @@ function buildNode(
continue;
}
const isTextarea = tagName === 'textarea' && name === 'value';
const isRemoteOrDynamicCss = tagName === 'style' && name === '_cssText';
if (isRemoteOrDynamicCss && hackCss && typeof value === 'string') {
value = adaptCssForReplay(value, cache);
}
if ((isTextarea || isRemoteOrDynamicCss) && typeof value === 'string') {
// https://github.com/rrweb-io/rrweb/issues/112
// https://github.com/rrweb-io/rrweb/pull/1351
if (typeof value !== 'string') {
// pass
} else if (tagName === 'style' && name === '_cssText') {
buildStyleNode(n, node as HTMLStyleElement, value, options);
continue; // no need to set _cssText as attribute
} else if (tagName === 'textarea' && name === 'value') {
// create without an ID or presence in mirror
node.appendChild(doc.createTextNode(value));
n.childNodes = []; // value overrides childNodes
continue;
@@ -317,11 +389,11 @@ function buildNode(
return node;
}
case NodeType.Text:
return doc.createTextNode(
n.isStyle && hackCss
? adaptCssForReplay(n.textContent, cache)
: n.textContent,
);
if (n.isStyle && hackCss) {
// support legacy style
return doc.createTextNode(adaptCssForReplay(n.textContent, cache));
}
return doc.createTextNode(n.textContent);
case NodeType.CDATA:
return doc.createCDATASection(n.textContent);
case NodeType.Comment:

View File

@@ -27,6 +27,7 @@ import {
toLowerCase,
extractFileExtension,
absolutifyURLs,
markCssSplits,
} from './utils';
import dom from '@rrweb/utils';
@@ -403,6 +404,7 @@ function serializeNode(
* `newlyAddedElement: true` skips scrollTop and scrollLeft check
*/
newlyAddedElement?: boolean;
cssCaptured?: boolean;
},
): serializedNode | false {
const {
@@ -420,6 +422,7 @@ function serializeNode(
recordCanvas,
keepIframeSrcFn,
newlyAddedElement = false,
cssCaptured = false,
} = options;
// Only record root id when document object is not the base document
const rootId = getRootId(doc, mirror);
@@ -466,6 +469,7 @@ function serializeNode(
needsMask,
maskTextFn,
rootId,
cssCaptured,
});
case n.CDATA_SECTION_NODE:
return {
@@ -497,48 +501,38 @@ function serializeTextNode(
needsMask: boolean;
maskTextFn: MaskTextFn | undefined;
rootId: number | undefined;
cssCaptured?: boolean;
},
): serializedNode {
const { needsMask, maskTextFn, rootId } = options;
const { needsMask, maskTextFn, rootId, cssCaptured } = options;
// The parent node may not be a html element which has a tagName attribute.
// So just let it be undefined which is ok in this use case.
const parent = dom.parentNode(n);
const parentTagName = parent && (parent as HTMLElement).tagName;
let text = dom.textContent(n);
let textContent: string | null = '';
const isStyle = parentTagName === 'STYLE' ? true : undefined;
const isScript = parentTagName === 'SCRIPT' ? true : undefined;
if (isStyle && text) {
try {
// try to read style sheet
if (n.nextSibling || n.previousSibling) {
// This is not the only child of the stylesheet.
// We can't read all of the sheet's .cssRules and expect them
// to _only_ include the current rule(s) added by the text node.
// So we'll be conservative and keep textContent as-is.
} else if ((parent as HTMLStyleElement).sheet?.cssRules) {
text = stringifyStylesheet((parent as HTMLStyleElement).sheet!);
}
} catch (err) {
console.warn(
`Cannot get CSS styles from text's parentNode. Error: ${err as string}`,
n,
);
}
text = absolutifyURLs(text, getHref(options.doc));
}
if (isScript) {
text = 'SCRIPT_PLACEHOLDER';
textContent = 'SCRIPT_PLACEHOLDER';
} else if (!cssCaptured) {
textContent = dom.textContent(n);
if (isStyle && textContent) {
// mutation only: we don't need to use stringifyStylesheet
// as a <style> text node mutation obliterates any previous
// programmatic rule manipulation (.insertRule etc.)
// so the current textContent represents the most up to date state
textContent = absolutifyURLs(textContent, getHref(options.doc));
}
}
if (!isStyle && !isScript && text && needsMask) {
text = maskTextFn
? maskTextFn(text, dom.parentElement(n))
: text.replace(/[\S]/g, '*');
if (!isStyle && !isScript && textContent && needsMask) {
textContent = maskTextFn
? maskTextFn(textContent, dom.parentElement(n))
: textContent.replace(/[\S]/g, '*');
}
return {
type: NodeType.Text,
textContent: text || '',
isStyle,
textContent: textContent || '',
rootId,
};
}
@@ -608,17 +602,14 @@ function serializeElementNode(
attributes._cssText = cssText;
}
}
// dynamic stylesheet
if (
tagName === 'style' &&
(n as HTMLStyleElement).sheet &&
// TODO: Currently we only try to get dynamic stylesheet when it is an empty style element
!(n.innerText || dom.textContent(n) || '').trim().length
) {
const cssText = stringifyStylesheet(
if (tagName === 'style' && (n as HTMLStyleElement).sheet) {
let cssText = stringifyStylesheet(
(n as HTMLStyleElement).sheet as CSSStyleSheet,
);
if (cssText) {
if (n.childNodes.length > 1) {
cssText = markCssSplits(cssText, n as HTMLStyleElement);
}
attributes._cssText = cssText;
}
}
@@ -937,6 +928,7 @@ export function serializeNodeWithId(
node: serializedElementNodeWithId,
) => unknown;
stylesheetLoadTimeout?: number;
cssCaptured?: boolean;
},
): serializedNodeWithId | null {
const {
@@ -962,6 +954,7 @@ export function serializeNodeWithId(
stylesheetLoadTimeout = 5000,
keepIframeSrcFn = () => false,
newlyAddedElement = false,
cssCaptured = false,
} = options;
let { needsMask } = options;
let { preserveWhiteSpace = true } = options;
@@ -992,6 +985,7 @@ export function serializeNodeWithId(
recordCanvas,
keepIframeSrcFn,
newlyAddedElement,
cssCaptured,
});
if (!_serializedNode) {
// TODO: dev only
@@ -1007,7 +1001,6 @@ export function serializeNodeWithId(
slimDOMExcluded(_serializedNode, slimDOMOptions) ||
(!preserveWhiteSpace &&
_serializedNode.type === NodeType.Text &&
!_serializedNode.isStyle &&
!_serializedNode.textContent.replace(/^\s+|\s+$/gm, '').length)
) {
id = IGNORED_NODE;
@@ -1072,6 +1065,7 @@ export function serializeNodeWithId(
onStylesheetLoad,
stylesheetLoadTimeout,
keepIframeSrcFn,
cssCaptured: false,
};
if (
@@ -1081,6 +1075,13 @@ export function serializeNodeWithId(
) {
// value parameter in DOM reflects the correct value, so ignore childNode
} else {
if (
serializedNode.type === NodeType.Element &&
(serializedNode as elementNode).attributes._cssText !== undefined &&
typeof serializedNode.attributes._cssText === 'string'
) {
bypassOptions.cssCaptured = true;
}
for (const childN of Array.from(dom.childNodes(n))) {
const serializedChildNode = serializeNodeWithId(childN, bypassOptions);
if (serializedChildNode) {

View File

@@ -20,9 +20,18 @@ export type documentTypeNode = {
systemId: string;
};
export type attributes = {
[key: string]: string | number | true | null;
type cssTextKeyAttr = {
_cssText?: string;
};
export type attributes = cssTextKeyAttr & {
[key: string]:
| string
| number // properties e.g. rr_scrollLeft or rr_mediaCurrentTime
| true // e.g. checked on <input type="radio">
| null; // an indication that an attribute was removed (during a mutation)
};
export type legacyAttributes = {
/**
* @deprecated old bug in rrweb was causing these to always be set
@@ -45,6 +54,10 @@ export type elementNode = {
/**
 * Serialized representation of a text node.
 */
export type textNode = {
type: NodeType.Text;
// the text to restore into the node on replay
textContent: string;
/**
* @deprecated styles are now always snapshotted against parent <style> element
* style mutations can still happen via an added textNode, but they don't need this attribute for correct replay
*/
isStyle?: true;
};
@@ -78,6 +91,11 @@ export type serializedElementNodeWithId = Extract<
Record<'type', NodeType.Element>
>;
/**
 * The member(s) of the `serializedNodeWithId` union whose `type` is
 * `NodeType.Text`.
 */
export type serializedTextNodeWithId = Extract<
serializedNodeWithId,
Record<'type', NodeType.Text>
>;
export type tagMap = {
[key: string]: string;
};

View File

@@ -99,14 +99,28 @@ export function escapeImportStatement(rule: CSSImportRule): string {
return statement.join(' ') + ';';
}
/*
* serialize the css rules from the .sheet property
* for <link rel="stylesheet"> elements, this is the only way of getting the rules without a FETCH
* for <style> elements, this is less preferable to looking at childNodes[0].textContent
* (which will include vendor prefixed rules which may not be used or visible to the recorded browser,
* but which might be needed by the replayer browser)
* however, at snapshot time, we don't know whether the style element has suffered
* any programmatic manipulation prior to the snapshot, in which case the .sheet would be more up to date
*/
export function stringifyStylesheet(s: CSSStyleSheet): string | null {
try {
const rules = s.rules || s.cssRules;
if (!rules) {
return null;
}
let sheetHref = s.href;
if (!sheetHref && s.ownerNode && s.ownerNode.ownerDocument) {
// an inline <style> element
sheetHref = s.ownerNode.ownerDocument.location.href;
}
const stringifiedRules = Array.from(rules, (rule: CSSRule) =>
stringifyRule(rule, s.href),
stringifyRule(rule, sheetHref),
).join('');
return fixBrowserCompatibilityIssuesInCSS(stringifiedRules);
} catch (error) {
@@ -428,3 +442,62 @@ export function absolutifyURLs(cssText: string | null, href: string): string {
},
);
}
/**
 * Normalizes a css string by removing whitespace, semicolons and CSS comments
 * so that css as authored can be compared against the output of
 * stringifyStylesheet.
 *
 * @param cssText - the css to normalize
 * @returns `cssText` with all whitespace, semicolons and comments removed
 */
export function normalizeCssString(cssText: string): string {
  // the comment body is matched lazily with `[\s\S]*?` rather than `[^*]*`
  // so that comments which themselves contain an asterisk
  // (e.g. `/** banner */` or `/* a * b */`) are stripped as well
  return cssText.replace(/\/\*[\s\S]*?\*\/|[\s;]/g, '');
}

/**
 * Maps the output of stringifyStylesheet to individual text nodes of a <style> element.
 * Performance is not considered as this is anticipated to be very much an edge case
 * (javascript is needed to add extra text nodes to a <style>).
 *
 * For each child node after the first, the shortest prefix (3+ normalized
 * characters) of its text that occurs exactly once in the remaining
 * normalized css is used to locate where that node's css begins.
 *
 * @param cssText - css for the whole <style> element
 * @param style - the element whose child nodes determine the split points
 * @returns the sections of `cssText` in order; joining them reproduces
 * `cssText`. A single-element array is returned when no split point is found.
 */
export function splitCssText(
  cssText: string,
  style: HTMLStyleElement,
): string[] {
  const childNodes = Array.from(style.childNodes);
  const splits: string[] = [];
  // the part of cssText that has not yet been assigned to a split
  let remaining = cssText;
  if (childNodes.length > 1 && remaining && typeof remaining === 'string') {
    // normalized form of `remaining`; must be advanced in step with it so
    // that offsets found in one are valid for the other
    let remainingNorm = normalizeCssString(remaining);
    for (let i = 1; i < childNodes.length; i++) {
      const textContent = childNodes[i].textContent;
      if (!textContent || typeof textContent !== 'string') {
        continue;
      }
      const textContentNorm = normalizeCssString(textContent);
      for (let j = 3; j < textContentNorm.length; j++) {
        // find a substring that appears only once
        const bit = textContentNorm.substring(0, j);
        if (remainingNorm.split(bit).length !== 2) {
          continue;
        }
        const splitNorm = remainingNorm.indexOf(bit);
        // find the split point in the original text: the first prefix whose
        // normalized length equals the offset found in the normalized text
        for (let k = splitNorm; k < remaining.length; k++) {
          if (
            normalizeCssString(remaining.substring(0, k)).length === splitNorm
          ) {
            splits.push(remaining.substring(0, k));
            remaining = remaining.substring(k);
            // keep the normalized text aligned with `remaining`; otherwise
            // later offsets would be relative to the full stylesheet
            remainingNorm = remainingNorm.substring(splitNorm);
            break;
          }
        }
        break;
      }
    }
  }
  splits.push(remaining); // either the full thing if no splits were found, or the last split
  return splits;
}
/**
 * Splits `cssText` according to the child nodes of `style` (see
 * splitCssText) and joins the sections back together with an `rr_split`
 * marker comment between them.
 *
 * @param cssText - css for the whole <style> element
 * @param style - the element whose child nodes determine the split points
 * @returns `cssText` with a marker comment inserted at each split point
 */
export function markCssSplits(
  cssText: string,
  style: HTMLStyleElement,
): string {
  const sections = splitCssText(cssText, style);
  return sections.join('/* rr_split */');
}