Single style capture (#1437)

Support a contrived/rare case where a <style> element has multiple text node children (this is usually only possible to recreate via javascript append) ... this PR fixes cases where there are subsequent text mutations to these nodes; previously these would have been lost * In this scenario, a new CSS comment may now be inserted into the captured `_cssText` for a <style> element to show where it should be broken up into text elements upon replay: `/* rr_split */` * The new 'can record and replay style mutations' test is the principal way to exercise the problematic scenarios, and is a detailed 'catch-all' test with many checks to cover most of the ways things can fail * There are new tests for splitting/rebuilding the css using the rr_split marker * The prior 'dynamic stylesheet' route is now the main route for serializing a stylesheet; dynamic stylesheets were missed out in #1533 but that case is now covered with this PR This PR was originally extracted from #1475 so the initial motivation was to change the approach on stringifying <style> elements to do so in a single place. This is also the motivating factor for always serializing <style> elements via the `_cssText` attribute rather than in its childNodes; in #1475 we will be delaying populating `_cssText` for performance and instead recording them as assets. Thanks for the detailed review to Justin Halsall <Juice10@users.noreply.github.com> & Yun Feng <https://github.com/YunFeng0817>
2024-08-06 13:09:06 +01:00
parent 8837fe39aa
commit 5fbb904edb
19 changed files with 1595 additions and 387 deletions
--- a/packages/rrweb-snapshot/src/rebuild.ts
+++ b/packages/rrweb-snapshot/src/rebuild.ts
@@ -1,6 +1,8 @@
 import { mediaSelectorPlugin, pseudoClassPlugin } from './css';
 import {
  type serializedNodeWithId,
+  type serializedElementNodeWithId,
+  type serializedTextNodeWithId,
  NodeType,
  type tagMap,
  type elementNode,
@@ -78,6 +80,77 @@ export function createCache(): BuildCache {
  };
 }

+/**
+ * Undo splitCssText/markCssSplits: split `cssText` at each `rr_split` comment marker and write section i into the i-th Text child of `n` (the serialized child nodes are mutated in place; nothing is returned).
+ * Lives here rather than in utils.ts because it uses `adaptCssForReplay`, which is applied to each section when `hackCss` is true.
+ */
+export function applyCssSplits(
+  n: serializedElementNodeWithId,
+  cssText: string,
+  hackCss: boolean,
+  cache: BuildCache,
+): void {
+  const childTextNodes: serializedTextNodeWithId[] = [];
+  for (const scn of n.childNodes) {
+    if (scn.type === NodeType.Text) {
+      childTextNodes.push(scn);
+    }
+  }
+  const cssTextSplits = cssText.split('/* rr_split */');
+  while (
+    cssTextSplits.length > 1 &&
+    cssTextSplits.length > childTextNodes.length
+  ) {
+    // unexpected: more sections than text nodes; keep merging the last two sections (until the counts match or one section remains) so that no css is discarded
+    cssTextSplits.splice(-2, 2, cssTextSplits.slice(-2).join(''));
+  }
+  for (let i = 0; i < childTextNodes.length; i++) {
+    const childTextNode = childTextNodes[i];
+    const cssTextSection = cssTextSplits[i];
+    if (childTextNode && cssTextSection) {
+      // a missing or empty section leaves this text node's textContent untouched;
+      // the id will be assigned when these child nodes are iterated over in buildNodeWithSN
+      childTextNode.textContent = hackCss
+        ? adaptCssForReplay(cssTextSection, cache)
+        : cssTextSection;
+    }
+  }
+}
+
+/**
+ * Normally a <style> element has a single textNode containing the rules.
+ * During serialization we bypass this and read `styleEl.sheet` to get the rules the
+ * browser sees, serializing them to a special `_cssText` attribute and blanking out
+ * any text nodes. This function reverses that. If `n` has child nodes, the css is written
+ * back into its serialized text nodes via `applyCssSplits` (divided at any `rr_split`
+ * markers, so that subsequent mutations to individual text nodes can still be applied).
+ * Otherwise (dynamic css or a <link> element) one new text node is appended to `styleEl`.
+ */
+export function buildStyleNode(
+  n: serializedElementNodeWithId,
+  styleEl: HTMLStyleElement, // when inlined, a <link rel="stylesheet"> also gets rebuilt as a <style>
+  cssText: string,
+  options: {
+    doc: Document;
+    hackCss: boolean;
+    cache: BuildCache;
+  },
+) {
+  const { doc, hackCss, cache } = options;
+  if (n.childNodes.length) {
+    applyCssSplits(n, cssText, hackCss, cache);
+  } else {
+    if (hackCss) {
+      cssText = adaptCssForReplay(cssText, cache);
+    }
+    /**
+       <link> elements and dynamic <style> elements are serialized without any child nodes;
+       we create the text node without an ID or presence in mirror (presumably as it can't be the target of a recorded mutation — the original note was cut off here, confirm)
+    */
+    styleEl.appendChild(doc.createTextNode(cssText));
+  }
+}
+
 function buildNode(
  n: serializedNodeWithId,
  options: {
@@ -154,14 +227,13 @@ function buildNode(
          continue;
        }

-        const isTextarea = tagName === 'textarea' && name === 'value';
-        const isRemoteOrDynamicCss = tagName === 'style' && name === '_cssText';
-        if (isRemoteOrDynamicCss && hackCss && typeof value === 'string') {
-          value = adaptCssForReplay(value, cache);
-        }
-        if ((isTextarea || isRemoteOrDynamicCss) && typeof value === 'string') {
-          // https://github.com/rrweb-io/rrweb/issues/112
-          // https://github.com/rrweb-io/rrweb/pull/1351
+        if (typeof value !== 'string') {
+          // pass
+        } else if (tagName === 'style' && name === '_cssText') {
+          buildStyleNode(n, node as HTMLStyleElement, value, options);
+          continue; // no need to set _cssText as attribute
+        } else if (tagName === 'textarea' && name === 'value') {
+          // create without an ID or presence in mirror
          node.appendChild(doc.createTextNode(value));
          n.childNodes = []; // value overrides childNodes
          continue;
@@ -317,11 +389,11 @@ function buildNode(
      return node;
    }
    case NodeType.Text:
-      return doc.createTextNode(
-        n.isStyle && hackCss
-          ? adaptCssForReplay(n.textContent, cache)
-          : n.textContent,
-      );
+      if (n.isStyle && hackCss) {
+        // support legacy style
+        return doc.createTextNode(adaptCssForReplay(n.textContent, cache));
+      }
+      return doc.createTextNode(n.textContent);
    case NodeType.CDATA:
      return doc.createCDATASection(n.textContent);
    case NodeType.Comment:
--- a/packages/rrweb-snapshot/src/snapshot.ts
+++ b/packages/rrweb-snapshot/src/snapshot.ts
@@ -27,6 +27,7 @@ import {
  toLowerCase,
  extractFileExtension,
  absolutifyURLs,
+  markCssSplits,
 } from './utils';
 import dom from '@rrweb/utils';

@@ -403,6 +404,7 @@ function serializeNode(
     * `newlyAddedElement: true` skips scrollTop and scrollLeft check
     */
    newlyAddedElement?: boolean;
+    cssCaptured?: boolean;
  },
 ): serializedNode | false {
  const {
@@ -420,6 +422,7 @@ function serializeNode(
    recordCanvas,
    keepIframeSrcFn,
    newlyAddedElement = false,
+    cssCaptured = false,
  } = options;
  // Only record root id when document object is not the base document
  const rootId = getRootId(doc, mirror);
@@ -466,6 +469,7 @@ function serializeNode(
        needsMask,
        maskTextFn,
        rootId,
+        cssCaptured,
      });
    case n.CDATA_SECTION_NODE:
      return {
@@ -497,48 +501,38 @@ function serializeTextNode(
    needsMask: boolean;
    maskTextFn: MaskTextFn | undefined;
    rootId: number | undefined;
+    cssCaptured?: boolean;
  },
 ): serializedNode {
-  const { needsMask, maskTextFn, rootId } = options;
+  const { needsMask, maskTextFn, rootId, cssCaptured } = options;
  // The parent node may not be a html element which has a tagName attribute.
  // So just let it be undefined which is ok in this use case.
  const parent = dom.parentNode(n);
  const parentTagName = parent && (parent as HTMLElement).tagName;
-  let text = dom.textContent(n);
+  let textContent: string | null = '';
  const isStyle = parentTagName === 'STYLE' ? true : undefined;
  const isScript = parentTagName === 'SCRIPT' ? true : undefined;
-  if (isStyle && text) {
-    try {
-      // try to read style sheet
-      if (n.nextSibling || n.previousSibling) {
-        // This is not the only child of the stylesheet.
-        // We can't read all of the sheet's .cssRules and expect them
-        // to _only_ include the current rule(s) added by the text node.
-        // So we'll be conservative and keep textContent as-is.
-      } else if ((parent as HTMLStyleElement).sheet?.cssRules) {
-        text = stringifyStylesheet((parent as HTMLStyleElement).sheet!);
-      }
-    } catch (err) {
-      console.warn(
-        `Cannot get CSS styles from text's parentNode. Error: ${err as string}`,
-        n,
-      );
-    }
-    text = absolutifyURLs(text, getHref(options.doc));
-  }
  if (isScript) {
-    text = 'SCRIPT_PLACEHOLDER';
+    textContent = 'SCRIPT_PLACEHOLDER';
+  } else if (!cssCaptured) {
+    textContent = dom.textContent(n);
+    if (isStyle && textContent) {
+      // mutation only: we don't need to use stringifyStylesheet
+      // as a <style> text node mutation obliterates any previous
+      // programmatic rule manipulation (.insertRule etc.)
+      // so the current textContent represents the most up to date state
+      textContent = absolutifyURLs(textContent, getHref(options.doc));
+    }
  }
-  if (!isStyle && !isScript && text && needsMask) {
-    text = maskTextFn
-      ? maskTextFn(text, dom.parentElement(n))
-      : text.replace(/[\S]/g, '*');
+  if (!isStyle && !isScript && textContent && needsMask) {
+    textContent = maskTextFn
+      ? maskTextFn(textContent, dom.parentElement(n))
+      : textContent.replace(/[\S]/g, '*');
  }

  return {
    type: NodeType.Text,
-    textContent: text || '',
-    isStyle,
+    textContent: textContent || '',
    rootId,
  };
 }
@@ -608,17 +602,14 @@ function serializeElementNode(
      attributes._cssText = cssText;
    }
  }
-  // dynamic stylesheet
-  if (
-    tagName === 'style' &&
-    (n as HTMLStyleElement).sheet &&
-    // TODO: Currently we only try to get dynamic stylesheet when it is an empty style element
-    !(n.innerText || dom.textContent(n) || '').trim().length
-  ) {
-    const cssText = stringifyStylesheet(
+  if (tagName === 'style' && (n as HTMLStyleElement).sheet) {
+    let cssText = stringifyStylesheet(
      (n as HTMLStyleElement).sheet as CSSStyleSheet,
    );
    if (cssText) {
+      if (n.childNodes.length > 1) {
+        cssText = markCssSplits(cssText, n as HTMLStyleElement);
+      }
      attributes._cssText = cssText;
    }
  }
@@ -937,6 +928,7 @@ export function serializeNodeWithId(
      node: serializedElementNodeWithId,
    ) => unknown;
    stylesheetLoadTimeout?: number;
+    cssCaptured?: boolean;
  },
 ): serializedNodeWithId | null {
  const {
@@ -962,6 +954,7 @@ export function serializeNodeWithId(
    stylesheetLoadTimeout = 5000,
    keepIframeSrcFn = () => false,
    newlyAddedElement = false,
+    cssCaptured = false,
  } = options;
  let { needsMask } = options;
  let { preserveWhiteSpace = true } = options;
@@ -992,6 +985,7 @@ export function serializeNodeWithId(
    recordCanvas,
    keepIframeSrcFn,
    newlyAddedElement,
+    cssCaptured,
  });
  if (!_serializedNode) {
    // TODO: dev only
@@ -1007,7 +1001,6 @@ export function serializeNodeWithId(
    slimDOMExcluded(_serializedNode, slimDOMOptions) ||
    (!preserveWhiteSpace &&
      _serializedNode.type === NodeType.Text &&
-      !_serializedNode.isStyle &&
      !_serializedNode.textContent.replace(/^\s+|\s+$/gm, '').length)
  ) {
    id = IGNORED_NODE;
@@ -1072,6 +1065,7 @@ export function serializeNodeWithId(
      onStylesheetLoad,
      stylesheetLoadTimeout,
      keepIframeSrcFn,
+      cssCaptured: false,
    };

    if (
@@ -1081,6 +1075,13 @@ export function serializeNodeWithId(
    ) {
      // value parameter in DOM reflects the correct value, so ignore childNode
    } else {
+      if (
+        serializedNode.type === NodeType.Element &&
+        (serializedNode as elementNode).attributes._cssText !== undefined &&
+        typeof serializedNode.attributes._cssText === 'string'
+      ) {
+        bypassOptions.cssCaptured = true;
+      }
      for (const childN of Array.from(dom.childNodes(n))) {
        const serializedChildNode = serializeNodeWithId(childN, bypassOptions);
        if (serializedChildNode) {
--- a/packages/rrweb-snapshot/src/types.ts
+++ b/packages/rrweb-snapshot/src/types.ts
@@ -20,9 +20,18 @@ export type documentTypeNode = {
  systemId: string;
 };

-export type attributes = {
-  [key: string]: string | number | true | null;
+type cssTextKeyAttr = {
+  _cssText?: string;
 };
+
+export type attributes = cssTextKeyAttr & {
+  [key: string]:
+    | string
+    | number // properties e.g. rr_scrollLeft or rr_mediaCurrentTime
+    | true // e.g. checked  on <input type="radio">
+    | null; // an indication that an attribute was removed (during a mutation)
+};
+
 export type legacyAttributes = {
  /**
   * @deprecated old bug in rrweb was causing these to always be set
@@ -45,6 +54,10 @@ export type elementNode = {
 export type textNode = {
  type: NodeType.Text;
  textContent: string;
+  /**
+   * @deprecated styles are now always snapshotted against parent <style> element
+   * style mutations can still happen via an added textNode, but they don't need this attribute for correct replay
+   */
  isStyle?: true;
 };

@@ -78,6 +91,11 @@ export type serializedElementNodeWithId = Extract<
  Record<'type', NodeType.Element>
 >;

+export type serializedTextNodeWithId = Extract<
+  serializedNodeWithId,
+  Record<'type', NodeType.Text>
+>;
+
 export type tagMap = {
  [key: string]: string;
 };
--- a/packages/rrweb-snapshot/src/utils.ts
+++ b/packages/rrweb-snapshot/src/utils.ts
@@ -99,14 +99,28 @@ export function escapeImportStatement(rule: CSSImportRule): string {
  return statement.join(' ') + ';';
 }

+/*
+ * serialize the css rules from the .sheet property
+ * for <link rel="stylesheet"> elements, this is the only way of getting the rules without a FETCH
+ * for <style> elements, this is less preferable to looking at childNodes[0].textContent
+ * (which will include vendor prefixed rules which may not be used or visible to the recorded browser,
+ * but which might be needed by the replayer browser)
+ * however, at snapshot time, we don't know whether the style element has suffered
+ * any programmatic manipulation prior to the snapshot, in which case the .sheet would be more up to date
+ */
 export function stringifyStylesheet(s: CSSStyleSheet): string | null {
  try {
    const rules = s.rules || s.cssRules;
    if (!rules) {
      return null;
    }
+    let sheetHref = s.href;
+    if (!sheetHref && s.ownerNode && s.ownerNode.ownerDocument) {
+      // an inline <style> element
+      sheetHref = s.ownerNode.ownerDocument.location.href;
+    }
    const stringifiedRules = Array.from(rules, (rule: CSSRule) =>
-      stringifyRule(rule, s.href),
+      stringifyRule(rule, sheetHref),
    ).join('');
    return fixBrowserCompatibilityIssuesInCSS(stringifiedRules);
  } catch (error) {
@@ -428,3 +442,62 @@ export function absolutifyURLs(cssText: string | null, href: string): string {
    },
  );
 }
+
+/**
+ * Normalizes by removing whitespace, semicolons and CSS comments, so that css as authored can be compared with the output of stringifyStylesheet.
+ * NOTE(review): the comment pattern uses `[^*]*` for the comment body, so a comment whose body contains a `*` is not matched and is left in place (minus its whitespace/semicolons) — confirm whether that is intended.
+ */
+export function normalizeCssString(cssText: string): string {
+  return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '');
+}
+
+/**
+ * Maps the output of stringifyStylesheet onto the individual child nodes of a <style> element: for each child after the first, the start of its normalized text is located in the normalized css and `cssText` is cut at the corresponding point. Returns the pieces in order; the final entry is the remainder (all of `cssText` if no cut was made).
+ * Performance is not considered, as this is anticipated to be very much an edge case (javascript is needed to add extra text nodes to a <style>).
+ * NOTE(review): `cssTextNorm` is computed once from the full text, but `cssText` is shortened after each cut, so from the third child onwards `splitNorm` (an offset into the full text) is compared against prefixes of the remainder — looks like later cuts land too far along or are missed; confirm.
+ */
+export function splitCssText(
+  cssText: string,
+  style: HTMLStyleElement,
+): string[] {
+  const childNodes = Array.from(style.childNodes);
+  const splits: string[] = [];
+  if (childNodes.length > 1 && cssText && typeof cssText === 'string') {
+    const cssTextNorm = normalizeCssString(cssText);
+    for (let i = 1; i < childNodes.length; i++) {
+      if (
+        childNodes[i].textContent &&
+        typeof childNodes[i].textContent === 'string'
+      ) {
+        const textContentNorm = normalizeCssString(childNodes[i].textContent!);
+        for (let j = 3; j < textContentNorm.length; j++) {
+          // find a prefix (3 or more chars) of this node's normalized text that appears exactly once in the normalized css
+          const bit = textContentNorm.substring(0, j);
+          if (cssTextNorm.split(bit).length === 2) {
+            const splitNorm = cssTextNorm.indexOf(bit);
+            // find the split point in the original (un-normalized) text: the first k whose normalized prefix is splitNorm chars long
+            for (let k = splitNorm; k < cssText.length; k++) {
+              if (
+                normalizeCssString(cssText.substring(0, k)).length === splitNorm
+              ) {
+                splits.push(cssText.substring(0, k));
+                cssText = cssText.substring(k);
+                break;
+              }
+            }
+            break;
+          }
+        }
+      }
+    }
+  }
+  splits.push(cssText); // either the full text if no splits were found, or the remainder after the last split
+  return splits;
+}
+
+export function markCssSplits(
+  cssText: string,
+  style: HTMLStyleElement,
+): string {
+  return splitCssText(cssText, style).join('/* rr_split */'); // re-join the pieces from splitCssText, leaving an rr_split comment at each cut (the marker applyCssSplits splits on at rebuild)
+}