Fix splitCssText again (#1640)

Fixes a browser 'lock up' at record time due to the presence of large amounts of css in <style> elements, which are split over multiple text nodes, which triggers the new code added in #1437 (see that PR for a full explanation of why this all exists). #1437 was not written with performance in mind as it was believed to be an edge case, but things like the Grammarly browser extension (#1603) among other scenarios were triggering pathological behavior, some of which was solved in #1615. See also https://github.com/rrweb-io/rrweb/pull/1640#issuecomment-2633505804 for further discussion. * Fix the case when there are multiple matches and we end up not finding a unique one - just go with the best guess when there are many splits by looking at the previous chunk's size * Also add '0px' -> '0' stylesheet normalization, which also fixes the sample problem in a different way * Add new test and modify it so that it can trigger a failure in the absence of the '0px' normalization; there may be other unknown ways of triggering a similar bug, so ensure that the primary 'best guess' method doesn't suffer a regression * Leverage the 'best guess' method so that we can quit after 100 iterations trying to find a unique substring; hopefully this bit along with the `iterLimit` already added will prevent any future pathological cases. Failing example extracted from large files identified by Paul D'Ambra (Posthog) ... see comment from MartinWorkfully: https://github.com/PostHog/posthog-js/issues/1668
2026-04-01 12:00:00 +08:00
parent 041a2e237f
commit 636b02780e
3 changed files with 151 additions and 16 deletions
--- a/.changeset/efficiently-splitCssText-1640.md
+++ b/.changeset/efficiently-splitCssText-1640.md
@@ -0,0 +1,6 @@
 ---
 "rrweb-snapshot": patch
 "rrweb": patch
 ---
 Improve performance of splitCssText for <style> elements with large css content - see #1603
--- a/packages/rrweb-snapshot/src/utils.ts
+++ b/packages/rrweb-snapshot/src/utils.ts
@@ -450,8 +450,19 @@ export function absolutifyURLs(cssText: string | null, href: string): string {
 * Intention is to normalize by removing spaces, semicolons and CSS comments
 * so that we can compare css as authored vs. output of stringifyStylesheet
 */
-export function normalizeCssString(cssText: string): string {
+export function normalizeCssString(
  cssText: string,
  /**
   * _testNoPxNorm: only used as part of the 'substring matching going from many to none'
   * test case so that it will trigger a failure if the conditions that led to the creation of that test arise again
   */
  _testNoPxNorm = false,
 ): string {
  if (_testNoPxNorm) {
    return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '');
  } else {
    return cssText.replace(/(\/\*[^*]*\*\/)|[\s;]/g, '').replace(/0px/g, '0');
  }
 }
 /**
@@ -463,19 +474,24 @@ export function normalizeCssString(cssText: string): string {
 export function splitCssText(
  cssText: string,
  style: HTMLStyleElement,
  _testNoPxNorm = false,
 ): string[] {
  const childNodes = Array.from(style.childNodes);
  const splits: string[] = [];
-  let iterLimit = 0;
+  let iterCount = 0;
  if (childNodes.length > 1 && cssText && typeof cssText === 'string') {
-    let cssTextNorm = normalizeCssString(cssText);
+    let cssTextNorm = normalizeCssString(cssText, _testNoPxNorm);
    const normFactor = cssTextNorm.length / cssText.length;
    for (let i = 1; i < childNodes.length; i++) {
      if (
        childNodes[i].textContent &&
        typeof childNodes[i].textContent === 'string'
      ) {
-        const textContentNorm = normalizeCssString(childNodes[i].textContent!);
+        const textContentNorm = normalizeCssString(
          childNodes[i].textContent!,
          _testNoPxNorm,
        );
        const jLimit = 100; // how many iterations for the first part of searching
        let j = 3;
        for (; j < textContentNorm.length; j++) {
          if (
@@ -489,31 +505,62 @@ export function splitCssText(
          break;
        }
        for (; j < textContentNorm.length; j++) {
-          const bit = textContentNorm.substring(0, j);
+          let startSubstring = textContentNorm.substring(0, j);
          // this substring should appear only once in the overall text too
-          const bits = cssTextNorm.split(bit);
+          let cssNormSplits = cssTextNorm.split(startSubstring);
          let splitNorm = -1;
-          if (bits.length === 2) {
+          if (cssNormSplits.length === 2) {
-            splitNorm = cssTextNorm.indexOf(bit);
+            splitNorm = cssNormSplits[0].length;
          } else if (
-            bits.length > 2 &&
+            cssNormSplits.length > 2 &&
-            bits[0] === '' &&
+            cssNormSplits[0] === '' &&
            childNodes[i - 1].textContent !== ''
          ) {
            // this childNode has same starting content as previous
-            splitNorm = cssTextNorm.indexOf(bit, 1);
+            splitNorm = cssTextNorm.indexOf(startSubstring, 1);
          } else if (cssNormSplits.length === 1) {
            // try to roll back to get multiple matches again
            startSubstring = startSubstring.substring(
              0,
              startSubstring.length - 1,
            );
            cssNormSplits = cssTextNorm.split(startSubstring);
            if (cssNormSplits.length <= 1) {
              // no split possible
              splits.push(cssText);
              return splits;
            }
            j = jLimit + 1; // trigger end of search
          } else if (j === textContentNorm.length - 1) {
            // we're about to end loop without a split point
            splitNorm = cssTextNorm.indexOf(startSubstring);
          }
          if (cssNormSplits.length >= 2 && j > jLimit) {
            const prevTextContent = childNodes[i - 1].textContent;
            if (prevTextContent && typeof prevTextContent === 'string') {
              // pick the first matching point which respects the previous chunk's approx size
              const prevMinLength = normalizeCssString(prevTextContent).length;
              splitNorm = cssTextNorm.indexOf(startSubstring, prevMinLength);
            }
            if (splitNorm === -1) {
              // fall back to pick the first matching point of many
              splitNorm = cssNormSplits[0].length;
            }
          }
          if (splitNorm !== -1) {
            // find the split point in the original text
            let k = Math.floor(splitNorm / normFactor);
            for (; k > 0 && k < cssText.length; ) {
-              iterLimit += 1;
+              iterCount += 1;
-              if (iterLimit > 50 * childNodes.length) {
+              if (iterCount > 50 * childNodes.length) {
                // quit for performance purposes
                splits.push(cssText);
                return splits;
              }
-              const normPart = normalizeCssString(cssText.substring(0, k));
+              const normPart = normalizeCssString(
                cssText.substring(0, k),
                _testNoPxNorm,
              );
              if (normPart.length === splitNorm) {
                splits.push(cssText.substring(0, k));
                cssText = cssText.substring(k);
--- a/packages/rrweb-snapshot/test/css.test.ts
+++ b/packages/rrweb-snapshot/test/css.test.ts
@@ -178,7 +178,6 @@ describe('css splitter', () => {
  transition: all 4s ease;
 }`),
      );
      // TODO: splitCssText can't handle it yet if both start with .x
      style.appendChild(
        JSDOM.fragment(`.y {
  -moz-transition: all 5s ease;
@@ -227,6 +226,89 @@ describe('css splitter', () => {
    }
    expect(splitCssText(cssText, style)).toEqual(sections);
  });
  it('finds css textElement splits correctly, with substring matching going from many to none', () => {
    const window = new Window({ url: 'https://localhost:8080' });
    const document = window.document;
    document.head.innerHTML = `<style>
 .section-news-v3-detail .news-cnt-wrapper :where(p):not(:where([class~="not-prose"], [class~="not-prose"] *)) {
    margin-top: 0px;
    margin-bottom: 0px;
 }
 .section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(figure):not(:where([class~="not-prose"],[class~="not-prose"] *)) {
    margin-top: 2em;
    margin-bottom: 2em;
 }
 .section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose > :first-child):not(:where([class~="not-prose"],[cl</style>`;
    const style = document.querySelector('style');
    if (style) {
      // avoids a possible happy-dom bug: strangely, a greater-than symbol in the template string below
      // (e.g. '.prose > :last-child') causes more than one child to be appended
      style.append(`ass~="not-prose"] *)) {
    margin-top: 0;  /* cssRules transforms this to '0px' which was preventing matching prior to normalization */
 }
 .section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose :last-child):not(:where([class~="not-prose"],[class~="not-prose"] *)) {
    margin-bottom: 0;
 }
 .section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 {
    width: 100%;
    overflow-wrap: break-word;
 }
 .section-home {
    height: 100%;
    overflow-y: auto;
 }
 `);
      expect(style.childNodes.length).toEqual(2);
      const expected = [
        '.section-news-v3-detail .news-cnt-wrapper :where(p):not(:where([class~="not-prose"], [class~="not-prose"] *)) { margin-top: 0px; margin-bottom: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(figure):not(:where([class~="not-prose"],[class~="not-prose"] *)) { margin-top: 2em; margin-bottom: 2em; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose > :first-child):not(:where([class~="not-prose"],[cl',
        'ass~="not-prose"] *)) { margin-top: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 :where(.prose :last-child):not(:where([class~="not-prose"],[class~="not-prose"] *)) { margin-bottom: 0px; }.section-news-v3-detail .news-cnt-wrapper .plugins-wrapper2 { width: 100%; overflow-wrap: break-word; }.section-home { height: 100%; overflow-y: auto; }',
      ];
      const browserSheet = expected.join('');
      expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet);
      let _testNoPxNorm = true; // trigger the original motivating scenario for this test
      expect(splitCssText(browserSheet, style, _testNoPxNorm)).toEqual(
        expected,
      );
      _testNoPxNorm = false; // this case should also be solved by normalizing '0px' -> '0'
      expect(splitCssText(browserSheet, style, _testNoPxNorm)).toEqual(
        expected,
      );
    }
  });
  it('finds css textElement splits correctly, even with repeated sections', () => {
    const window = new Window({ url: 'https://localhost:8080' });
    const document = window.document;
    document.head.innerHTML =
      '<style>.a{background-color: black; }        </style>';
    const style = document.querySelector('style');
    if (style) {
      style.append('.x{background-color:red;}');
      style.append('.b      {background-color:black;}');
      style.append('.x{background-color:red;}');
      style.append('.c{      background-color:                     black}');
      const expected = [
        '.a { background-color: black; }',
        '.x { background-color: red; }',
        '.b { background-color: black; }',
        '.x { background-color: red; }',
        '.c { background-color: black; }',
      ];
      const browserSheet = expected.join('');
      expect(stringifyStylesheet(style.sheet!)).toEqual(browserSheet);
      expect(splitCssText(browserSheet, style)).toEqual(expected);
    }
  });
 });
 describe('applyCssSplits css rejoiner', function () {