import type {
  MaskInputOptions,
  SlimDOMOptions,
  MaskTextFn,
  MaskInputFn,
  KeepIframeSrcFn,
  ICanvas,
  DialogAttributes,
} from './types';
import { NodeType } from '@rrweb/types';
import type {
  serializedNode,
  serializedNodeWithId,
  serializedElementNodeWithId,
  elementNode,
  attributes,
  mediaAttributes,
  DataURLOptions,
} from '@rrweb/types';
import {
  Mirror,
  is2DCanvasBlank,
  isElement,
  isShadowRoot,
  maskInputValue,
  isNativeShadowDom,
  stringifyStylesheet,
  getInputType,
  toLowerCase,
  extractFileExtension,
  absolutifyURLs,
  markCssSplits,
} from './utils';
import dom from '@rrweb/utils';

// Module-level counter backing `genId`; the first id handed out is 1.
let _id = 1;
// Matches any character other than a-z, 0-9, '-', '_' and ':'.
const tagNameRegex = new RegExp('[^a-z0-9-_:]');

export const IGNORED_NODE = -2;

/**
 * Returns the next id from the module-level counter and advances the counter.
 */
export function genId(): number {
  return _id++;
}

/**
 * Derives the lowercase tag name recorded for an element.
 *
 * @param element - Element whose tag name is wanted.
 * @returns `'form'` for any `HTMLFormElement`, `'div'` when the lowercased
 * tag name contains a character rejected by `tagNameRegex`, otherwise the
 * lowercased tag name.
 */
function getValidTagName(element: HTMLElement): Lowercase<string> {
  // NOTE(review): presumably avoids reading `tagName` off a form whose named
  // controls could shadow the property — confirm.
  if (element instanceof HTMLFormElement) {
    return 'form';
  }

  const processedTagName = toLowerCase(element.tagName);

  if (tagNameRegex.test(processedTagName)) {
    // if the tag name is odd and we cannot extract
    // anything from the string, then we return a
    // generic div
    return 'div';
  }

  return processedTagName;
}

let canvasService: HTMLCanvasElement | null;
let canvasCtx: CanvasRenderingContext2D | null;

// eslint-disable-next-line no-control-regex
const SRCSET_NOT_SPACES = /^[^ \t\n\r\u000c]+/; // Don't use \s, to avoid matching non-breaking space
// eslint-disable-next-line no-control-regex
const SRCSET_COMMAS_OR_SPACES = /^[, \t\n\r\u000c]+/;

/**
 * Rewrites every URL of a `srcset` attribute value through `absoluteToDoc`.
 *
 * @param doc - Document handed to `absoluteToDoc` for each URL.
 * @param attributeValue - Raw `srcset` attribute value.
 * @returns The candidates joined with `', '`, each made of the rewritten URL
 * followed by its descriptors as written (trimmed). A value that is empty or
 * only whitespace is returned unchanged.
 */
function getAbsoluteSrcsetString(doc: Document, attributeValue: string): string {
  /*
    run absoluteToDoc over every url in the srcset

    this is adapted from https://github.com/albell/parse-srcset/
    without the parsing of the descriptors (we return these as-is)
    parse-srcset is in turn based on
    https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
  */
  if (attributeValue.trim() === '') {
    return attributeValue;
  }

  let pos = 0;

  // Consumes the match of `regEx` at the current position (both srcset
  // regexes are anchored with `^`) and returns it, or '' when nothing matches.
  function collectCharacters(regEx: RegExp): string {
    const match = regEx.exec(attributeValue.substring(pos));
    if (!match) {
      return '';
    }
    const chars = match[0];
    pos += chars.length;
    return chars;
  }

  const output: string[] = [];
  // eslint-disable-next-line no-constant-condition
  while (true) {
    collectCharacters(SRCSET_COMMAS_OR_SPACES);
    if (pos >= attributeValue.length) {
      break;
    }
    // don't split on commas within urls
    let url = collectCharacters(SRCSET_NOT_SPACES);
    if (url.slice(-1) === ',') {
      // aside: according to spec more than one comma at the end is a parse error, but we ignore that
      url = absoluteToDoc(doc, url.substring(0, url.length - 1));
      // the trailing comma splits the srcset, so the interpretation is that
      // another url will follow, and the descriptor is empty
      output.push(url);
    } else {
      let descriptorsStr = '';
      url = absoluteToDoc(doc, url);
      let inParens = false;
      // eslint-disable-next-line no-constant-condition
      while (true) {
        const c = attributeValue.charAt(pos);
        if (c === '') {
          // end of input: flush the last candidate
          output.push((url + descriptorsStr).trim());
          break;
        } else if (!inParens) {
          if (c === ',') {
            pos += 1;
            output.push((url + descriptorsStr).trim());
            break; // parse the next url
          } else if (c === '(') {
            inParens = true;
          }
        } else {
          // in parenthesis; ignore commas
          // (parenthesis may be supported by future additions to spec)
          if (c === ')') {
            inParens = false;
          }
        }
        descriptorsStr += c;
        pos += 1;
      }
    }
  }
  return output.join(', ');
}

// One anchor element per document, reused by `getHref` to resolve URLs.
const cachedDocument = new WeakMap<Document, HTMLAnchorElement>();

/**
 * Resolves an attribute value to the URL reported by an anchor element of
 * `doc`.
 *
 * @param doc - Document used to resolve the value.
 * @param attributeValue - URL-like attribute value.
 * @returns `attributeValue` unchanged when it is empty or only whitespace,
 * otherwise the result of `getHref(doc, attributeValue)`.
 */
export function absoluteToDoc(doc: Document, attributeValue: string): string {
  if (!attributeValue || attributeValue.trim() === '') {
    return attributeValue;
  }

  return getHref(doc, attributeValue);
}

/**
 * Reports whether `el` is an `svg` element or has an `ownerSVGElement`.
 */
function isSVGElement(el: Element): boolean {
  return Boolean(el.tagName === 'svg' || (el as SVGElement).ownerSVGElement);
}

/**
 * Resolves `customHref` by assigning it to a cached anchor element created
 * from `doc` and reading the anchor's `href` back.
 *
 * @param doc - Document whose cached anchor is used (created on first use).
 * @param customHref - Value to resolve; a missing or empty value is treated
 * as `''`.
 * @returns `customHref` unchanged when it starts with `blob:` or `data:`,
 * otherwise the anchor's `href` after the attribute has been set.
 */
function getHref(doc: Document, customHref?: string): string {
  let a = cachedDocument.get(doc);
  if (!a) {
    a = doc.createElement('a');
    cachedDocument.set(doc, a);
  }
  if (!customHref) {
    customHref = '';
  } else if (customHref.startsWith('blob:') || customHref.startsWith('data:')) {
    return customHref;
  }
  // note: using `new URL` is slower. See #1434 or https://jsbench.me/uqlud17rxo/1
  a.setAttribute('href', customHref);
  return a.href;
}

/**
 * Rewrites URL-bearing attribute values so they are resolved against `doc`.
 *
 * @param doc - Document used for URL resolution.
 * @param tagName - Lowercase tag name of the element owning the attribute.
 * @param name - Lowercase attribute name.
 * @param value - Attribute value; falsy values are returned unchanged.
 * @returns The rewritten value for `src`, `href`, `xlink:href`, `background`
 * (on `table`/`td`/`th`), `srcset`, `style` and `data` (on `object`);
 * any other value is returned as given.
 */
export function transformAttribute(
  doc: Document,
  tagName: Lowercase<string>,
  name: Lowercase<string>,
  value: string | null,
): string | null {
  if (!value) {
    return value;
  }

  // relative path in attribute
  if (
    name === 'src' ||
    (name === 'href' && !(tagName === 'use' && value[0] === '#'))
  ) {
    // href starts with a # is an id pointer for svg
    return absoluteToDoc(doc, value);
  } else if (name === 'xlink:href' && value[0] !== '#') {
    // xlink:href starts with # is an id pointer
    return absoluteToDoc(doc, value);
  } else if (
    name === 'background' &&
    (tagName === 'table' || tagName === 'td' || tagName === 'th')
  ) {
    return absoluteToDoc(doc, value);
  } else if (name === 'srcset') {
    return getAbsoluteSrcsetString(doc, value);
  } else if (name === 'style') {
    return absolutifyURLs(value, getHref(doc));
  } else if (tagName === 'object' && name === 'data') {
    return absoluteToDoc(doc, value);
  }

  return value;
}

/**
 * Reports whether an attribute should be ignored.
 *
 * @returns `true` only for the `autoplay` attribute of `video` and `audio`.
 */
export function ignoreAttribute(
  tagName: string,
  name: string,
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  _value: unknown,
): boolean {
  return (tagName === 'video' || tagName === 'audio') && name === 'autoplay';
}

/**
 * Reports whether `element` is blocked by class or by selector.
 *
 * @param element - Element to test.
 * @param blockClass - Exact class name, or a regex tested against each class.
 * @param blockSelector - Optional selector checked with `element.matches`.
 * @returns `true` on a class or selector match; `false` otherwise, including
 * when any of the checks throws.
 */
export function _isBlockedElement(
  element: HTMLElement,
  blockClass: string | RegExp,
  blockSelector: string | null,
): boolean {
  try {
    if (typeof blockClass === 'string') {
      if (element.classList.contains(blockClass)) {
        return true;
      }
    } else {
      for (let eIndex = element.classList.length; eIndex--; ) {
        const className = element.classList[eIndex];
        if (blockClass.test(className)) {
          return true;
        }
      }
    }
    if (blockSelector) {
      return element.matches(blockSelector);
    }
  } catch (e) {
    // best effort: a failing check is treated as "not blocked"
  }

  return false;
}

/**
 * Tests whether any class of `node` matches `regex`, optionally walking up
 * through `dom.parentNode`.
 *
 * @param node - Starting node; `null` yields `false`.
 * @param regex - Pattern tested against each class name.
 * @param checkAncestors - When `true`, parents are checked as well;
 * non-element nodes are skipped on the way up.
 */
export function classMatchesRegex(
  node: Node | null,
  regex: RegExp,
  checkAncestors: boolean,
): boolean {
  if (!node) return false;
  if (node.nodeType !== node.ELEMENT_NODE) {
    if (!checkAncestors) return false;
    return classMatchesRegex(dom.parentNode(node), regex, checkAncestors);
  }

  for (let eIndex = (node as HTMLElement).classList.length; eIndex--; ) {
    const className = (node as HTMLElement).classList[eIndex];
    if (regex.test(className)) {
      return true;
    }
  }
  if (!checkAncestors) return false;
  return classMatchesRegex(dom.parentNode(node), regex, checkAncestors);
}

/**
 * Decides whether text under `node` must be masked.
 *
 * For an element the element itself is inspected (leaf elements return
 * `false`); for any other node its parent element is inspected, and a node
 * without a parent element returns `false`.
 *
 * @param node - Element or non-element node to check.
 * @param maskTextClass - Class name, or regex tested against class names.
 * @param maskTextSelector - Optional selector.
 * @param checkAncestors - When `true`, ancestors are considered as well
 * (`closest` / recursive class match) instead of only the element itself.
 * @returns `true` on a class or selector match; `false` otherwise, including
 * when any of the checks throws.
 */
export function needMaskingText(
  node: Node,
  maskTextClass: string | RegExp,
  maskTextSelector: string | null,
  checkAncestors: boolean,
): boolean {
  let el: Element;
  if (isElement(node)) {
    el = node;
    if (!dom.childNodes(el).length) {
      // optimisation: we can avoid any of the below checks on leaf elements
      // as masking is applied to child text nodes only
      return false;
    }
  } else {
    // single lookup, so no non-null assertion is needed afterwards
    const parentEl = dom.parentElement(node);
    if (parentEl === null) {
      // should warn? maybe a text node isn't attached to a parent node yet?
      return false;
    }
    el = parentEl;
  }
  try {
    if (typeof maskTextClass === 'string') {
      if (checkAncestors) {
        if (el.closest(`.${maskTextClass}`)) return true;
      } else {
        if (el.classList.contains(maskTextClass)) return true;
      }
    } else {
      if (classMatchesRegex(el, maskTextClass, checkAncestors)) return true;
    }
    if (maskTextSelector) {
      if (checkAncestors) {
        if (el.closest(maskTextSelector)) return true;
      } else {
        if (el.matches(maskTextSelector)) return true;
      }
    }
  } catch (e) {
    // best effort: a failing check is treated as "no masking needed"
  }
  return false;
}

// https://stackoverflow.com/a/36155560
/**
 * Invokes `listener` once the iframe has loaded, and again on later loads.
 *
 * Nothing is registered when the iframe has no `contentWindow` or when
 * reading its document's `readyState` throws.
 *
 * @param iframeEl - Iframe to observe.
 * @param listener - Callback to invoke.
 * @param iframeLoadTimeout - Milliseconds after which `listener` is invoked
 * for a still-loading document if no `load` event has arrived yet.
 */
function onceIframeLoaded(
  iframeEl: HTMLIFrameElement,
  listener: () => unknown,
  iframeLoadTimeout: number,
) {
  const win = iframeEl.contentWindow;
  if (!win) {
    return;
  }
  // document is loading
  let fired = false;

  let readyState: DocumentReadyState;
  try {
    readyState = win.document.readyState;
  } catch (error) {
    return;
  }
  if (readyState !== 'complete') {
    const timer = setTimeout(() => {
      if (!fired) {
        listener();
        fired = true;
      }
    }, iframeLoadTimeout);
    // the load handler always invokes the listener, even after the timeout
    // callback has already done so
    iframeEl.addEventListener('load', () => {
      clearTimeout(timer);
      fired = true;
      listener();
    });
    return;
  }
  // check blank frame for Chrome
  const blankUrl = 'about:blank';
  if (
    win.location.href !== blankUrl ||
    iframeEl.src === blankUrl ||
    iframeEl.src === ''
  ) {
    // iframe was already loaded, make sure we wait to trigger the listener
    // till _after_ the mutation that found this iframe has had time to process
    setTimeout(listener, 0);

    return iframeEl.addEventListener('load', listener); // keep listening for future loads
  }
  // use default listener
  iframeEl.addEventListener('load', listener);
}

/**
 * Invokes `listener` when a `<link>` whose sheet is not yet available loads.
 *
 * Nothing is registered when reading `link.sheet` throws or when the sheet
 * is already present.
 *
 * @param link - Link element to observe.
 * @param listener - Callback to invoke.
 * @param styleSheetLoadTimeout - Milliseconds after which `listener` is
 * invoked if no `load` event has arrived yet.
 */
function onceStylesheetLoaded(
  link: HTMLLinkElement,
  listener: () => unknown,
  styleSheetLoadTimeout: number,
) {
  let fired = false;
  let styleSheetLoaded: StyleSheet | null;
  try {
    styleSheetLoaded = link.sheet;
  } catch (error) {
    return;
  }

  if (styleSheetLoaded) return;

  const timer = setTimeout(() => {
    if (!fired) {
      listener();
      fired = true;
    }
  }, styleSheetLoadTimeout);

  // the load handler always invokes the listener, even after the timeout
  // callback has already done so
  link.addEventListener('load', () => {
    clearTimeout(timer);
    fired = true;
    listener();
  });
}

/**
 * Serializes a single DOM node (without its children) by dispatching on
 * `nodeType`.
 *
 * @param n - Node to serialize.
 * @param options - Serialization settings; the relevant subset is forwarded
 * to `serializeElementNode` / `serializeTextNode`.
 * @returns The serialized node, or `false` for node types other than
 * document, doctype, element, text, CDATA section and comment.
 */
function serializeNode(
  n: Node,
  options: {
    doc: Document;
    mirror: Mirror;
    blockClass: string | RegExp;
    blockSelector: string | null;
    needsMask: boolean;
    inlineStylesheet: boolean;
    maskInputOptions: MaskInputOptions;
    maskTextFn: MaskTextFn | undefined;
    maskInputFn: MaskInputFn | undefined;
    dataURLOptions?: DataURLOptions;
    inlineImages: boolean;
    recordCanvas: boolean;
    keepIframeSrcFn: KeepIframeSrcFn;
    /**
     * `newlyAddedElement: true` skips scrollTop and scrollLeft check
     */
    newlyAddedElement?: boolean;
    cssCaptured?: boolean;
  },
): serializedNode | false {
  const {
    doc,
    mirror,
    blockClass,
    blockSelector,
    needsMask,
    inlineStylesheet,
    maskInputOptions = {},
    maskTextFn,
    maskInputFn,
    dataURLOptions = {},
    inlineImages,
    recordCanvas,
    keepIframeSrcFn,
    newlyAddedElement = false,
    cssCaptured = false,
  } = options;
  // Only record root id when document object is not the base document
  const rootId = getRootId(doc, mirror);
  switch (n.nodeType) {
    case n.DOCUMENT_NODE:
      if ((n as Document).compatMode !== 'CSS1Compat') {
        return {
          type: NodeType.Document,
          childNodes: [],
          compatMode: (n as Document).compatMode, // probably "BackCompat"
        };
      } else {
        return {
          type: NodeType.Document,
          childNodes: [],
        };
      }
    case n.DOCUMENT_TYPE_NODE:
      return {
        type: NodeType.DocumentType,
        name: (n as DocumentType).name,
        publicId: (n as DocumentType).publicId,
        systemId: (n as DocumentType).systemId,
        rootId,
      };
    case n.ELEMENT_NODE:
      return serializeElementNode(n as HTMLElement, {
        doc,
        blockClass,
        blockSelector,
        inlineStylesheet,
        maskInputOptions,
        maskInputFn,
        dataURLOptions,
        inlineImages,
        recordCanvas,
        keepIframeSrcFn,
        newlyAddedElement,
        rootId,
      });
    case n.TEXT_NODE:
      return serializeTextNode(n as Text, {
        doc,
        needsMask,
        maskTextFn,
        rootId,
        cssCaptured,
      });
    case n.CDATA_SECTION_NODE:
      // CDATA content is not captured; only an empty placeholder is recorded
      return {
        type: NodeType.CDATA,
        textContent: '',
        rootId,
      };
    case n.COMMENT_NODE:
      return {
        type: NodeType.Comment,
        textContent: dom.textContent(n as Comment) || '',
        rootId,
      };
    default:
      return false;
  }
}

/**
 * Returns the mirror id of `doc`, or `undefined` when `doc` is not in the
 * mirror or its id is 1.
 */
function getRootId(doc: Document, mirror: Mirror): number | undefined {
  if (!mirror.hasNode(doc)) return undefined;
  const docId = mirror.getId(doc);
  return docId === 1 ? undefined : docId;
}

function serializeTextNode(
  n: Text,
  options: {
    doc: Document;
    needsMask: boolean;
    maskTextFn: MaskTextFn | undefined;
    rootId: number | undefined;
    cssCaptured?: boolean;
  },
): serializedNode {
  const { needsMask, maskTextFn, rootId, cssCaptured } = options;
  // The parent node may not be a html element which has a tagName attribute.
  // So just let it be undefined which is ok in this use case.
  const parent = dom.parentNode(n);
  const parentTagName = parent && (parent as HTMLElement).tagName;
  let textContent: string | null = '';
  const isStyle = parentTagName === 'STYLE' ? true : undefined;
  const isScript = parentTagName === 'SCRIPT' ? true : undefined;
  if (isScript) {
    textContent = 'SCRIPT_PLACEHOLDER';
  } else if (!cssCaptured) {
    textContent = dom.textContent(n);
    if (isStyle && textContent) {
      // mutation only: we don't need to use stringifyStylesheet
      // as a