Add a slimDOM option to strip out unnecessary parts of the DOM (#36)
* Add a `slimDOM` option to strip out parts of the DOM that are unnecessary for replay
  - <script> tags in the <head> take up unnecessary storage space and are often injected semi-randomly, becoming a source of unnecessary variation between recordings of the same thing
  - comment tags can be stripped out without affecting display
  - future: this option could also turn on more aggressive stripping, e.g. elements that are hidden by CSS (assuming we can handle them becoming visible after mutation events)
* Mark nodes ignored due to the slimDOM option, so that they can also be ignored by the mutation observer in rrweb
* Introducing the `ignored` attribute violates the `serializedNodeWithId` type
* slimDOM: Strip out whitespace nodes from the <head> element, as they have no effect but take up space
  - these would otherwise have to be merged after <script> elements are removed; for the statcounter use case, removing <script> elements is no good if there is still a trace of their presence due to the whitespace (and hence a variant <head> node is still produced)
  - I explored a more radical stripping of all whitespace nodes, but there is a problem if the parent node is <pre> or otherwise rendered with `white-space: pre` and similar. Detecting applied styles with getComputedStyle would be very expensive (I haven't measured it though)
* Export IGNORED_NODE as a constant instead of relying on the hard-to-grok `-2`
* Remove <link rel=preload as=script> elements, which are just as useless as <script> tags
* Make slimDOM configurable, with the expectation that `slimDOMOptions: true` will only enable non-destructive options (so not all options may be turned on)
* Expand slimDOM to add options to remove more elements from the <head> that should not be necessary in the replayer context
This commit is contained in:
@@ -2,6 +2,7 @@ import snapshot, {
|
|||||||
serializeNodeWithId,
|
serializeNodeWithId,
|
||||||
transformAttribute,
|
transformAttribute,
|
||||||
visitSnapshot,
|
visitSnapshot,
|
||||||
|
IGNORED_NODE,
|
||||||
} from './snapshot';
|
} from './snapshot';
|
||||||
import rebuild, { buildNodeWithSN, addHoverClass } from './rebuild';
|
import rebuild, { buildNodeWithSN, addHoverClass } from './rebuild';
|
||||||
export * from './types';
|
export * from './types';
|
||||||
@@ -14,4 +15,5 @@ export {
|
|||||||
addHoverClass,
|
addHoverClass,
|
||||||
transformAttribute,
|
transformAttribute,
|
||||||
visitSnapshot,
|
visitSnapshot,
|
||||||
|
IGNORED_NODE,
|
||||||
};
|
};
|
||||||
|
|||||||
124
src/snapshot.ts
124
src/snapshot.ts
@@ -6,11 +6,14 @@ import {
|
|||||||
INode,
|
INode,
|
||||||
idNodeMap,
|
idNodeMap,
|
||||||
MaskInputOptions,
|
MaskInputOptions,
|
||||||
|
SlimDOMOptions,
|
||||||
} from './types';
|
} from './types';
|
||||||
|
|
||||||
let _id = 1;
|
let _id = 1;
|
||||||
const tagNameRegex = RegExp('[^a-z1-6-_]');
|
const tagNameRegex = RegExp('[^a-z1-6-_]');
|
||||||
|
|
||||||
|
/**
 * Sentinel id assigned by `serializeNodeWithId` to a node that is left out of
 * the snapshot (excluded by the slimDOM options, or a whitespace-only text
 * node where whitespace is not preserved). The id is still stored on the
 * node's `__sn`, while `serializeNodeWithId` itself returns `null` for it.
 * Negative, so it cannot collide with ids from `genId()`, which count up from 1.
 */
export const IGNORED_NODE = -2;
|
||||||
|
|
||||||
function genId(): number {
|
function genId(): number {
|
||||||
return _id++;
|
return _id++;
|
||||||
}
|
}
|
||||||
@@ -323,6 +326,83 @@ function serializeNode(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Normalizes an optional attribute value for case-insensitive comparison.
 *
 * @param maybeAttr - Attribute value read from a serialized node. It may be
 *   missing (`undefined` or `null`) when the attribute is not present.
 * @returns The lower-cased string form of the value, or `''` when it is absent.
 */
function lowerIfExists(
  maybeAttr: string | number | boolean | null | undefined,
): string {
  if (maybeAttr === undefined || maybeAttr === null) {
    return '';
  }
  // Numbers and booleans have no toLowerCase(); stringify before lowering so
  // a non-string attribute value cannot throw.
  return String(maybeAttr).toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a serialized node should be dropped from the snapshot
 * under the given slimDOM options.
 *
 * Only comment nodes and element nodes can be excluded here; every other node
 * type is always kept. Each check is gated by its own option flag, so an
 * empty options object excludes nothing.
 *
 * @param sn - The freshly serialized node to test.
 * @param slimDOMOptions - Flags selecting which categories of node to drop.
 * @returns `true` when the node matches an enabled exclusion rule.
 */
function slimDOMExcluded(
  sn: serializedNode,
  slimDOMOptions: SlimDOMOptions,
): boolean {
  if (sn.type === NodeType.Comment) {
    // TODO: convert IE conditional comments to real nodes
    return Boolean(slimDOMOptions.comment);
  }
  if (sn.type !== NodeType.Element) {
    return false;
  }

  const { tagName, attributes } = sn;

  // <script> itself, plus <link rel=preload as=script> which only exists to
  // fetch a script.
  if (
    slimDOMOptions.script &&
    (tagName === 'script' ||
      (tagName === 'link' &&
        attributes.rel === 'preload' &&
        attributes['as'] === 'script'))
  ) {
    return true;
  }

  if (
    slimDOMOptions.headFavicon &&
    tagName === 'link' &&
    attributes.rel === 'shortcut icon'
  ) {
    return true;
  }

  // Every remaining rule applies to <meta> elements only.
  if (tagName !== 'meta') {
    return false;
  }

  // Attributes are read lazily so a rule whose flag is off never touches them.
  const attr = (key: string): string => lowerIfExists(attributes[key]);
  const nameIs = (...candidates: string[]): boolean =>
    candidates.indexOf(attr('name')) !== -1;

  if (
    slimDOMOptions.headFavicon &&
    (/^msapplication-tile(image|color)$/.test(attr('name')) ||
      nameIs('application-name') ||
      ['icon', 'apple-touch-icon', 'shortcut icon'].indexOf(attr('rel')) !== -1)
  ) {
    return true;
  }

  // The alternation is grouped so both anchors apply to both words; without
  // the group, names such as "description-foo" or "news_keywords" also match.
  if (
    slimDOMOptions.headMetaDescKeywords &&
    /^(description|keywords)$/.test(attr('name'))
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaSocial &&
    (/^(og|twitter|fb):/.test(attr('property')) || // og = opengraph (facebook)
      /^(og|twitter):/.test(attr('name')) ||
      nameIs('pinterest'))
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaRobots &&
    nameIs('robots', 'googlebot', 'bingbot')
  ) {
    return true;
  }

  // e.g. X-UA-Compatible, Content-Type, Content-Language,
  // cache-control, X-Translated-By
  if (
    slimDOMOptions.headMetaHttpEquiv &&
    attributes['http-equiv'] !== undefined
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaAuthorship &&
    (nameIs('author', 'generator', 'framework', 'publisher', 'progid') ||
      /^article:/.test(attr('property')) ||
      /^product:/.test(attr('property')))
  ) {
    return true;
  }

  if (
    slimDOMOptions.headMetaVerification &&
    nameIs(
      'google-site-verification',
      'yandex-verification',
      'csrf-token',
      'p:domain_verify',
      'verify-v1',
      'verification',
      'shopify-checkout-api-token',
    )
  ) {
    return true;
  }

  return false;
}
|
||||||
|
|
||||||
export function serializeNodeWithId(
|
export function serializeNodeWithId(
|
||||||
n: Node | INode,
|
n: Node | INode,
|
||||||
doc: Document,
|
doc: Document,
|
||||||
@@ -331,7 +411,9 @@ export function serializeNodeWithId(
|
|||||||
skipChild = false,
|
skipChild = false,
|
||||||
inlineStylesheet = true,
|
inlineStylesheet = true,
|
||||||
maskInputOptions?: MaskInputOptions,
|
maskInputOptions?: MaskInputOptions,
|
||||||
|
slimDOMOptions: SlimDOMOptions = {},
|
||||||
recordCanvas?: boolean,
|
recordCanvas?: boolean,
|
||||||
|
preserveWhiteSpace = true,
|
||||||
): serializedNodeWithId | null {
|
): serializedNodeWithId | null {
|
||||||
const _serializedNode = serializeNode(
|
const _serializedNode = serializeNode(
|
||||||
n,
|
n,
|
||||||
@@ -346,15 +428,26 @@ export function serializeNodeWithId(
|
|||||||
console.warn(n, 'not serialized');
|
console.warn(n, 'not serialized');
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
let id;
|
let id;
|
||||||
// Try to reuse the previous id
|
// Try to reuse the previous id
|
||||||
if ('__sn' in n) {
|
if ('__sn' in n) {
|
||||||
id = n.__sn.id;
|
id = n.__sn.id;
|
||||||
|
} else if (slimDOMExcluded(_serializedNode, slimDOMOptions) ||
|
||||||
|
(!preserveWhiteSpace &&
|
||||||
|
_serializedNode.type === NodeType.Text &&
|
||||||
|
!_serializedNode.isStyle &&
|
||||||
|
!_serializedNode.textContent.replace(/^\s+|\s+$/gm,'').length
|
||||||
|
)) {
|
||||||
|
id = IGNORED_NODE;
|
||||||
} else {
|
} else {
|
||||||
id = genId();
|
id = genId();
|
||||||
}
|
}
|
||||||
const serializedNode = Object.assign(_serializedNode, { id });
|
const serializedNode = Object.assign(_serializedNode, { id });
|
||||||
(n as INode).__sn = serializedNode;
|
(n as INode).__sn = serializedNode;
|
||||||
|
if (id === IGNORED_NODE) {
|
||||||
|
return null; // slimDOM
|
||||||
|
}
|
||||||
map[id] = n as INode;
|
map[id] = n as INode;
|
||||||
let recordChild = !skipChild;
|
let recordChild = !skipChild;
|
||||||
if (serializedNode.type === NodeType.Element) {
|
if (serializedNode.type === NodeType.Element) {
|
||||||
@@ -367,6 +460,14 @@ export function serializeNodeWithId(
|
|||||||
serializedNode.type === NodeType.Element) &&
|
serializedNode.type === NodeType.Element) &&
|
||||||
recordChild
|
recordChild
|
||||||
) {
|
) {
|
||||||
|
if (
|
||||||
|
(slimDOMOptions.headWhitespace &&
|
||||||
|
_serializedNode.type === NodeType.Element &&
|
||||||
|
_serializedNode.tagName == 'head')
|
||||||
|
// would impede performance: || getComputedStyle(n)['white-space'] === 'normal'
|
||||||
|
) {
|
||||||
|
preserveWhiteSpace = false;
|
||||||
|
}
|
||||||
for (const childN of Array.from(n.childNodes)) {
|
for (const childN of Array.from(n.childNodes)) {
|
||||||
const serializedChildNode = serializeNodeWithId(
|
const serializedChildNode = serializeNodeWithId(
|
||||||
childN,
|
childN,
|
||||||
@@ -376,7 +477,9 @@ export function serializeNodeWithId(
|
|||||||
skipChild,
|
skipChild,
|
||||||
inlineStylesheet,
|
inlineStylesheet,
|
||||||
maskInputOptions,
|
maskInputOptions,
|
||||||
|
slimDOMOptions,
|
||||||
recordCanvas,
|
recordCanvas,
|
||||||
|
preserveWhiteSpace,
|
||||||
);
|
);
|
||||||
if (serializedChildNode) {
|
if (serializedChildNode) {
|
||||||
serializedNode.childNodes.push(serializedChildNode);
|
serializedNode.childNodes.push(serializedChildNode);
|
||||||
@@ -391,6 +494,7 @@ function snapshot(
|
|||||||
blockClass: string | RegExp = 'rr-block',
|
blockClass: string | RegExp = 'rr-block',
|
||||||
inlineStylesheet = true,
|
inlineStylesheet = true,
|
||||||
maskAllInputsOrOptions: boolean | MaskInputOptions,
|
maskAllInputsOrOptions: boolean | MaskInputOptions,
|
||||||
|
slimDOMSensibleOrOptions: boolean | SlimDOMOptions,
|
||||||
recordCanvas?: boolean,
|
recordCanvas?: boolean,
|
||||||
): [serializedNodeWithId | null, idNodeMap] {
|
): [serializedNodeWithId | null, idNodeMap] {
|
||||||
const idNodeMap: idNodeMap = {};
|
const idNodeMap: idNodeMap = {};
|
||||||
@@ -416,6 +520,25 @@ function snapshot(
|
|||||||
: maskAllInputsOrOptions === false
|
: maskAllInputsOrOptions === false
|
||||||
? {}
|
? {}
|
||||||
: maskAllInputsOrOptions;
|
: maskAllInputsOrOptions;
|
||||||
|
const slimDOMOptions: SlimDOMOptions =
|
||||||
|
(slimDOMSensibleOrOptions === true ||
|
||||||
|
slimDOMSensibleOrOptions === 'all')
|
||||||
|
// if true: set of sensible options that should not throw away any information
|
||||||
|
? {
|
||||||
|
script: true,
|
||||||
|
comment: true,
|
||||||
|
headFavicon: true,
|
||||||
|
headWhitespace: true,
|
||||||
|
headMetaDescKeywords: slimDOMSensibleOrOptions === 'all', // destructive
|
||||||
|
headMetaSocial: true,
|
||||||
|
headMetaRobots: true,
|
||||||
|
headMetaHttpEquiv: true,
|
||||||
|
headMetaAuthorship: true,
|
||||||
|
headMetaVerification: true,
|
||||||
|
}
|
||||||
|
: slimDOMSensibleOrOptions === false
|
||||||
|
? {}
|
||||||
|
: slimDOMSensibleOrOptions;
|
||||||
return [
|
return [
|
||||||
serializeNodeWithId(
|
serializeNodeWithId(
|
||||||
n,
|
n,
|
||||||
@@ -425,6 +548,7 @@ function snapshot(
|
|||||||
false,
|
false,
|
||||||
inlineStylesheet,
|
inlineStylesheet,
|
||||||
maskInputOptions,
|
maskInputOptions,
|
||||||
|
slimDOMOptions,
|
||||||
recordCanvas,
|
recordCanvas,
|
||||||
),
|
),
|
||||||
idNodeMap,
|
idNodeMap,
|
||||||
|
|||||||
13
src/types.ts
13
src/types.ts
@@ -87,3 +87,16 @@ export type MaskInputOptions = Partial<{
|
|||||||
textarea: boolean;
|
textarea: boolean;
|
||||||
select: boolean;
|
select: boolean;
|
||||||
}>;
|
}>;
|
||||||
|
|
||||||
|
/**
 * Flags selecting which categories of node are left out of a snapshot.
 * Every flag is optional; a flag that is absent or `false` disables its rule.
 */
export type SlimDOMOptions = Partial<{
|
||||||
|
/** Drop `<script>` elements and `<link rel="preload" as="script">` elements. */
script: boolean;
|
||||||
|
/** Drop comment nodes. */
comment: boolean;
|
||||||
|
/**
 * Drop favicon-related tags: `<link rel="shortcut icon">` and `<meta>` tags
 * matched on their `name` (tile image/color, application-name) or `rel`
 * (icon, apple-touch-icon, shortcut icon).
 */
headFavicon: boolean;
|
||||||
|
/** Stop preserving whitespace-only text nodes for the children of `<head>`. */
headWhitespace: boolean;
|
||||||
|
/**
 * Drop description/keywords `<meta>` tags, matched on their `name` attribute.
 * Marked as destructive where `snapshot` builds its defaults: enabled only
 * for `'all'`, not for `true`.
 */
headMetaDescKeywords: boolean;
|
||||||
|
/** Drop social-media `<meta>` tags (og:, twitter:, fb: prefixes; pinterest). */
headMetaSocial: boolean;
|
||||||
|
/** Drop crawler `<meta>` tags named robots, googlebot or bingbot. */
headMetaRobots: boolean;
|
||||||
|
/** Drop any `<meta>` tag that carries an `http-equiv` attribute. */
headMetaHttpEquiv: boolean;
|
||||||
|
/**
 * Drop authorship `<meta>` tags (author, generator, framework, publisher,
 * progid names; `article:` / `product:` properties).
 */
headMetaAuthorship: boolean;
|
||||||
|
/** Drop site-verification and token `<meta>` tags (e.g. google-site-verification, csrf-token). */
headMetaVerification: boolean;
|
||||||
|
}>;
|
||||||
|
|||||||
Reference in New Issue
Block a user