From 07ac5c9e1371824ec3ffb705f9250bbe10f4b73e Mon Sep 17 00:00:00 2001 From: Eoghan Murray Date: Fri, 24 Nov 2023 16:06:02 +0000 Subject: [PATCH] Masking: Avoid the repeated calls to `closest` when recursing through the DOM (#1349) * masking performance: avoid the repeated calls to `closest` when recursing through the DOM - needsMask===true means that an ancestor has tested positively for masking, and so this node and all descendents should be masked - needsMask===false means that no ancestors have tested positively for masking, we should check each node encountered - needsMask===undefined means that we don't know whether ancestors are masked or not (e.g. after a mutation) and should look up the tree * Add tests including an explicit characterData mutation tests * Further performance improvement: avoid calls to `el.matches` when on a leaf node, e.g. a `
` --------- Authored-by: eoghanmurray Based on initial PR #1338 by Alexey Babik --- .changeset/thin-vans-applaud.md | 6 + packages/rrweb-snapshot/src/snapshot.ts | 63 +++-- packages/rrweb/src/record/mutation.ts | 1 + .../__snapshots__/integration.test.ts.snap | 237 ++++++++++++++++++ packages/rrweb/test/integration.test.ts | 39 +++ packages/rrweb/test/utils.ts | 1 + 6 files changed, 323 insertions(+), 24 deletions(-) create mode 100644 .changeset/thin-vans-applaud.md diff --git a/.changeset/thin-vans-applaud.md b/.changeset/thin-vans-applaud.md new file mode 100644 index 0000000000..e5d8d32a63 --- /dev/null +++ b/.changeset/thin-vans-applaud.md @@ -0,0 +1,6 @@ +--- +'rrweb-snapshot': patch +'rrweb': patch +--- + +Snapshot performance when masking text: Avoid the repeated calls to `closest` when recursing through the DOM diff --git a/packages/rrweb-snapshot/src/snapshot.ts b/packages/rrweb-snapshot/src/snapshot.ts index e6b25dc92b..6d2f2ed2e8 100644 --- a/packages/rrweb-snapshot/src/snapshot.ts +++ b/packages/rrweb-snapshot/src/snapshot.ts @@ -310,6 +310,7 @@ export function needMaskingText( node: Node, maskTextClass: string | RegExp, maskTextSelector: string | null, + checkAncestors: boolean, ): boolean { try { const el: HTMLElement | null = @@ -317,17 +318,21 @@ export function needMaskingText( ? (node as HTMLElement) : node.parentElement; if (el === null) return false; - if (typeof maskTextClass === 'string') { - if (el.classList.contains(maskTextClass)) return true; - if (el.closest(`.${maskTextClass}`)) return true; + if (checkAncestors) { + if (el.closest(`.${maskTextClass}`)) return true; + } else { + if (el.classList.contains(maskTextClass)) return true; + } } else { - if (classMatchesRegex(el, maskTextClass, true)) return true; + if (classMatchesRegex(el, maskTextClass, checkAncestors)) return true; } - if (maskTextSelector) { - if (el.matches(maskTextSelector)) return true; - if (el.closest(maskTextSelector)) return true; + if (checkAncestors) { + if (el.closest(maskTextSelector)) return true; + } else { + if (el.matches(maskTextSelector)) return true; + } } } catch (e) { // @@ -426,8 +431,7 @@ function serializeNode( mirror: Mirror; blockClass: string | RegExp; blockSelector: string | null; - maskTextClass: string | RegExp; - maskTextSelector: string | null; + needsMask: boolean | undefined; inlineStylesheet: boolean; maskInputOptions: MaskInputOptions; maskTextFn: MaskTextFn | undefined; @@ -447,8 +451,7 @@ function serializeNode( mirror, blockClass, blockSelector, - maskTextClass, - maskTextSelector, + needsMask, inlineStylesheet, maskInputOptions = {}, maskTextFn, @@ -500,8 +503,7 @@ function serializeNode( }); case n.TEXT_NODE: return serializeTextNode(n as Text, { - maskTextClass, - maskTextSelector, + needsMask, maskTextFn, rootId, }); @@ -531,13 +533,12 @@ function getRootId(doc: Document, mirror: Mirror): number | undefined { function serializeTextNode( n: Text, options: { - maskTextClass: string | RegExp; - maskTextSelector: string | null; + needsMask: boolean | undefined; maskTextFn: MaskTextFn | undefined; rootId: number | undefined; }, ): serializedNode { - const { maskTextClass, maskTextSelector, maskTextFn, rootId } = options; + const { needsMask, maskTextFn, rootId } = options; // The parent node may not be a html element which has a tagName attribute. // So just let it be undefined which is ok in this use case. const parentTagName = n.parentNode && (n.parentNode as HTMLElement).tagName; @@ -568,12 +569,7 @@ function serializeTextNode( if (isScript) { textContent = 'SCRIPT_PLACEHOLDER'; } - if ( - !isStyle && - !isScript && - textContent && - needMaskingText(n, maskTextClass, maskTextSelector) - ) { + if (!isStyle && !isScript && textContent && needsMask) { textContent = maskTextFn ? maskTextFn(textContent, n.parentElement) : textContent.replace(/[\S]/g, '*'); @@ -935,6 +931,7 @@ export function serializeNodeWithId( inlineStylesheet: boolean; newlyAddedElement?: boolean; maskInputOptions?: MaskInputOptions; + needsMask?: boolean; maskTextFn: MaskTextFn | undefined; maskInputFn: MaskInputFn | undefined; slimDOMOptions: SlimDOMOptions; @@ -980,14 +977,29 @@ export function serializeNodeWithId( keepIframeSrcFn = () => false, newlyAddedElement = false, } = options; + let { needsMask } = options; let { preserveWhiteSpace = true } = options; + + if ( + !needsMask && + n.childNodes // we can avoid the check on leaf elements, as masking is applied to child text nodes only + ) { + // perf: if needsMask = true, children won't also need to check + const checkAncestors = needsMask === undefined; // if false, we've already checked ancestors + needsMask = needMaskingText( + n as Element, + maskTextClass, + maskTextSelector, + checkAncestors, + ); + } + const _serializedNode = serializeNode(n, { doc, mirror, blockClass, blockSelector, - maskTextClass, - maskTextSelector, + needsMask, inlineStylesheet, maskInputOptions, maskTextFn, @@ -1058,6 +1070,7 @@ export function serializeNodeWithId( mirror, blockClass, blockSelector, + needsMask, maskTextClass, maskTextSelector, skipChild, @@ -1118,6 +1131,7 @@ export function serializeNodeWithId( mirror, blockClass, blockSelector, + needsMask, maskTextClass, maskTextSelector, skipChild: false, @@ -1165,6 +1179,7 @@ export function serializeNodeWithId( mirror, blockClass, blockSelector, + needsMask, maskTextClass, maskTextSelector, skipChild: false, diff --git a/packages/rrweb/src/record/mutation.ts b/packages/rrweb/src/record/mutation.ts index 7c209605d0..2f6b6550ff 100644 --- a/packages/rrweb/src/record/mutation.ts +++ b/packages/rrweb/src/record/mutation.ts @@ -515,6 +515,7 @@ export default class MutationBuffer { m.target, this.maskTextClass, this.maskTextSelector, + true, // checkAncestors ) && value ? this.maskTextFn ? this.maskTextFn(value, closestElementOfNode(m.target)) diff --git a/packages/rrweb/test/__snapshots__/integration.test.ts.snap b/packages/rrweb/test/__snapshots__/integration.test.ts.snap index e320254111..e3fb552c7e 100644 --- a/packages/rrweb/test/__snapshots__/integration.test.ts.snap +++ b/packages/rrweb/test/__snapshots__/integration.test.ts.snap @@ -793,6 +793,243 @@ exports[`record integration tests can mask character data mutations 1`] = ` } ] } + }, + { + \\"type\\": 3, + \\"data\\": { + \\"source\\": 0, + \\"texts\\": [ + { + \\"id\\": 22, + \\"value\\": \\"****** *******\\" + } + ], + \\"attributes\\": [], + \\"removes\\": [], + \\"adds\\": [] + } + } +]" +`; + +exports[`record integration tests can mask character data mutations with regexp 1`] = ` +"[ + { + \\"type\\": 0, + \\"data\\": {} + }, + { + \\"type\\": 1, + \\"data\\": {} + }, + { + \\"type\\": 4, + \\"data\\": { + \\"href\\": \\"about:blank\\", + \\"width\\": 1920, + \\"height\\": 1080 + } + }, + { + \\"type\\": 2, + \\"data\\": { + \\"node\\": { + \\"type\\": 0, + \\"childNodes\\": [ + { + \\"type\\": 1, + \\"name\\": \\"html\\", + \\"publicId\\": \\"\\", + \\"systemId\\": \\"\\", + \\"id\\": 2 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"html\\", + \\"attributes\\": {}, + \\"childNodes\\": [ + { + \\"type\\": 2, + \\"tagName\\": \\"head\\", + \\"attributes\\": {}, + \\"childNodes\\": [], + \\"id\\": 4 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"body\\", + \\"attributes\\": {}, + \\"childNodes\\": [ + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n \\", + \\"id\\": 6 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"p\\", + \\"attributes\\": {}, + \\"childNodes\\": [ + { + \\"type\\": 3, + \\"textContent\\": \\"mutation observer\\", + \\"id\\": 8 + } + ], + \\"id\\": 7 + }, + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n \\", + \\"id\\": 9 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"ul\\", + \\"attributes\\": {}, + \\"childNodes\\": [ + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n \\", + \\"id\\": 11 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"li\\", + \\"attributes\\": {}, + \\"childNodes\\": [], + \\"id\\": 12 + }, + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n \\", + \\"id\\": 13 + } + ], + \\"id\\": 10 + }, + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n \\", + \\"id\\": 14 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"canvas\\", + \\"attributes\\": {}, + \\"childNodes\\": [], + \\"id\\": 15 + }, + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n\\\\n \\", + \\"id\\": 16 + }, + { + \\"type\\": 2, + \\"tagName\\": \\"script\\", + \\"attributes\\": {}, + \\"childNodes\\": [ + { + \\"type\\": 3, + \\"textContent\\": \\"SCRIPT_PLACEHOLDER\\", + \\"id\\": 18 + } + ], + \\"id\\": 17 + }, + { + \\"type\\": 3, + \\"textContent\\": \\"\\\\n \\\\n \\\\n\\", + \\"id\\": 19 + } + ], + \\"id\\": 5 + } + ], + \\"id\\": 3 + } + ], + \\"id\\": 1 + }, + \\"initialOffset\\": { + \\"left\\": 0, + \\"top\\": 0 + } + } + }, + { + \\"type\\": 3, + \\"data\\": { + \\"source\\": 0, + \\"texts\\": [], + \\"attributes\\": [ + { + \\"id\\": 10, + \\"attributes\\": { + \\"class\\": \\"custom-mask\\" + } + }, + { + \\"id\\": 7, + \\"attributes\\": { + \\"class\\": \\"custom-mask\\" + } + } + ], + \\"removes\\": [ + { + \\"parentId\\": 7, + \\"id\\": 8 + } + ], + \\"adds\\": [ + { + \\"parentId\\": 10, + \\"nextId\\": null, + \\"node\\": { + \\"type\\": 2, + \\"tagName\\": \\"li\\", + \\"attributes\\": {}, + \\"childNodes\\": [], + \\"id\\": 20 + } + }, + { + \\"parentId\\": 20, + \\"nextId\\": null, + \\"node\\": { + \\"type\\": 3, + \\"textContent\\": \\"*** **** ****\\", + \\"id\\": 21 + } + }, + { + \\"parentId\\": 7, + \\"nextId\\": null, + \\"node\\": { + \\"type\\": 3, + \\"textContent\\": \\"*******\\", + \\"id\\": 22 + } + } + ] + } + }, + { + \\"type\\": 3, + \\"data\\": { + \\"source\\": 0, + \\"texts\\": [ + { + \\"id\\": 21, + \\"value\\": \\"********** ****** ** ****** *** **** ****\\" + } + ], + \\"attributes\\": [], + \\"removes\\": [], + \\"adds\\": [] + } } ]" `; diff --git a/packages/rrweb/test/integration.test.ts b/packages/rrweb/test/integration.test.ts index c627be84cb..0d4c871854 100644 --- a/packages/rrweb/test/integration.test.ts +++ b/packages/rrweb/test/integration.test.ts @@ -1207,6 +1207,45 @@ describe('record integration tests', function (this: ISuite) { p.innerText = 'mutated'; }); + await page.evaluate(() => { + // generate a characterData mutation; innerText doesn't do that + const p = document.querySelector('p') as HTMLParagraphElement; + (p.childNodes[0] as Text).insertData(0, 'doubly '); + }); + + const snapshots = (await page.evaluate( + 'window.snapshots', + )) as eventWithTime[]; + assertSnapshot(snapshots); + }); + + it('can mask character data mutations with regexp', async () => { + const page: puppeteer.Page = await browser.newPage(); + await page.goto('about:blank'); + await page.setContent( + getHtml.call(this, 'mutation-observer.html', { + maskTextClass: /custom/, + }), + ); + + await page.evaluate(() => { + const li = document.createElement('li'); + const ul = document.querySelector('ul') as HTMLUListElement; + const p = document.querySelector('p') as HTMLParagraphElement; + [ul, p].forEach((element) => { + element.className = 'custom-mask'; + }); + ul.appendChild(li); + li.innerText = 'new list item'; + p.innerText = 'mutated'; + }); + + await page.evaluate(() => { + // generate a characterData mutation; innerText doesn't do that + const li = document.querySelector('li:not(:empty)') as HTMLLIElement; + (li.childNodes[0] as Text).insertData(0, 'descendent should be masked '); + }); + const snapshots = (await page.evaluate( 'window.snapshots', )) as eventWithTime[]; diff --git a/packages/rrweb/test/utils.ts b/packages/rrweb/test/utils.ts index 5a90f62031..6cd93281f9 100644 --- a/packages/rrweb/test/utils.ts +++ b/packages/rrweb/test/utils.ts @@ -693,6 +693,7 @@ export function generateRecordSnippet(options: recordOptions) { maskAllInputs: ${options.maskAllInputs}, maskInputOptions: ${JSON.stringify(options.maskAllInputs)}, userTriggeredOnInput: ${options.userTriggeredOnInput}, + maskTextClass: ${options.maskTextClass}, maskTextFn: ${options.maskTextFn}, maskInputFn: ${options.maskInputFn}, recordCanvas: ${options.recordCanvas},