Skip to content

Commit

Permalink
Merge pull request ashi009#13 from blex41/parse-comments
Browse files Browse the repository at this point in the history
feat: add support for comment parsing
  • Loading branch information
taoqf authored Jan 14, 2020
2 parents 1083ffd + 1d03ef0 commit c145442
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 3 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ Parse given data, and return root of the generated DOM.
lowerCaseTagName: false, // convert tag name to lower case (hurt performance heavily)
script: false, // retrieve content in <script> (hurt performance slightly)
style: false, // retrieve content in <style> (hurt performance slightly)
pre: false // retrieve content in <pre> (hurt performance slightly)
pre: false, // retrieve content in <pre> (hurt performance slightly)
comment: false // retrieve comments (hurt performance slightly)
}
```

Expand Down
36 changes: 34 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ import { decode } from 'he';

export enum NodeType {
ELEMENT_NODE = 1,
TEXT_NODE = 3
TEXT_NODE = 3,
COMMENT_NODE = 8
}

/**
Expand Down Expand Up @@ -52,6 +53,31 @@ export class TextNode extends Node {
}
}

/**
 * Node that holds the content of a single HTML comment.
 *
 * The value handed to the constructor is kept untouched in `rawText`;
 * the surrounding `<!--` / `-->` delimiters are only re-added on
 * serialization.
 */
export class CommentNode extends Node {
  /**
   * Node type tag identifying this node as a comment.
   */
  nodeType = NodeType.COMMENT_NODE;

  /**
   * @param value comment content, stored as-is in `rawText`
   */
  constructor(value: string) {
    super();
    this.rawText = value;
  }

  /**
   * Comment content after passing `rawText` through `decode`.
   * @return decoded comment content
   */
  get text() {
    const raw = this.rawText;
    return decode(raw);
  }

  /**
   * Serialize the node back to comment markup.
   * @return `rawText` wrapped in `<!--` and `-->`
   */
  toString() {
    return '<!--' + this.rawText + '-->';
  }
}

const kBlockElements = {
div: true,
p: true,
Expand Down Expand Up @@ -193,7 +219,7 @@ export class HTMLElement extends Node {
currentBlock.push(text);
}
}
}
}
dfs(this);
return blocks
.map(function (block) {
Expand Down Expand Up @@ -754,6 +780,7 @@ export function parse(data: string, options?: {
script?: boolean;
style?: boolean;
pre?: boolean;
comment?: boolean;
}) {
const root = new HTMLElement(null, {});
let currentParent = root;
Expand All @@ -772,6 +799,11 @@ export function parse(data: string, options?: {
lastTextPos = kMarkupPattern.lastIndex;
if (match[0][1] == '!') {
// this is a comment
if (options.comment) {
// Only keep what is in between <!-- and -->
const text = data.substring(lastTextPos - 3 , lastTextPos - match[0].length + 4);
currentParent.appendChild(new CommentNode(text));
}
continue;
}
if (options.lowerCaseTagName)
Expand Down
54 changes: 54 additions & 0 deletions test/html.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ describe('HTML Parser', function () {
var Matcher = HTMLParser.Matcher;
var HTMLElement = HTMLParser.HTMLElement;
var TextNode = HTMLParser.TextNode;
var CommentNode = HTMLParser.CommentNode;

describe('Matcher', function () {
it('should match corrent elements', function () {
Expand Down Expand Up @@ -97,6 +98,34 @@ describe('HTML Parser', function () {

});

// Without the `comment` option the comment must be absent from the parsed tree.
it('should parse "<div><a><!-- my comment --></a></div>" and return root element without comments', function () {
  // Expected tree: <div><a></a></div>
  var expectedDiv = new HTMLElement('div', {}, '');
  expectedDiv.appendChild(new HTMLElement('a', {}, ''));

  var parsed = parseHTML('<div><a><!-- my comment --></a></div>');

  parsed.firstChild.should.eql(expectedDiv);
});

// With `comment: true` the comment must show up as a CommentNode child of <a>.
it('should parse "<div><a><!-- my comment --></a></div>" and return root element with comments', function () {
  // Expected tree: <div><a><!-- my comment --></a></div>
  var expectedDiv = new HTMLElement('div', {}, '');
  var expectedAnchor = expectedDiv.appendChild(new HTMLElement('a', {}, ''));
  expectedAnchor.appendChild(new CommentNode(' my comment '));

  var parsed = parseHTML('<div><a><!-- my comment --></a></div>', { comment: true });

  parsed.firstChild.should.eql(expectedDiv);
});

// Markup inside a comment must stay plain comment text, not become child elements.
it('should not parse HTML inside comments', function () {
  // Expected tree: a <div> whose only child is the comment "<a></a>"
  var expectedDiv = new HTMLElement('div', {}, '');
  expectedDiv.appendChild(new CommentNode('<a></a>'));

  var parsed = parseHTML('<div><!--<a></a>--></div>', { comment: true });

  parsed.firstChild.should.eql(expectedDiv);
});

it('should parse picture element', function () {

var root = parseHTML('<picture><source srcset="/images/example-1.jpg 1200w, /images/example-2.jpg 1600w" sizes="100vw"><img src="/images/example.jpg" alt="Example"/></picture>');
Expand Down Expand Up @@ -319,6 +348,11 @@ describe('HTML Parser', function () {
var root = parseHTML('<span>o<p>a</p><p>b</p>c</span>');
root.structuredText.should.eql('o\na\nb\nc');
});

// Even when comments are parsed, they must not leak into structuredText.
it('should not return comments in structured text', function () {
  var markup = '<span>o<p>a</p><!-- my comment --></span>';
  var structured = parseHTML(markup, { comment: true }).structuredText;

  structured.should.eql('o\na');
});
});
describe('#set_content', function () {
it('set content string', function () {
Expand Down Expand Up @@ -350,6 +384,26 @@ describe('HTML Parser', function () {
const root = parseHTML(html);
root.toString().should.eql(html)
});

// Default options: the comment is dropped, so serialization yields an empty <p>.
it('#toString() should not return comments by default', function () {
  const serialized = parseHTML('<p><!-- my comment --></p>').toString();

  serialized.should.eql('<p></p>');
});

// With `comment: true`, serialization must round-trip the input, including the empty comment.
it('#toString() should return comments when specified', function () {
  const markup = '<!----><p><!-- my comment --></p>';
  const serialized = parseHTML(markup, { comment: true }).toString();

  serialized.should.eql(markup);
});
});

// Checks specific to comment nodes produced by the parser.
describe('Comment Element', function () {
  it('comment nodeType should be 8', function () {
    // A top-level comment becomes the first child of the returned root.
    var commentNode = parseHTML('<!-- my comment -->', { comment: true }).firstChild;

    commentNode.nodeType.should.eql(8);
  });
});

describe('Custom Element', function () {
Expand Down

0 comments on commit c145442

Please sign in to comment.