"use strict";
/**
 * TypeScript interop helper: re-exports property `key` of `source` on `target`
 * (optionally under the name `alias`).
 *
 * When property descriptors are available, the binding is installed with
 * Object.defineProperty. A live getter is used unless the source property is
 * already an accessor on an ES module or is a frozen (non-writable,
 * non-configurable) data property, in which case its own descriptor is reused.
 * An already-installed helper on `this` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    var exportName = alias === undefined ? key : alias;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var needsLiveGetter;
    if (!descriptor) {
        needsLiveGetter = true;
    }
    else if ("get" in descriptor) {
        // Accessors are only trusted as-is when they come from an ES module.
        needsLiveGetter = !source.__esModule;
    }
    else {
        // Mutable data properties must be read lazily to stay in sync.
        needsLiveGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsLiveGetter) {
        descriptor = {
            enumerable: true,
            get: function () {
                return source[key];
            }
        };
    }
    Object.defineProperty(target, exportName, descriptor);
}) : (function (target, source, key, alias) {
    // Legacy fallback: plain value copy.
    target[alias === undefined ? key : alias] = source[key];
}));
/**
 * TypeScript interop helper: stores `value` as the "default" export of `namespace`.
 *
 * Uses an enumerable, read-only property definition when Object.create exists,
 * otherwise falls back to a plain assignment. An already-installed helper on
 * `this` takes precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (namespace, value) {
        Object.defineProperty(namespace, "default", { value: value, enumerable: true });
    }
    : function (namespace, value) {
        namespace["default"] = value;
    });
/**
 * TypeScript interop helper emulating `import * as ns from "..."` for CommonJS.
 *
 * ES modules (flagged with `__esModule`) are returned untouched. Anything else
 * is wrapped in a fresh namespace object: every own property except "default"
 * is re-exported through __createBinding, and the module itself becomes the
 * namespace's "default" via __setModuleDefault.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing function: the key-listing strategy is picked on first use.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var index = 0; index < names.length; index++) {
                if (names[index] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[index]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * TypeScript interop helper emulating `import x from "..."` for CommonJS.
 *
 * ES modules (flagged with `__esModule`) are returned as-is; any other value
 * is wrapped so that it is reachable as the "default" property.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Document_js_1 = __importDefault(require("../nodes/document/Document.cjs"));
const PropertySymbol = __importStar(require("../PropertySymbol.cjs"));
const NamespaceURI_js_1 = __importDefault(require("../config/NamespaceURI.cjs"));
const HTMLElementConfig_js_1 = __importDefault(require("../config/HTMLElementConfig.cjs"));
const HTMLElementConfigContentModelEnum_js_1 = __importDefault(require("../config/HTMLElementConfigContentModelEnum.cjs"));
const SVGElementConfig_js_1 = __importDefault(require("../config/SVGElementConfig.cjs"));
const StringUtility_js_1 = __importDefault(require("../utilities/StringUtility.cjs"));
const XMLEncodeUtility_js_1 = __importDefault(require("../utilities/XMLEncodeUtility.cjs"));
const NodeTypeEnum_js_1 = __importDefault(require("../nodes/node/NodeTypeEnum.cjs"));
const NodeFactory_js_1 = __importDefault(require("../nodes/NodeFactory.cjs"));
/**
 * Markup RegExp.
 *
 * Tokenizes the markup delimiters the parser reacts to. Exactly one group is
 * set per match. "<!--" is listed before "<!" so comments win over document
 * types. No alternative can match the empty string, so repeated exec() calls
 * always advance.
 *
 * Group 1: Beginning of start tag (e.g. "div" in "<div").
 * Group 2: End tag (e.g. "div" in "</div>").
 * Group 3: Comment start tag "<!--"
 * Group 4: Comment end tag "-->" (or "--!>")
 * Group 5: Document type start tag "<!"
 * Group 6: Processing instruction start tag "<?"
 * Group 7: End of self closing start tag (e.g. "/>" in "<img/>").
 * Group 8: End of start tag or comment tag (e.g. ">" in "<div>").
 */
const MARKUP_REGEXP = /<([^\s/!>?]+)|<\/([^\s/!>?]+)\s*>|(<!--)|(-->|--!>)|(<!)|(<\?)|(\/>)|(>)/gm;
/**
 * Attribute RegExp.
 *
 * Four alternatives, tried in order: unquoted value, double-quoted value,
 * single-quoted value, and name without value.
 * The end-apostrophe groups (5 and 8) are optional in the pattern, so they are
 * an empty string when the closing apostrophe is missing.
 *
 * Group 1: Attribute name when the attribute has a value with no apostrophes (e.g. "name" in "<div name=value>").
 * Group 2: Attribute value when the attribute has a value with no apostrophes (e.g. "value" in "<div name=value>").
 * Group 3: Attribute name when the attribute has a value using double apostrophe (e.g. "name" in "<div name="value">").
 * Group 4: Attribute value when the attribute has a value using double apostrophe (e.g. "value" in "<div name="value">").
 * Group 5: Attribute end apostrophe when the attribute has a value using double apostrophe (e.g. '"' in "<div name="value">").
 * Group 6: Attribute name when the attribute has a value using single apostrophe (e.g. "name" in "<div name='value'>").
 * Group 7: Attribute value when the attribute has a value using single apostrophe (e.g. "value" in "<div name='value'>").
 * Group 8: Attribute end apostrophe when the attribute has a value using single apostrophe (e.g. "'" in "<div name='value'>").
 * Group 9: Attribute name when the attribute has no value (e.g. "disabled" in "<div disabled>").
 */
const ATTRIBUTE_REGEXP = /\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*([^"'=<>\\`\s]+)|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*"([^"]*)("{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*'([^']*)('{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)/gm;
/**
 * Document type attribute RegExp.
 *
 * Matches each non-empty, double-quoted string.
 * The pattern is global, so "lastIndex" is stateful between exec()/test()
 * calls made directly on this shared instance.
 *
 * Group 1: Attribute value (the text between the double quotes).
 */
const DOCUMENT_TYPE_ATTRIBUTE_REGEXP = /"([^"]+)"/gm;
/**
 * Space RegExp.
 *
 * Matches one run of consecutive whitespace characters. It is not global, so
 * only the first run is matched.
 */
const SPACE_REGEXP = /\s+/;
/**
 * Space in the beginning of string RegExp.
 *
 * Matches the run of whitespace characters at the very start of a string.
 */
const SPACE_IN_BEGINNING_REGEXP = /^\s+/;
/**
 * Markup read state (which state the parser is in).
 *
 * String enum: every member maps its name to the identical string value.
 */
var MarkupReadStateEnum;
(function (states) {
    const stateNames = [
        "any",
        "startTag",
        "comment",
        "documentType",
        "processingInstruction",
        "rawTextElement"
    ];
    for (const stateName of stateNames) {
        states[stateName] = stateName;
    }
})(MarkupReadStateEnum || (MarkupReadStateEnum = {}));
/**
 * How much of the HTML document that has been parsed (where the parser level is).
 *
 * Numeric enum with reverse mapping: each name maps to its ordinal (starting
 * at 0) and each ordinal maps back to its name.
 */
var HTMLDocumentStructureLevelEnum;
(function (levels) {
    const orderedNames = [
        "root",
        "doctype",
        "documentElement",
        "head",
        "additionalHeadWithoutBody",
        "body",
        "afterBody"
    ];
    orderedNames.forEach((levelName, ordinal) => {
        levels[levelName] = ordinal;
        levels[ordinal] = levelName;
    });
})(HTMLDocumentStructureLevelEnum || (HTMLDocumentStructureLevelEnum = {}));
/**
* HTML parser.
*/
class HTMLParser {
window;
evaluateScripts = false;
rootNode = null;
rootDocument = null;
nodeStack = [];
tagNameStack = [];
documentStructure = null;
startTagIndex = 0;
markupRegExp = null;
nextElement = null;
currentNode = null;
readState = MarkupReadStateEnum.any;
/**
* Constructor.
*
* @param window Window.
* @param [options] Options.
* @param [options.evaluateScripts] Set to "true" to enable script execution
*/
constructor(window, options) {
this.window = window;
if (options?.evaluateScripts) {
this.evaluateScripts = true;
}
}
/**
* Parses HTML and returns a root node containing the nodes found.
*
* @param html HTML string.
* @param [rootNode] Root node.
* @returns Root node.
*/
parse(html, rootNode) {
this.rootNode = rootNode || this.window.document.createDocumentFragment();
this.rootDocument = this.rootNode instanceof Document_js_1.default ? this.rootNode : this.window.document;
this.nodeStack = [this.rootNode];
this.tagNameStack = [null];
this.currentNode = this.rootNode;
this.readState = MarkupReadStateEnum.any;
this.documentStructure = null;
this.startTagIndex = 0;
this.markupRegExp = new RegExp(MARKUP_REGEXP, 'gm');
if (this.rootNode instanceof Document_js_1.default) {
const { doctype, documentElement, head, body } = this.rootNode;
if (!documentElement || !head || !body) {
throw new Error('Failed to parse HTML: The root node must have "documentElement", "head" and "body".\n\nWe should not end up here and it is therefore a bug in Happy DOM. Please report this issue.');
}
this.documentStructure = {
nodes: {
doctype: doctype || null,
documentElement,
head,
body
},
level: HTMLDocumentStructureLevelEnum.root
};
}
if (this.rootNode instanceof this.window.HTMLHtmlElement) {
const head = this.rootDocument.createElement('head');
const body = this.rootDocument.createElement('body');
while (this.rootNode[PropertySymbol.nodeArray].length > 0) {
this.rootNode[PropertySymbol.removeChild](this.rootNode[PropertySymbol.nodeArray][this.rootNode[PropertySymbol.nodeArray].length - 1]);
}
this.rootNode[PropertySymbol.appendChild](head);
this.rootNode[PropertySymbol.appendChild](body);
this.documentStructure = {
nodes: {
doctype: null,
documentElement: this.rootNode,
head,
body
},
level: HTMLDocumentStructureLevelEnum.documentElement
};
}
let match;
let lastIndex = 0;
html = String(html);
while ((match = this.markupRegExp.exec(html))) {
switch (this.readState) {
case MarkupReadStateEnum.any:
// Plain text between tags.
if (match.index !== lastIndex &&
(match[1] || match[2] || match[3] || match[4] || match[5] !== undefined || match[6])) {
this.parsePlainText(html.substring(lastIndex, match.index));
}
if (match[1]) {
// Start tag.
this.nextElement = this.getStartTagElement(match[1]);
this.startTagIndex = this.markupRegExp.lastIndex;
this.readState = MarkupReadStateEnum.startTag;
}
else if (match[2]) {
// End tag.
this.parseEndTag(match[2]);
}
else if (match[3]) {
// Comment.
this.startTagIndex = this.markupRegExp.lastIndex;
this.readState = MarkupReadStateEnum.comment;
}
else if (match[5] !== undefined) {
// Document type.
this.startTagIndex = this.markupRegExp.lastIndex;
this.readState = MarkupReadStateEnum.documentType;
}
else if (match[6]) {
// Processing instruction.
this.startTagIndex = this.markupRegExp.lastIndex;
this.readState = MarkupReadStateEnum.processingInstruction;
}
else {
// Plain text between tags, including the matched tag as it is not a valid start or end tag.
this.parsePlainText(html.substring(lastIndex, this.markupRegExp.lastIndex));
}
break;
case MarkupReadStateEnum.startTag:
// End of start tag
// match[2] is matching an end tag in case the start tag wasn't closed (e.g. "<div\n</ul>" instead of "<div>\n</ul>").
// match[7] is matching "/>" (e.g. "<img/>").
// match[8] is matching ">" (e.g. "<div>").
if (match[7] || match[8] || match[2]) {
if (this.nextElement) {
const attributeString = html.substring(this.startTagIndex, match[2] ? this.markupRegExp.lastIndex - 1 : match.index);
const isSelfClosed = !!match[7];
this.parseEndOfStartTag(attributeString, isSelfClosed);
}
else {
// If "nextElement" is set to null, the tag is not allowed (<html>, <head> and <body> are not allowed in an HTML fragment or to be nested).
this.readState = MarkupReadStateEnum.any;
}
}
break;
case MarkupReadStateEnum.comment:
// Comment end tag.
if (match[4]) {
this.parseComment(html.substring(this.startTagIndex, match.index));
}
break;
case MarkupReadStateEnum.documentType:
// Document type end tag.
if (match[7] || match[8]) {
this.parseDocumentType(html.substring(this.startTagIndex, match.index));
}
break;
case MarkupReadStateEnum.processingInstruction:
// Processing instruction end tag.
if (match[7] || match[8]) {
// Processing instructions are not supported in HTML and are rendered as comments.
this.parseComment('?' + html.substring(this.startTagIndex, match.index));
}
break;
case MarkupReadStateEnum.rawTextElement:
// End tag of raw text content.
//