"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const Document_js_1 = __importDefault(require("../nodes/document/Document.cjs")); const PropertySymbol = __importStar(require("../PropertySymbol.cjs")); const NamespaceURI_js_1 = __importDefault(require("../config/NamespaceURI.cjs")); const HTMLElementConfig_js_1 = __importDefault(require("../config/HTMLElementConfig.cjs")); const HTMLElementConfigContentModelEnum_js_1 = __importDefault(require("../config/HTMLElementConfigContentModelEnum.cjs")); const SVGElementConfig_js_1 = __importDefault(require("../config/SVGElementConfig.cjs")); const StringUtility_js_1 = __importDefault(require("../utilities/StringUtility.cjs")); const XMLEncodeUtility_js_1 = __importDefault(require("../utilities/XMLEncodeUtility.cjs")); const NodeTypeEnum_js_1 = __importDefault(require("../nodes/node/NodeTypeEnum.cjs")); const NodeFactory_js_1 = __importDefault(require("../nodes/NodeFactory.cjs")); /** * Markup RegExp. * * Group 1: Beginning of start tag (e.g. "div" in ""). * Group 3: Comment start tag "" * Group 5: Document type start tag "" in ""). * Group 8: End of start tag or comment tag (e.g. ">" in "

"). */ const MARKUP_REGEXP = /<([^\s/!>?]+)|<\/([^\s/!>?]+)\s*>|(|--!>)|()|(>)/gm; /** * Attribute RegExp. * * Group 1: Attribute name when the attribute has a value with no apostrophes (e.g. "name" in "

"). * Group 2: Attribute value when the attribute has a value with no apostrophes (e.g. "value" in "

"). * Group 3: Attribute name when the attribute has a value using double apostrophe (e.g. "name" in "

"). * Group 4: Attribute value when the attribute has a value using double apostrophe (e.g. "value" in "

"). * Group 5: Attribute end apostrophe when the attribute has a value using double apostrophe (e.g. '"' in "

"). * Group 6: Attribute name when the attribute has a value using single apostrophe (e.g. "name" in "

"). * Group 7: Attribute value when the attribute has a value using single apostrophe (e.g. "value" in "

"). * Group 8: Attribute end apostrophe when the attribute has a value using single apostrophe (e.g. "'" in "

"). * Group 9: Attribute name when the attribute has no value (e.g. "disabled" in "

"). */ const ATTRIBUTE_REGEXP = /\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*([^"'=<>\\`\s]+)|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*"([^"]*)("{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*'([^']*)('{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)/gm; /** * Document type attribute RegExp. * * Group 1: Attribute value. */ const DOCUMENT_TYPE_ATTRIBUTE_REGEXP = /"([^"]+)"/gm; /** * Space RegExp. */ const SPACE_REGEXP = /\s+/; /** * Space in the beginning of string RegExp. */ const SPACE_IN_BEGINNING_REGEXP = /^\s+/; /** * Markup read state (which state the parser is in). */ var MarkupReadStateEnum; (function (MarkupReadStateEnum) { MarkupReadStateEnum["any"] = "any"; MarkupReadStateEnum["startTag"] = "startTag"; MarkupReadStateEnum["comment"] = "comment"; MarkupReadStateEnum["documentType"] = "documentType"; MarkupReadStateEnum["processingInstruction"] = "processingInstruction"; MarkupReadStateEnum["rawTextElement"] = "rawTextElement"; })(MarkupReadStateEnum || (MarkupReadStateEnum = {})); /** * How much of the HTML document that has been parsed (where the parser level is). */ var HTMLDocumentStructureLevelEnum; (function (HTMLDocumentStructureLevelEnum) { HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["root"] = 0] = "root"; HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["doctype"] = 1] = "doctype"; HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["documentElement"] = 2] = "documentElement"; HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["head"] = 3] = "head"; HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["additionalHeadWithoutBody"] = 4] = "additionalHeadWithoutBody"; HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["body"] = 5] = "body"; HTMLDocumentStructureLevelEnum[HTMLDocumentStructureLevelEnum["afterBody"] = 6] = "afterBody"; })(HTMLDocumentStructureLevelEnum || (HTMLDocumentStructureLevelEnum = {})); /** * HTML parser. */ class HTMLParser { window; evaluateScripts = false; rootNode = null; rootDocument = null; nodeStack = []; tagNameStack = []; documentStructure = null; startTagIndex = 0; markupRegExp = null; nextElement = null; currentNode = null; readState = MarkupReadStateEnum.any; /** * Constructor. * * @param window Window. * @param [options] Options. * @param [options.evaluateScripts] Set to "true" to enable script execution */ constructor(window, options) { this.window = window; if (options?.evaluateScripts) { this.evaluateScripts = true; } } /** * Parses HTML a root element containing nodes found. * * @param html HTML string. * @param [rootNode] Root node. * @returns Root node. */ parse(html, rootNode) { this.rootNode = rootNode || this.window.document.createDocumentFragment(); this.rootDocument = this.rootNode instanceof Document_js_1.default ? this.rootNode : this.window.document; this.nodeStack = [this.rootNode]; this.tagNameStack = [null]; this.currentNode = this.rootNode; this.readState = MarkupReadStateEnum.any; this.documentStructure = null; this.startTagIndex = 0; this.markupRegExp = new RegExp(MARKUP_REGEXP, 'gm'); if (this.rootNode instanceof Document_js_1.default) { const { doctype, documentElement, head, body } = this.rootNode; if (!documentElement || !head || !body) { throw new Error('Failed to parse HTML: The root node must have "documentElement", "head" and "body".\n\nWe should not end up here and it is therefore a bug in Happy DOM. Please report this issue.'); } this.documentStructure = { nodes: { doctype: doctype || null, documentElement, head, body }, level: HTMLDocumentStructureLevelEnum.root }; } if (this.rootNode instanceof this.window.HTMLHtmlElement) { const head = this.rootDocument.createElement('head'); const body = this.rootDocument.createElement('body'); while (this.rootNode[PropertySymbol.nodeArray].length > 0) { this.rootNode[PropertySymbol.removeChild](this.rootNode[PropertySymbol.nodeArray][this.rootNode[PropertySymbol.nodeArray].length - 1]); } this.rootNode[PropertySymbol.appendChild](head); this.rootNode[PropertySymbol.appendChild](body); this.documentStructure = { nodes: { doctype: null, documentElement: this.rootNode, head, body }, level: HTMLDocumentStructureLevelEnum.documentElement }; } let match; let lastIndex = 0; html = String(html); while ((match = this.markupRegExp.exec(html))) { switch (this.readState) { case MarkupReadStateEnum.any: // Plain text between tags. if (match.index !== lastIndex && (match[1] || match[2] || match[3] || match[4] || match[5] !== undefined || match[6])) { this.parsePlainText(html.substring(lastIndex, match.index)); } if (match[1]) { // Start tag. this.nextElement = this.getStartTagElement(match[1]); this.startTagIndex = this.markupRegExp.lastIndex; this.readState = MarkupReadStateEnum.startTag; } else if (match[2]) { // End tag. this.parseEndTag(match[2]); } else if (match[3]) { // Comment. this.startTagIndex = this.markupRegExp.lastIndex; this.readState = MarkupReadStateEnum.comment; } else if (match[5] !== undefined) { // Document type. this.startTagIndex = this.markupRegExp.lastIndex; this.readState = MarkupReadStateEnum.documentType; } else if (match[6]) { // Processing instruction. this.startTagIndex = this.markupRegExp.lastIndex; this.readState = MarkupReadStateEnum.processingInstruction; } else { // Plain text between tags, including the matched tag as it is not a valid start or end tag. this.parsePlainText(html.substring(lastIndex, this.markupRegExp.lastIndex)); } break; case MarkupReadStateEnum.startTag: // End of start tag // match[2] is matching an end tag in case the start tag wasn't closed (e.g. "" instead of "

\n"). // match[7] is matching "/>" (e.g. ""). // match[8] is matching ">" (e.g. "

"). if (match[7] || match[8] || match[2]) { if (this.nextElement) { const attributeString = html.substring(this.startTagIndex, match[2] ? this.markupRegExp.lastIndex - 1 : match.index); const isSelfClosed = !!match[7]; this.parseEndOfStartTag(attributeString, isSelfClosed); } else { // If "nextElement" is set to null, the tag is not allowed (, and are not allowed in an HTML fragment or to be nested). this.readState = MarkupReadStateEnum.any; } } break; case MarkupReadStateEnum.comment: // Comment end tag. if (match[4]) { this.parseComment(html.substring(this.startTagIndex, match.index)); } break; case MarkupReadStateEnum.documentType: // Document type end tag. if (match[7] || match[8]) { this.parseDocumentType(html.substring(this.startTagIndex, match.index)); } break; case MarkupReadStateEnum.processingInstruction: // Processing instruction end tag. if (match[7] || match[8]) { // Processing instructions are not supported in HTML and are rendered as comments. this.parseComment('?' + html.substring(this.startTagIndex, match.index)); } break; case MarkupReadStateEnum.rawTextElement: // End tag of raw text content. //