import type { Token } from './types';
import { childlessTags } from './tags';

/** Lexer state: the input string, the current cursor index and the tokens emitted so far. */
interface State {
  str: string;
  position: number;
  tokens: Token[];
}

/** Characters that, directly after `<`, mark the start of an opening tag. */
const TAG_OPEN_CHAR = /[A-Za-z0-9]/;

/** A single whitespace character. */
const WHITESPACE_CHAR = /\s/;

/** True when `char` can be part of a tag name (not whitespace, `/` or `>`). */
const isTagNameChar = (char: string): boolean =>
  !(WHITESPACE_CHAR.test(char) || char === '/' || char === '>');

/**
 * Moves the cursor to the absolute index `end`.
 *
 * @param state lexer state to mutate
 * @param end   absolute index in `state.str` to jump to
 */
const jumpPosition = (state: State, end: number) => {
  const len = end - state.position;
  movePositopn(state, len);
};

/**
 * Advances the cursor by `len` characters.
 *
 * The identifier is misspelled ("Positopn"); it is kept as-is because other
 * functions in this file call it by this name.
 *
 * @param state lexer state to mutate
 * @param len   number of characters to advance (relative offset)
 */
const movePositopn = (state: State, len: number) => {
  state.position = state.position + len;
};

/**
 * Finds where a run of plain text ends.
 *
 * A `<` only terminates text when it is followed by `/`, `!` or an
 * alphanumeric character; any other `<` is treated as literal text.
 *
 * @param str   input string
 * @param index index to start searching from
 * @returns index of the terminating `<`, or -1 when the text runs to the end
 */
const findTextEnd = (str: string, index: number): number => {
  let searchFrom = index;
  for (;;) {
    const textEnd = str.indexOf('<', searchFrom);
    if (textEnd === -1) {
      return -1;
    }
    const char = str.charAt(textEnd + 1);
    if (char === '/' || char === '!' || TAG_OPEN_CHAR.test(char)) {
      return textEnd;
    }
    // A literal `<` inside text: keep looking after it.
    searchFrom = textEnd + 1;
  }
};

/**
 * Emits a `text` token for the characters between the cursor and the next
 * tag/comment start (or the end of input) and moves the cursor there.
 * Does nothing when a tag/comment starts exactly at the cursor.
 *
 * @param state lexer state to read from and mutate
 */
const lexText = (state: State) => {
  const { str } = state;
  let textEnd = findTextEnd(str, state.position);
  if (textEnd === state.position) return;
  if (textEnd === -1) {
    textEnd = str.length;
  }
  const content = str.slice(state.position, textEnd);
  jumpPosition(state, textEnd);
  state.tokens.push({
    type: 'text',
    content,
  });
};

/**
 * Emits a `comment` token. The first four characters at the cursor are
 * skipped unconditionally (the `<!--` opener), and the content runs up to the
 * next `-->`. An unterminated comment extends to the end of input.
 *
 * @param state lexer state to read from and mutate
 */
const lexComment = (state: State) => {
  const { str } = state;
  movePositopn(state, 4);
  let contentEnd = str.indexOf('-->', state.position);
  let commentEnd = contentEnd + 3;
  if (contentEnd === -1) {
    contentEnd = commentEnd = str.length;
  }
  const content = str.slice(state.position, contentEnd);
  jumpPosition(state, commentEnd);
  state.tokens.push({
    type: 'comment',
    content,
  });
};

/**
 * Reads a tag name at the cursor, emits a `tag` token for it and leaves the
 * cursor on the first character after the name.
 *
 * Leading whitespace, `/` and `>` characters are skipped before the name.
 * When the input ends before any name character is found, an empty name is
 * emitted and the cursor stays at the end of input instead of overshooting it.
 *
 * @param state lexer state to read from and mutate
 * @returns the tag name exactly as written in the input (case preserved)
 */
const lexTagName = (state: State): string => {
  const { str } = state;
  const len = str.length;

  let start = state.position;
  while (start < len && !isTagNameChar(str.charAt(start))) {
    start++;
  }

  // The character at `start` (if any) is already known to belong to the name,
  // so scanning resumes one past it. Do not step beyond the end of input.
  let end = start < len ? start + 1 : start;
  while (end < len && isTagNameChar(str.charAt(end))) {
    end++;
  }

  jumpPosition(state, end);
  const tagName = str.slice(start, end);
  state.tokens.push({
    type: 'tag',
    content: tagName,
  });
  return tagName;
};

const lexTagAttributes =
/*
 * lexTagAttributes: scans the inside of a tag from the cursor and emits one
 * `attribute` token per attribute.
 *
 * Pass 1 splits the text into whitespace-separated "words", treating anything
 * between matching single or double quotes as part of the current word. The
 * scan stops at the first unquoted `/` or `>` and the cursor is left on that
 * character.
 *
 * Pass 2 re-joins words that were split around an `=` sign
 * (`name =value`, `name = value`, `name= value`) before emitting tokens.
 */
(state: State) => {
  const { str, tokens } = state;
  let cursor = state.position;
  // The quote character currently open, or null when outside a quoted value.
  let quote = null;
  let wordBegin = cursor;
  const words = [];
  const len = str.length;
  while (cursor < len) {
    const char = str.charAt(cursor);
    if (quote) {
      // Inside quotes everything is consumed verbatim until the same quote
      // character appears again.
      const isQuoteEnd = char === quote;
      if (isQuoteEnd) quote = null;
      cursor++;
      continue;
    }
    const isTagEnd = char === '/' || char === '>';
    if (isTagEnd) {
      // Flush the word in progress (if non-empty) and stop scanning.
      if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor));
      break;
    }
    const isWordEnd = /\s/.test(char);
    if (isWordEnd) {
      // Whitespace closes the current word; the next word starts after it.
      if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor));
      wordBegin = cursor + 1;
      cursor++;
      continue;
    }
    const isQuoteStart = char === "'" || char === '"';
    if (isQuoteStart) {
      quote = char;
      cursor++;
      continue;
    }
    cursor++;
  }
  // NOTE(review): when the loop ends because the input ran out (no unquoted
  // `/` or `>` was found), a word still in progress is not pushed.
  jumpPosition(state, cursor);
  const type = 'attribute';
  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    const isNotPair = word.indexOf('=') === -1;
    if (isNotPair) {
      const secondWord = words[i + 1];
      if (secondWord && secondWord.startsWith('=')) {
        if (secondWord.length > 1) {
          // `name =value`: glue the two words together.
          const newWord = word + secondWord;
          tokens.push({ type, content: newWord });
          i += 1;
          continue;
        }
        // The next word is a lone `=`; skip over it.
        const thirdWord = words[i + 2];
        i += 1;
        if (thirdWord) {
          // `name = value`: join the name and the value with a single `=`.
          const newWord = word + '=' + thirdWord;
          tokens.push({ type, content: newWord });
          i += 1;
          continue;
        }
        // `name =` with nothing after it: falls through and emits `name` alone.
      }
    }
    if (word.endsWith('=')) {
      const secondWord = words[i + 1];
      if (secondWord && secondWord.indexOf('=') === -1) {
        // `name= value`: the following word is the value.
        const newWord = word + secondWord;
        tokens.push({ type, content: newWord });
        i += 1;
        continue;
      }
      // Dangling `name=`: drop the trailing `=` and emit the name only.
      const newWord = word.slice(0, -1);
      tokens.push({ type, content: newWord });
      continue;
    }
    // Already a complete `name=value` pair or a bare attribute name.
    tokens.push({ type, content: word });
  }
};

const lexSkipTag = (tagName: string, state: State) => {
  const { str, tokens } = state;
  const safeTagName = tagName.toLowerCase();
  const len = str.length;
  let index = state.position;
  while (index < len) {
    // NOTE(review): the text looks truncated from here on. The string literal
    // passed to indexOf runs straight into what appears to be the body of a
    // different function. Restore this region from the upstream file before
    // relying on it.
    const nextTag = str.indexOf(' { const { str } = state; const secondChar = str.charAt(state.position + 1); const tagStartClose = secondChar === '/'; 
// NOTE(review): these first statements are the tail of a function whose
// opening is not visible on this line — presumably `lexTag`, which `lex`
// below calls and whose result it uses as a tag name. Confirm against the
// upstream file.
  // Step over `<` (1 character) or `</` (2 characters).
  movePositopn(state, tagStartClose ? 2 : 1);
  state.tokens.push({
    type: 'tag-start',
    close: tagStartClose,
  });
  const tagName = lexTagName(state);
  lexTagAttributes(state);
  // After the attributes the cursor sits on the tag terminator: `/` means the
  // tag ends with `/>` (2 characters), otherwise a single character is skipped.
  const firstChar = str.charAt(state.position);
  const tagEndClose = firstChar === '/';
  movePositopn(state, tagEndClose ? 2 : 1);
  state.tokens.push({
    type: 'tag-end',
    close: tagEndClose,
  });
  return tagName;
};

/**
 * Main loop: consumes `state.str` from the current position to the end,
 * appending tokens to `state.tokens`.
 *
 * Each iteration first tries to lex text. If the cursor did not move, a
 * comment or tag starts at the cursor and is lexed instead.
 */
const lex = (state: State) => {
  const str = state.str;
  const len = str.length;
  while (state.position < len) {
    const start = state.position;
    lexText(state);
    if (state.position === start) {
      // `start` is on `<`; a comment is recognised by the `!--` that follows.
      const isComment = str.startsWith('!--', start + 1);
      if (isComment) lexComment(state);
      else {
        const tagName = lexTag(state);
        const safeTag = tagName.toLowerCase();
        // Tags listed in `childlessTags` get their content handled by
        // lexSkipTag instead of the normal loop.
        // NOTE(review): presumably raw-text elements — confirm in './tags'.
        if (childlessTags.includes(safeTag)) lexSkipTag(tagName, state);
      }
    }
  }
};

/**
 * Tokenizes `str` starting at index 0.
 *
 * @param str input string to tokenize
 * @returns the tokens in the order they were emitted
 */
export const lexer = (str: string): Token[] => {
  const state = {
    str,
    position: 0,
    tokens: [],
  };
  lex(state);
  return state.tokens;
};