Spaces:
No application file
No application file
| import type { Token } from './types'; | |
| import { childlessTags } from './tags'; | |
/**
 * Mutable cursor shared by every lexing routine in this module.
 */
interface State {
  /** The complete input string being lexed. */
  str: string;
  /** Index into `str` of the next character that has not been consumed. */
  position: number;
  /** Tokens emitted so far, in source order. */
  tokens: Token[];
}
/**
 * Moves the cursor to the absolute index `end`.
 *
 * The absolute target is converted to an offset from the current position
 * and handed to `movePositopn`, so every cursor update goes through one place.
 *
 * @param state Lexer state whose `position` is updated in place.
 * @param end Absolute index in `state.str` the cursor should land on.
 */
const jumpPosition = (state: State, end: number) => {
  movePositopn(state, end - state.position);
};
/**
 * Shifts the cursor by `len` characters relative to where it currently is.
 *
 * @param state Lexer state whose `position` is updated in place.
 * @param len Offset to add to `state.position`.
 */
const movePositopn = (state: State, len: number) => {
  state.position += len;
};
/**
 * Finds where a run of plain text ends, starting the search at `index`.
 *
 * A `<` only terminates text when the character right after it is `/`, `!`,
 * or an ASCII letter or digit. Any other `<` (for example in `a < b`, or a
 * `<` that is the last character of the input) is treated as literal text
 * and skipped.
 *
 * @param str Input to search.
 * @param index Index at which the search starts.
 * @returns Index of the terminating `<`, or `-1` when there is none.
 */
const findTextEnd = (str: string, index: number) => {
  // Built once per call instead of once per candidate `<`.
  const tagNameStart = /[A-Za-z0-9]/;
  let candidate = str.indexOf('<', index);
  while (candidate !== -1) {
    const following = str.charAt(candidate + 1);
    if (following === '/' || following === '!' || tagNameStart.test(following)) {
      return candidate;
    }
    // Literal `<`: resume the search just past it.
    candidate = str.indexOf('<', candidate + 1);
  }
  return -1;
};
/**
 * Consumes a run of plain text at the cursor and emits it as a `text` token.
 *
 * Emits nothing and leaves the cursor untouched when markup begins exactly
 * at the cursor. When no further markup exists, the text runs to the end of
 * the input.
 *
 * @param state Lexer state; `position` and `tokens` are updated in place.
 */
const lexText = (state: State) => {
  const begin = state.position;
  const found = findTextEnd(state.str, begin);
  // Markup starts right here: there is no text to emit.
  if (found === begin) return;

  const stop = found === -1 ? state.str.length : found;
  const content = state.str.slice(begin, stop);
  jumpPosition(state, stop);
  state.tokens.push({ type: 'text', content });
};
/**
 * Consumes a comment that starts at the cursor and emits a `comment` token
 * holding the text between the opener and `-->`.
 *
 * The first four characters (the `<!--` opener) are skipped unconditionally.
 * An unterminated comment swallows the rest of the input.
 *
 * @param state Lexer state; `position` and `tokens` are updated in place.
 */
const lexComment = (state: State) => {
  movePositopn(state, 4);

  const bodyStart = state.position;
  const inputEnd = state.str.length;
  const terminator = state.str.indexOf('-->', bodyStart);
  const isClosed = terminator !== -1;

  const bodyEnd = isClosed ? terminator : inputEnd;
  // Resume after the three characters of `-->` when the comment is closed.
  const resumeAt = isClosed ? terminator + 3 : inputEnd;

  const content = state.str.slice(bodyStart, bodyEnd);
  jumpPosition(state, resumeAt);
  state.tokens.push({ type: 'comment', content });
};
/**
 * Reads the tag name at the cursor, emits it as a `tag` token, and returns it.
 *
 * Leading whitespace, `/` and `>` characters are skipped first; the name
 * then extends up to (not including) the next such delimiter or the end of
 * the input. The name is returned exactly as written (no case folding).
 *
 * @param state Lexer state; `position` and `tokens` are updated in place.
 * @returns The tag name; empty when the input ends before any name character.
 */
const lexTagName = (state: State) => {
  const source = state.str;
  const total = source.length;
  const isDelimiter = (ch: string) => ch === '/' || ch === '>' || /\s/.test(ch);

  // Skip delimiters until the first character of the name.
  let nameStart = state.position;
  while (nameStart < total && isDelimiter(source.charAt(nameStart))) {
    nameStart += 1;
  }

  // The character at nameStart is already known to belong to the name.
  let nameEnd = nameStart + 1;
  while (nameEnd < total && !isDelimiter(source.charAt(nameEnd))) {
    nameEnd += 1;
  }

  jumpPosition(state, nameEnd);
  const tagName = source.slice(nameStart, nameEnd);
  state.tokens.push({ type: 'tag', content: tagName });
  return tagName;
};
/**
 * Reads everything between the tag name and the end of the tag and emits one
 * `attribute` token per attribute.
 *
 * Pass 1 splits the region into whitespace-separated words. Whitespace, `/`
 * and `>` inside single or double quotes do not split or terminate; the
 * quotes themselves stay part of the word. Scanning stops at the first
 * unquoted `/` or `>`, which is left for the caller to consume. If the input
 * ends before that, the word in progress is not emitted.
 *
 * Pass 2 re-joins words that were separated only by whitespace around `=`
 * (`a =b`, `a = b`, `a= b`) into a single `name=value` token. A dangling `=`
 * with no value is dropped, leaving the bare name.
 *
 * @param state Lexer state; `position` and `tokens` are updated in place.
 */
const lexTagAttributes = (state: State) => {
  const source = state.str;
  const total = source.length;
  const words: string[] = [];

  let openQuote: string | null = null;
  let wordStart = state.position;
  let pos = state.position;

  for (; pos < total; pos++) {
    const ch = source.charAt(pos);

    if (openQuote !== null) {
      // Inside a quoted value only the matching quote is significant.
      if (ch === openQuote) openQuote = null;
      continue;
    }

    if (ch === '/' || ch === '>') {
      if (pos !== wordStart) words.push(source.slice(wordStart, pos));
      break;
    }

    if (/\s/.test(ch)) {
      if (pos !== wordStart) words.push(source.slice(wordStart, pos));
      wordStart = pos + 1;
      continue;
    }

    if (ch === "'" || ch === '"') openQuote = ch;
  }

  jumpPosition(state, pos);

  const emit = (content: string) => {
    state.tokens.push({ type: 'attribute', content });
  };

  let i = 0;
  while (i < words.length) {
    const word = words[i];
    const next = words[i + 1];

    if (!word.includes('=')) {
      if (next && next.startsWith('=')) {
        if (next.length > 1) {
          // `name` followed by `=value`.
          emit(word + next);
          i += 2;
          continue;
        }
        const afterEquals = words[i + 2];
        if (afterEquals) {
          // `name`, a lone `=`, then the value.
          emit(word + '=' + afterEquals);
          i += 3;
          continue;
        }
        // `name` followed by a dangling `=`: keep the name, drop the `=`.
        emit(word);
        i += 2;
        continue;
      }
      emit(word);
      i += 1;
      continue;
    }

    if (word.endsWith('=')) {
      if (next && !next.includes('=')) {
        // `name=` followed by a separate value word.
        emit(word + next);
        i += 2;
        continue;
      }
      // `name=` with no usable value: strip the trailing `=`.
      emit(word.slice(0, -1));
      i += 1;
      continue;
    }

    emit(word);
    i += 1;
  }
};
/**
 * Consumes the raw content of an element up to its matching closing tag,
 * without interpreting anything inside as markup.
 *
 * Each `</` found from the cursor onward is lexed into a scratch state. When
 * that closing tag's name matches `tagName` (case-insensitively), the text
 * before it is emitted as a single `text` token, followed by the closing
 * tag's own tokens. Non-matching closing tags are skipped over. When no `</`
 * remains, the routine falls back to one `lexText` call and stops.
 *
 * @param tagName Name of the element whose content is being skipped.
 * @param state Lexer state; `position` and `tokens` are updated in place.
 */
const lexSkipTag = (tagName: string, state: State) => {
  const source = state.str;
  const wanted = tagName.toLowerCase();

  let searchFrom = state.position;
  while (searchFrom < source.length) {
    const closeAt = source.indexOf('</', searchFrom);
    if (closeAt === -1) {
      lexText(state);
      return;
    }

    // Lex the candidate closing tag on a scratch state so nothing leaks into
    // the real token stream unless the tag name matches.
    const probe: State = { str: source, position: closeAt, tokens: [] };
    const closingName = lexTag(probe);
    if (closingName.toLowerCase() !== wanted) {
      searchFrom = probe.position;
      continue;
    }

    if (closeAt !== state.position) {
      const content = source.slice(state.position, closeAt);
      jumpPosition(state, closeAt);
      state.tokens.push({ type: 'text', content });
    }
    state.tokens.push(...probe.tokens);
    jumpPosition(state, probe.position);
    return;
  }
};
/**
 * Lexes one tag starting at the `<` under the cursor.
 *
 * Emits, in order: a `tag-start` token (`close` is true for `</`), the `tag`
 * name token, any `attribute` tokens, and a `tag-end` token (`close` is true
 * when the tag ends with `/`).
 *
 * @param state Lexer state; `position` and `tokens` are updated in place.
 * @returns The tag name exactly as written in the source.
 */
const lexTag = (state: State) => {
  const source = state.str;

  // `</` opens a closing tag; skip two characters instead of one.
  const opensWithSlash = source.charAt(state.position + 1) === '/';
  movePositopn(state, opensWithSlash ? 2 : 1);
  state.tokens.push({ type: 'tag-start', close: opensWithSlash });

  const tagName = lexTagName(state);
  lexTagAttributes(state);

  // A `/` at the cursor means the tag ends with `/>`; otherwise just `>`.
  const endsWithSlash = source.charAt(state.position) === '/';
  movePositopn(state, endsWithSlash ? 2 : 1);
  state.tokens.push({ type: 'tag-end', close: endsWithSlash });

  return tagName;
};
/**
 * Main lexing loop: alternates between text, comments and tags until the
 * cursor reaches the end of the input.
 *
 * Text is tried first. If the cursor did not move, markup starts here: it is
 * lexed as a comment when `!--` follows the `<`, otherwise as a tag. After a
 * tag whose lower-cased name is listed in `childlessTags`, the element's
 * content is consumed by `lexSkipTag` instead of being lexed as markup.
 * NOTE(review): `childlessTags` is imported and not visible here; presumably
 * it lists raw-text elements such as `script` and `style`. Confirm in `./tags`.
 *
 * @param state Lexer state; `position` and `tokens` are updated in place.
 */
const lex = (state: State) => {
  const { str } = state;
  const total = str.length;

  while (state.position < total) {
    const before = state.position;
    lexText(state);
    // Text was consumed; go around again for whatever follows it.
    if (state.position !== before) continue;

    if (str.startsWith('!--', before + 1)) {
      lexComment(state);
      continue;
    }

    const tagName = lexTag(state);
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state);
    }
  }
};
/**
 * Splits an HTML string into a flat list of tokens.
 *
 * The token kinds produced by this module are `text`, `comment`,
 * `tag-start`, `tag`, `attribute` and `tag-end`.
 *
 * @param str The markup to lex.
 * @returns The tokens in source order.
 */
export const lexer = (str: string): Token[] => {
  const tokens: Token[] = [];
  lex({ str, position: 0, tokens });
  return tokens;
};