| | import type { SerializedHTMLElement } from "../../scrape/types"; |
| | import { MarkdownElementType, type MarkdownElement } from "../types"; |
| |
|
| | |
| |
|
| | |
/**
 * Renders a single markdown element (its own content only, not its children)
 * as markdown text.
 *
 * The element's content is trimmed first. Headers, block quotes, code blocks
 * and plain content end with a blank line; list items end with a single
 * newline so consecutive items stay in the same list.
 *
 * Ordered list items are numbered by their 1-based position inside the run of
 * consecutive ordered list items they belong to within `elem.parent.children`.
 * An item without a parent is numbered 1.
 *
 * @param elem - The element to render.
 * @returns The markdown text for `elem` alone.
 */
export function stringifyMarkdownElement(elem: MarkdownElement): string {
  const content = elem.content.trim();
  if (elem.type === MarkdownElementType.Header) return `${"#".repeat(elem.level)} ${content}\n\n`;
  if (elem.type === MarkdownElementType.BlockQuote) {
    return `${"> ".repeat(elem.depth)}${content}\n\n`;
  }
  if (elem.type === MarkdownElementType.CodeBlock) return `\`\`\`\n${content}\n\`\`\`\n\n`;

  if (elem.type === MarkdownElementType.UnorderedListItem) return `- ${content}\n`;
  if (elem.type === MarkdownElementType.OrderedListItem) {
    const siblings = elem.parent?.children ?? [elem];
    const currentIndex = siblings.indexOf(elem);

    // Walk backwards to the first item of the contiguous ordered-list run that
    // contains this element; anything else between items restarts numbering.
    let runStart = currentIndex;
    while (
      runStart > 0 &&
      siblings[runStart - 1]?.type === MarkdownElementType.OrderedListItem
    ) {
      runStart--;
    }

    const order = currentIndex - runStart + 1;
    return `${order}. ${content}\n`;
  }

  return `${content}\n\n`;
}
| |
|
| | |
/**
 * Renders an element followed by all of its descendants, depth-first and in
 * child order, by concatenating the output of `stringifyMarkdownElement`.
 *
 * @param elem - Root of the subtree to render.
 * @returns The concatenated markdown for `elem` and everything below it.
 */
export function stringifyMarkdownElementTree(elem: MarkdownElement): string {
  let output = stringifyMarkdownElement(elem);
  // Only elements that carry a `children` property have a subtree to append.
  if ("children" in elem) {
    elem.children.forEach((child: MarkdownElement) => {
      output += stringifyMarkdownElementTree(child);
    });
  }
  return output;
}
| |
|
| | |
| |
|
| | |
/**
 * Renders a list of serialized HTML nodes with `stringifyHTMLElement`,
 * concatenates the results in order and trims surrounding whitespace.
 *
 * @param elems - Serialized elements and/or raw text nodes.
 * @returns The trimmed, concatenated rendering of all nodes.
 */
export function stringifyHTMLElements(elems: (SerializedHTMLElement | string)[]): string {
  const pieces: string[] = [];
  elems.forEach((elem) => {
    pieces.push(stringifyHTMLElement(elem));
  });
  const joined = pieces.join("");
  return joined.trim();
}
| |
|
| | |
/**
 * Renders a serialized HTML node as inline markdown.
 *
 * - A string (text node) is returned unchanged.
 * - `br` becomes a newline.
 * - `strong`/`b`, `em`/`i`, `s`/`strike` and `code`/`var`/`tt` are wrapped in
 *   `**`, `*`, `~~` and a backtick pair respectively.
 * - `sup` and `sub` are kept as literal HTML tags.
 * - `a` becomes `[content](href)` when it has an `href` attribute and its
 *   content is not blank; otherwise only its content is returned.
 * - Any other tag contributes just the rendering of its children.
 *
 * An element whose rendered children are empty yields an empty string, so no
 * empty markers such as `****` are produced.
 *
 * @param elem - Serialized element or raw text node.
 * @returns The markdown rendering of `elem`.
 */
export function stringifyHTMLElement(elem: SerializedHTMLElement | string): string {
  if (typeof elem === "string") return elem;
  if (elem.tagName === "br") return "\n";

  // Render the children exactly once and reuse the result in every branch;
  // re-rendering per branch doubles the work at each nesting level.
  const content = elem.content.map(stringifyHTMLElement).join("");
  if (content.length === 0) return content;

  switch (elem.tagName) {
    case "strong":
    case "b":
      return `**${content}**`;
    case "em":
    case "i":
      return `*${content}*`;
    case "s":
    case "strike":
      return `~~${content}~~`;
    case "code":
    case "var":
    case "tt":
      return `\`${content}\``;
    case "sup":
      return `<sup>${content}</sup>`;
    case "sub":
      return `<sub>${content}</sub>`;
    case "a": {
      // A link with only whitespace as text is emitted as that whitespace.
      if (content.trim().length === 0) return content;
      const href = elem.attributes.href;
      return href ? `[${content}](${href})` : content;
    }
    default:
      return content;
  }
}
| |
|
| | |
/**
 * Concatenates the plain text of a list of serialized HTML nodes, in order,
 * without applying any markdown formatting and without trimming.
 *
 * @param elems - Serialized elements and/or raw text nodes.
 * @returns The concatenated unformatted text of all nodes.
 */
export function stringifyHTMLElementsUnformatted(
  elems: (SerializedHTMLElement | string)[]
): string {
  let text = "";
  elems.forEach((elem) => {
    text += stringifyHTMLElementUnformatted(elem);
  });
  return text;
}
| |
|
| | |
/**
 * Returns the plain text of a serialized HTML node: a string is returned
 * unchanged, an element yields the concatenated text of its children
 * (recursively), ignoring tag names and attributes.
 *
 * @param elem - Serialized element or raw text node.
 * @returns The unformatted text contained in `elem`.
 */
function stringifyHTMLElementUnformatted(elem: SerializedHTMLElement | string): string {
  if (typeof elem !== "string") {
    const childTexts = elem.content.map((child: SerializedHTMLElement | string) =>
      stringifyHTMLElementUnformatted(child)
    );
    return childTexts.join("");
  }
  return elem;
}
| |
|