/**
* Shared markdown pre-processing utilities used across content pages.
*/
/**
 * Repairs unclosed or misplaced `**` bold markers, one line at a time.
 *
 * A line is considered only when it opens with `**` (after optional
 * indentation) and contains no further `**`. The leading label is then wrapped
 * in a closed bold pair; the first rule that applies wins:
 *
 * 1. text before the first `:`  ->  `**Label**: rest`
 * 2. a known leading keyword (Example, Explanation, Definition, Syntax, Note,
 *    Tip, Warning; case-insensitive)  ->  `**Example** rest`
 * 3. text before the first `(`  ->  `**Label** (rest`
 * 4. the first space-delimited word  ->  `**Word** rest`
 * 5. otherwise the whole remainder is wrapped
 *
 * Common source examples:
 *   **Parameters CREATE TABLE: value here
 *   **Example CREATE TABLE movies (
 *   **Explanation In the above example ...
 *
 * Lines are returned unchanged when no non-empty label can be isolated: a lone
 * `**`, a marker followed directly by `:` or `(`, or a run of three or more
 * asterisks (e.g. a `***` thematic break). Whitespace between the marker and
 * the label, and between the label and the closing marker, is dropped so the
 * result is a valid bold span. A trailing `\r` (CRLF input) is preserved.
 *
 * @param text - Markdown source, possibly containing unclosed bold markers.
 * @returns The text with repairable lines rewritten; all other lines untouched.
 */
export function fixMarkdownBold(text: string): string {
  // Hoisted so the patterns are not rebuilt for every line.
  const openerPattern = /^(\s*)\*\*(.*)$/;
  const wordLabelPattern = /^(Example|Explanation|Definition|Syntax|Note|Tip|Warning)\b(.*)$/i;

  const repairLine = (line: string): string => {
    const match = openerPattern.exec(line);
    if (!match) return line;
    const [, indent, rawInner] = match;
    // A second `**` means the author already closed (or nested) the span.
    if (rawInner.includes('**')) return line;

    // `** Label` is not a valid opener in Markdown; pull the label up to the marker.
    const inner = rawInner.trimStart();
    // Nothing to wrap, or the line is really a run of 3+ asterisks.
    if (inner === '' || inner.startsWith('*')) return line;

    const colonIdx = inner.indexOf(':');
    if (colonIdx === 0) return line; // no label in front of the colon
    if (colonIdx > 0) {
      const label = inner.slice(0, colonIdx).trimEnd();
      return `${indent}**${label}**:${inner.slice(colonIdx + 1)}`;
    }

    const wordLabelMatch = wordLabelPattern.exec(inner);
    if (wordLabelMatch) {
      const [, label, rest] = wordLabelMatch;
      return `${indent}**${label}**${rest}`;
    }

    const parenIdx = inner.indexOf('(');
    if (parenIdx === 0) return line; // no label in front of the parenthesis
    if (parenIdx > 0) {
      return `${indent}**${inner.slice(0, parenIdx).trimEnd()}** ${inner.slice(parenIdx)}`;
    }

    const spaceIdx = inner.indexOf(' ');
    if (spaceIdx > 0) {
      return `${indent}**${inner.slice(0, spaceIdx)}** ${inner.slice(spaceIdx + 1)}`;
    }

    return `${indent}**${inner.trimEnd()}**`;
  };

  return text
    .split('\n')
    .map(rawLine => {
      // `.` never matches `\r`, so a CRLF line would otherwise fail the opener
      // pattern and be skipped. Detach the CR, repair, then put it back.
      const eol = rawLine.endsWith('\r') ? '\r' : '';
      const line = eol ? rawLine.slice(0, -1) : rawLine;
      return repairLine(line) + eol;
    })
    .join('\n');
}
/**
 * Strips stray conversion artifacts from markdown text.
 *
 * The substitutions run in a fixed order, after which surrounding whitespace
 * is trimmed from the result.
 *
 * @param text - Converted markdown that may contain leftover artifacts.
 * @returns The cleaned, trimmed markdown.
 */
export function cleanMarkdown(text: string): string {
  // Order matters: blank lines left behind by the line-removal rules are
  // collapsed by the final rule.
  const rules: Array<[RegExp, string]> = [
    [/\u0026#x09;/g, ''], // literal "&#x09;" entity text
    [/\\&/g, '&'], // backslash-escaped ampersand
    [/--\*\*/g, ''], // stray "--**" sequences
    [/^\s*\*\*\s*$/gm, ''], // lines holding nothing but "**"
    [/^[*\-]+\s*$/gm, ''], // lines made up solely of "*" / "-" characters
    [/\n{3,}/g, '\n\n'], // three or more newlines -> exactly two
  ];

  let cleaned = text;
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}
/**
 * Converts ASCII-art tables embedded in a document into GFM Markdown tables.
 *
 * Recognised border / divider lines (dropped from the output):
 *   - `+---+---+`, `~---~`, `+===+`, `|---+---|`  (framed by `+`, `~` or `|`)
 *   - runs of three or more `-` or `=` on their own line
 *   - GFM separator rows such as `| --- | :---: |`, so already-converted
 *     tables pass through without gaining a second separator
 * Borders that contain `+` but have no framing character (e.g. `----+----`)
 * are not recognised.
 *
 * A data row is a line that starts and ends with `|`. The first row of each
 * table becomes the header; empty and `NULL` cells are replaced with "N/A".
 * Blank lines between rows are ignored.
 *
 * Code fences:
 *   - A fence whose body contains a divider line is treated as a wrapper
 *     around a table: both fence lines are removed and the body is converted.
 *   - Any other fenced block is copied through untouched apart from trailing
 *     whitespace: its indentation is kept and its contents are never
 *     interpreted as table rows.
 *
 * Lines outside tables and code fences are emitted trimmed.
 *
 * @param text - Document text that may contain ASCII tables.
 * @returns The document with each detected table rewritten as a GFM table
 *   surrounded by blank lines.
 */
export function processAsciiTableToMarkdown(text: string): string {
  const lines = text.split('\n');
  const result: string[] = [];
  let inTable = false; // collecting rows of the current table
  let inTableBlock = false; // inside a code fence that wraps a table (fences dropped)
  let inCodeFence = false; // inside an ordinary code fence (copied through)
  let tableRows: string[] = [];

  const framedBorder = /^[+~|][-=~+]+[+~|]$/;
  const plainRule = /^[-=]{3,}$/;
  // Requires 3+ dashes per cell so rows like `| - | - |` remain data.
  const gfmSeparator = /^\|(?:\s*:?-{3,}:?\s*\|)+$/;

  // Detect any table border / separator line.
  const isDividerLine = (s: string): boolean =>
    framedBorder.test(s) || plainRule.test(s) || gfmSeparator.test(s);

  // Rows are sometimes wrapped in backticks; ignore those for analysis.
  const stripBackticks = (s: string): string => s.replace(/^`+|`+$/g, '').trim();

  const cleanCell = (cell: string): string => {
    const v = cell.trim();
    return v === '' || /^null$/i.test(v) ? 'N/A' : v;
  };

  // Emit the collected rows as a GFM table; the first row is the header.
  const flushTable = (): void => {
    if (tableRows.length === 0) return;
    result.push('');
    result.push(tableRows[0]);
    // A rendered row is `| a | b |`: splitting on `|` yields cells + 2 empty ends.
    const colCount = tableRows[0].split('|').length - 2;
    result.push('|' + Array(Math.max(1, colCount)).fill(' --- ').join('|') + '|');
    for (let i = 1; i < tableRows.length; i++) result.push(tableRows[i]);
    result.push('');
    tableRows = [];
  };

  const endTable = (): void => {
    if (inTable) {
      flushTable();
      inTable = false;
    }
  };

  // True when a divider line appears between this opening fence and the next fence.
  const fenceWrapsTable = (fenceIdx: number): boolean => {
    for (let j = fenceIdx + 1; j < lines.length; j++) {
      const peekRaw = lines[j].trim();
      if (peekRaw.startsWith('```')) return false; // end of block
      if (isDividerLine(stripBackticks(peekRaw))) return true;
    }
    return false;
  };

  for (let i = 0; i < lines.length; i++) {
    const rawLine = lines[i].trim();
    const isFence = rawLine.startsWith('```');

    // Closing fence of a table wrapper: finish the table, drop the fence.
    if (isFence && inTableBlock) {
      endTable();
      inTableBlock = false;
      continue;
    }

    // Inside an ordinary code block: copy through, keeping indentation.
    if (inCodeFence) {
      if (isFence) {
        inCodeFence = false;
        result.push(rawLine);
      } else {
        result.push(lines[i].trimEnd());
      }
      continue;
    }

    // Opening fence: either a table wrapper (dropped) or a real code block.
    if (isFence) {
      if (fenceWrapsTable(i)) {
        inTableBlock = true;
        continue;
      }
      endTable();
      inCodeFence = true;
      result.push(rawLine);
      continue;
    }

    const line = stripBackticks(rawLine);

    if (isDividerLine(line)) {
      inTable = true;
      continue; // borders carry no content
    }

    const isDataRow = line.startsWith('|') && line.endsWith('|');
    if (isDataRow) {
      inTable = true;
      const parts = line.split('|').slice(1, -1);
      tableRows.push(`| ${parts.map(p => cleanCell(p)).join(' | ')} |`);
    } else if (inTable && line === '') {
      continue; // blank line inside a table
    } else {
      endTable();
      result.push(rawLine);
    }
  }

  if (inTable) flushTable();
  return result.join('\n');
}
|