RYP / src /lib /markdownUtils.ts
Soumya79's picture
Upload 1361 files
f91a684 verified
/**
* Shared markdown pre-processing utilities used across content pages.
*/
/**
 * Repairs unclosed `**` bold markers at the start of a line.
 *
 * A line is repaired only when, after optional indentation, it begins with
 * `**` and contains no other `**`. The closing marker is inserted after the
 * first of the following that applies:
 *   1. the text before the first `:`
 *   2. a leading label word (Example, Explanation, Definition, Syntax, Note,
 *      Tip, Warning; case-insensitive)
 *   3. the text before the first `(`
 *   4. the first space-delimited word
 *   5. the whole remainder of the line
 *
 * Examples:
 *   `**Parameters CREATE TABLE: value here` -> `**Parameters CREATE TABLE**: value here`
 *   `**Example CREATE TABLE movies (`       -> `**Example** CREATE TABLE movies (`
 *   `**Explanation In the above example`    -> `**Explanation** In the above example`
 *
 * Lines whose label would be empty (a lone `**`, `** text`, `**: x`, `**(x)`)
 * are returned unchanged, because wrapping nothing would emit `****`.
 * A trailing `\r` (CRLF input) is preserved and does not prevent the repair.
 *
 * @param text Markdown text; lines are separated by `\n`.
 * @returns The text with the same line structure and repaired bold markers.
 */
export function fixMarkdownBold(text: string): string {
  const labelWord = /^(Example|Explanation|Definition|Syntax|Note|Tip|Warning)\b(.*)$/i;

  const repairLine = (line: string): string => {
    // `.` never matches `\r`, so detach a CRLF remnant before matching and
    // re-attach it afterwards; otherwise no CRLF line would ever match.
    const eol = line.endsWith('\r') ? '\r' : '';
    const content = eol === '' ? line : line.slice(0, -1);

    const match = content.match(/^(\s*)\*\*(.*)$/);
    if (!match) return line;
    const indent = match[1] ?? '';
    const inner = match[2] ?? '';
    // A second `**` means the marker is already closed somewhere on the line.
    if (inner.includes('**')) return line;

    let label: string;
    let rest: string;
    const colonIdx = inner.indexOf(':');
    const wordLabelMatch = colonIdx === -1 ? inner.match(labelWord) : null;
    const parenIdx = inner.indexOf('(');
    const spaceIdx = inner.indexOf(' ');

    if (colonIdx !== -1) {
      label = inner.slice(0, colonIdx);
      rest = `:${inner.slice(colonIdx + 1)}`;
    } else if (wordLabelMatch) {
      label = wordLabelMatch[1] ?? '';
      rest = wordLabelMatch[2] ?? '';
    } else if (parenIdx !== -1) {
      label = inner.slice(0, parenIdx).trimEnd();
      rest = ` ${inner.slice(parenIdx).trimStart()}`;
    } else if (spaceIdx !== -1) {
      label = inner.slice(0, spaceIdx);
      rest = ` ${inner.slice(spaceIdx + 1)}`;
    } else {
      label = inner;
      rest = '';
    }

    // Nothing to embolden: leave the line alone instead of emitting `****`.
    if (label.trim() === '') return line;

    return `${indent}**${label}**${rest}${eol}`;
  };

  return text
    .split('\n')
    .map(line => repairLine(line))
    .join('\n');
}
/**
 * Strips conversion artifacts from markdown text.
 *
 * The substitutions run in a fixed order, each over the output of the previous:
 *   1. remove the literal entity text `&#x09;`
 *   2. unescape `\&` to `&`
 *   3. remove every `--**` sequence
 *   4. blank out lines that hold nothing but `**`
 *   5. blank out lines made up solely of `*` and/or `-` characters
 *   6. collapse runs of three or more newlines into exactly two
 * Finally, leading and trailing whitespace is trimmed from the whole text.
 *
 * @param text Markdown text to clean.
 * @returns The cleaned, trimmed text.
 */
export function cleanMarkdown(text: string): string {
  // Ordered [pattern, replacement] pairs; later rules see earlier rules' output.
  const rules: Array<[RegExp, string]> = [
    [/\u0026#x09;/g, ''],
    [/\\&/g, '&'],
    [/--\*\*/g, ''],
    [/^\s*\*\*\s*$/gm, ''],
    [/^[*\-]+\s*$/gm, ''],
    [/\n{3,}/g, '\n\n'],
  ];

  let cleaned = text;
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}
/**
 * Converts ASCII-art tables embedded in text into GFM Markdown tables.
 *
 * Recognised border ("divider") lines, after stripping surrounding backticks:
 *   - `+---+`, `~---~`, `+===+`, `|---|` style lines (first pattern below)
 *   - bare runs of three or more `-` or `=` characters
 * Dividers such as `----+----` (no outer border character) and GFM separator
 * rows such as `|---|---|` are NOT matched by these patterns.
 *
 * A data row is any line that, after stripping surrounding backticks, starts
 * and ends with `|`. The first data row of a table becomes the header and a
 * `| --- |` separator is generated after it. Empty or `NULL` cells become
 * "N/A". Blank lines between rows do not end a table.
 *
 * Fence handling:
 *   - A fenced block that contains a divider or data row before the next
 *     fence line is treated as a table wrapper: both fence lines are dropped
 *     and the content is processed as table/plain text.
 *   - Any other fenced block is an ordinary code block. Its fence lines are
 *     kept and its content is passed through without table detection. Content
 *     keeps its indentation relative to the opening fence; only trailing
 *     whitespace is removed. An open table is flushed before the fence.
 *
 * Lines outside code blocks that are not part of a table are emitted trimmed.
 *
 * @param text Raw text, lines separated by `\n`.
 * @returns The text with ASCII tables rewritten as GFM tables.
 */
export function processAsciiTableToMarkdown(text: string): string {
  const lines = text.split('\n');
  const result: string[] = [];
  let inTable = false; // currently collecting rows of a table
  let inTableBlock = false; // inside a fence that wraps a table (fences dropped)
  let inCodeBlock = false; // inside an ordinary fenced code block (passed through)
  let codeIndent = 0; // leading whitespace width of the opening code fence
  let tableRows: string[] = [];

  // Detect any ASCII table border line.
  const isDividerLine = (s: string): boolean =>
    /^[+~|][-=~+]+[+~|]$/.test(s) || /^[-=]{3,}$/.test(s);

  // A row is a data row if it is delimited by pipes on both sides.
  const isDataRow = (s: string): boolean => s.startsWith('|') && s.endsWith('|');

  // Strip surrounding backticks so wrapped rows/dividers can be analysed.
  const stripTicks = (s: string): string => s.replace(/^`+|`+$/g, '').trim();

  const cleanCell = (cell: string): string => {
    const v = cell.trim();
    return v === '' || /^null$/i.test(v) ? 'N/A' : v;
  };

  // Remove at most `width` leading space/tab characters.
  const dropIndent = (s: string, width: number): string => {
    let k = 0;
    while (k < width && (s[k] === ' ' || s[k] === '\t')) k++;
    return s.slice(k);
  };

  // Emit the collected rows as a GFM table (first row = header) and reset.
  const flushTable = (): void => {
    const [header, ...body] = tableRows;
    if (header === undefined) return;
    result.push('');
    result.push(header);
    const colCount = header.split('|').length - 2;
    result.push('|' + Array(Math.max(1, colCount)).fill(' --- ').join('|') + '|');
    for (const row of body) result.push(row);
    result.push('');
    tableRows = [];
  };

  // True when a divider or data row occurs at/after `start` before the next fence line.
  const fencedBlockHasTable = (start: number): boolean => {
    for (let j = start; j < lines.length; j++) {
      const peekRaw = (lines[j] ?? '').trim();
      if (peekRaw.startsWith('```')) return false; // end of block
      const peek = stripTicks(peekRaw);
      if (isDividerLine(peek) || isDataRow(peek)) return true;
    }
    return false;
  };

  for (let i = 0; i < lines.length; i++) {
    const original = lines[i] ?? '';
    const rawLine = original.trim();
    const line = stripTicks(rawLine);
    const startsFence = rawLine.startsWith('```');

    // Ordinary code block: pass content through until a bare closing fence.
    if (inCodeBlock) {
      if (/^`{3,}$/.test(rawLine)) {
        inCodeBlock = false;
        result.push(rawLine);
      } else {
        result.push(dropIndent(original, codeIndent).trimEnd());
      }
      continue;
    }

    // Closing fence of a table wrapper: emit the table, drop the fence.
    if (startsFence && inTableBlock) {
      flushTable();
      inTable = false;
      inTableBlock = false;
      continue;
    }

    if (startsFence) {
      // Opening fence that wraps a table: drop it and process the content.
      if (fencedBlockHasTable(i + 1)) {
        inTableBlock = true;
        continue;
      }
      // Opening fence of an ordinary code block. Lines that carry further
      // backticks (e.g. one-line "```x```") or table content after the
      // backticks are not block openers and fall through to normal handling.
      if (/^`{3,}[^`]*$/.test(rawLine) && !isDividerLine(line) && !isDataRow(line)) {
        flushTable();
        inTable = false;
        inCodeBlock = true;
        codeIndent = original.length - original.trimStart().length;
        result.push(rawLine);
        continue;
      }
    }

    if (isDividerLine(line)) {
      inTable = true;
      continue; // skip divider
    }

    if (isDataRow(line)) {
      inTable = true;
      const parts = line.split('|').slice(1, -1);
      tableRows.push(`| ${parts.map(p => cleanCell(p)).join(' | ')} |`);
    } else if (inTable && line === '') {
      continue; // blank inside table
    } else {
      if (inTable) {
        flushTable();
        inTable = false;
      }
      result.push(rawLine);
    }
  }

  if (inTable) flushTable();
  return result.join('\n');
}