File size: 4,631 Bytes
f91a684
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/**
 * Shared markdown pre-processing utilities used across content pages.
 */

/**
 * Closes `**` bold markers that were opened at the start of a line but never
 * closed.
 *
 * Every line is processed independently. A line is left untouched when it does
 * not begin (after optional whitespace) with `**`, or when it already contains
 * a second `**` after the opening one. Otherwise the closing marker is placed
 * by the first rule that applies:
 *
 *   1. before the first `:`            `**Parameters CREATE TABLE: value here`
 *   2. after a leading label word      `**Example CREATE TABLE movies (`
 *      (Example, Explanation, Definition, Syntax, Note, Tip, Warning;
 *      matched case-insensitively)
 *   3. before the first `(`
 *   4. after the first word (i.e. at the first space)
 *   5. at the end of the line
 *
 * If the chosen rule would leave nothing but whitespace between the markers
 * (e.g. the line is just `**`, or `** text`), the line is returned unchanged
 * instead of producing an empty `****` pair.
 *
 * @param text - Markdown source; lines are separated by `\n`.
 * @returns The text with the repaired lines, joined by `\n`.
 */
export function fixMarkdownBold(text: string): string {
  const labelWord = /^(Example|Explanation|Definition|Syntax|Note|Tip|Warning)\b(.*)$/i;

  const repairLine = (line: string): string => {
    const match = /^(\s*)\*\*(.*)$/.exec(line);
    if (!match) return line;

    const indent = match[1] ?? '';
    const inner = match[2] ?? '';
    // A second marker means the bold span is (or may be) already closed.
    if (inner.includes('**')) return line;

    // Wrap `label` in bold and append `rest`; refuse to emit an empty `****`.
    const bold = (label: string, rest: string): string =>
      label.trim() === '' ? line : `${indent}**${label}**${rest}`;

    const colonIdx = inner.indexOf(':');
    if (colonIdx !== -1) {
      return bold(inner.slice(0, colonIdx), `:${inner.slice(colonIdx + 1)}`);
    }

    const labelMatch = labelWord.exec(inner);
    if (labelMatch) {
      return bold(labelMatch[1] ?? '', labelMatch[2] ?? '');
    }

    const parenIdx = inner.indexOf('(');
    if (parenIdx !== -1) {
      return bold(inner.slice(0, parenIdx).trimEnd(), ` ${inner.slice(parenIdx).trimStart()}`);
    }

    const spaceIdx = inner.indexOf(' ');
    if (spaceIdx !== -1) {
      return bold(inner.slice(0, spaceIdx), ` ${inner.slice(spaceIdx + 1)}`);
    }

    return bold(inner, '');
  };

  return text.split('\n').map(line => repairLine(line)).join('\n');
}

/**
 * Strips leftover conversion artifacts from markdown text.
 *
 * The substitutions run in a fixed order, each on the output of the previous
 * one, and the final result is trimmed:
 *
 *   1. remove the `&#x09;` (tab) character entity
 *   2. unescape `\&` to `&`
 *   3. remove every `--**` sequence
 *   4. blank out lines that contain only `**`
 *   5. blank out lines made solely of `*` and/or `-` characters
 *   6. collapse runs of three or more newlines into exactly two
 *
 * @param text - Converted markdown that may contain stray artifacts.
 * @returns The cleaned text without leading or trailing whitespace.
 */
export function cleanMarkdown(text: string): string {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\u0026#x09;/g, ''],
    [/\\&/g, '&'],
    [/--\*\*/g, ''],
    [/^\s*\*\*\s*$/gm, ''],
    [/^[*\-]+\s*$/gm, ''],
    [/\n{3,}/g, '\n\n'],
  ];

  let cleaned = text;
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}

/**
 * Converts ASCII-art tables embedded in markdown into GFM pipe tables.
 *
 * Recognised divider lines (dropped from the output; they start a table):
 *   - a run of `-`, `=`, `~`, `+` enclosed by `+`, `~` or `|` on both ends,
 *     e.g. `+---+`, `~---~`, `+===+`
 *   - a line consisting only of three or more `-` / `=` characters
 *     (note: this also consumes markdown `---` rules and `===` underlines)
 * A psql-style `----+----` rule matches neither pattern and is passed through
 * as ordinary text.
 *
 * Recognised data rows: lines that start and end with `|`. Cells are trimmed
 * and empty or `NULL` (any case) cells become `N/A`. The first collected row
 * is used as the header and a `| --- |` separator is generated after it. Each
 * emitted table is surrounded by one empty line. Blank lines inside a table
 * are skipped.
 *
 * Code fences:
 *   - A fence whose block contains a divider line is treated as a wrapper
 *     around a table; both its opening and closing fence are removed.
 *   - Any other fence is kept. The function remembers that it is inside such a
 *     block so that the block's closing fence is never mistaken for the
 *     opening fence of a following table block, and a fence that directly
 *     follows table rows terminates the table instead of being discarded.
 *
 * Every line is analysed, and passed through, in trimmed form; leading
 * backticks and trailing backticks are ignored when classifying a line.
 *
 * @param text - Markdown source; lines are separated by `\n`.
 * @returns The converted text, joined by `\n`.
 */
export function processAsciiTableToMarkdown(text: string): string {
  const lines = text.split('\n');
  const result: string[] = [];
  let inTable = false; // rows are currently being collected
  let inTableBlock = false; // inside a fence that wraps a table (fence is stripped)
  let inPlainFence = false; // inside an ordinary fence (fence is kept)
  let tableRows: string[] = [];

  // Detect any ASCII table border line.
  const isDividerLine = (s: string): boolean =>
    /^[+~|][-=~+]+[+~|]$/.test(s) || /^[-=]{3,}$/.test(s);

  // Remove leading/trailing backticks so wrapped lines can be classified.
  const stripBackticks = (s: string): string => s.replace(/^`+|`+$/g, '').trim();

  const cleanCell = (cell: string): string => {
    const v = cell.trim();
    return v === '' || /^null$/i.test(v) ? 'N/A' : v;
  };

  // Emit the collected rows as a GFM table; the first row becomes the header.
  const flushTable = (): void => {
    const [header, ...body] = tableRows;
    if (header === undefined) return;
    // A row looks like "| a | b |": splitting on "|" yields two extra empties.
    const colCount = header.split('|').length - 2;
    const separator =
      '|' + Array.from({ length: Math.max(1, colCount) }, () => ' --- ').join('|') + '|';
    result.push('', header, separator, ...body, '');
    tableRows = [];
  };

  // Close the table that is currently being collected, if any.
  const endTable = (): void => {
    if (inTable) {
      flushTable();
      inTable = false;
    }
  };

  // Look ahead from `from` up to the next fence: does the block hold a divider?
  const fenceWrapsTable = (from: number): boolean => {
    for (let j = from; j < lines.length; j++) {
      const peekRaw = (lines[j] ?? '').trim();
      if (peekRaw.startsWith('```')) return false; // end of block
      if (isDividerLine(stripBackticks(peekRaw))) return true;
    }
    return false;
  };

  for (let i = 0; i < lines.length; i++) {
    const rawLine = (lines[i] ?? '').trim();
    const line = stripBackticks(rawLine);

    if (rawLine.startsWith('```')) {
      if (inTableBlock) {
        // Closing fence of a table wrapper: finish the table, drop the fence.
        endTable();
        inTableBlock = false;
        continue;
      }
      if (inPlainFence) {
        // Only a bare fence closes an ordinary block; never look ahead here,
        // otherwise a divider further down would swallow this closing fence.
        if (/^`{3,}$/.test(rawLine)) inPlainFence = false;
      } else if (fenceWrapsTable(i + 1)) {
        inTableBlock = true;
        continue; // drop the opening fence of the table wrapper
      } else if (/^`{3,}[^`]*$/.test(rawLine)) {
        // A real opening fence (no backticks after the opening run), as
        // opposed to a one-line span such as three backticks around a row.
        inPlainFence = true;
      }
    }

    if (isDividerLine(line)) {
      inTable = true;
      continue; // dividers never reach the output
    }

    if (line.startsWith('|') && line.endsWith('|')) {
      inTable = true;
      const cells = line.split('|').slice(1, -1).map(cell => cleanCell(cell));
      tableRows.push(`| ${cells.join(' | ')} |`);
    } else if (inTable && rawLine === '') {
      // Truly blank line inside a table. Tested on the raw line so that a
      // bare fence (blank only after backtick stripping) is not discarded.
      continue;
    } else {
      endTable();
      result.push(rawLine);
    }
  }

  endTable();
  return result.join('\n');
}