Spaces:
Sleeping
Sleeping
| /** | |
| * src/utils/jsonCleaner.ts | |
| * | |
| * LLM responses almost never contain bare JSON. They typically: | |
| * - Wrap JSON in ```json … ``` code fences | |
| * - Prepend sentences like "Here is the JSON:" | |
| * - Mix prose before/after the JSON block | |
| * - Contain nested braces inside string literals | |
| * | |
| * This module extracts the first valid JSON object `{}` or array `[]` | |
| * from any arbitrary LLM response string using a character-level scan | |
| * that correctly handles strings, escapes, and nesting. | |
| */ | |
| // ─── Public API ─────────────────────────────────────────────────────────────── | |
/**
 * Extracts and parses the first valid JSON object or array from an LLM response.
 *
 * Two strategies are tried in order:
 *  1. If the text contains a markdown code fence, the fenced body is parsed
 *     directly. A fence whose body is not valid JSON is ignored.
 *  2. The raw text is scanned for `{` / `[` openers. For each opener the
 *     matching closer is located with a string-aware depth scan, and the
 *     resulting slice is parsed (with a repair attempt on failure). A balanced
 *     slice that still fails is skipped and the scan resumes after it, so
 *     bracketed prose such as "[see below]" ahead of the payload does not
 *     abort the extraction.
 *
 * An opener with no matching closer stops the scan: everything after it is
 * potentially part of a truncated payload, and returning a nested fragment
 * of truncated JSON would be silently wrong.
 *
 * The parsed value is cast to `T` without validation; callers that need
 * guarantees about its shape must validate it themselves.
 *
 * @param raw - The full LLM response text
 * @returns The parsed value
 * @throws Error if `raw` is not a non-empty string, or if no valid JSON can
 *   be extracted (the error describes the first candidate that failed)
 */
export function extractJson<T = unknown>(raw: string): T {
  if (!raw || typeof raw !== 'string') {
    throw new Error('extractJson: input must be a non-empty string');
  }

  // ── Strategy 1: Strip a markdown code fence if present ──────────────────
  const fenceMatch = raw.match(
    /```(?:json|yaml|js|javascript|typescript|dart)?\s*([\s\S]*?)```/i
  );
  if (fenceMatch) {
    const inner = (fenceMatch[1] ?? '').trim();
    try {
      return JSON.parse(inner) as T;
    } catch {
      // Fenced block wasn't valid JSON — fall through to the bracket scan.
    }
  }

  // ── Strategy 2: Try each `{` / `[` candidate until one parses ───────────
  let firstError: Error | null = null;
  let searchFrom = 0;

  while (searchFrom < raw.length) {
    const start = findNextJsonOpener(raw, searchFrom);
    if (start === -1) break;

    const end = findBalancedJsonEnd(raw, start);
    if (end === -1) {
      // Unbalanced: do not descend into what may be a truncated payload.
      if (firstError === null) {
        firstError = new Error(
          `extractJson: unbalanced brackets in LLM response.\n` +
            `Response preview: ${raw.slice(start, start + 300)}`
        );
      }
      break;
    }

    const candidate = raw.slice(start, end + 1);
    try {
      return JSON.parse(candidate) as T;
    } catch (parseErr) {
      // Last resort: attempt to fix common LLM JSON mistakes.
      const repaired = attemptRepair(candidate);
      if (repaired !== null) return repaired as T;
      if (firstError === null) {
        firstError = new Error(
          `extractJson: JSON.parse failed.\n` +
            `Error: ${String(parseErr)}\n` +
            `Candidate (first 300 chars): ${candidate.slice(0, 300)}`
        );
      }
    }

    // The whole balanced slice was unusable; resume after it rather than
    // inside it, so fragments of a broken value are never returned.
    searchFrom = end + 1;
  }

  if (firstError !== null) throw firstError;
  throw new Error(
    `extractJson: no JSON object or array found in response.\n` +
      `Response preview: ${raw.slice(0, 200)}`
  );
}

/** Returns the index of the first `{` or `[` at or after `from`, or -1 if none. */
function findNextJsonOpener(text: string, from: number): number {
  const objStart = text.indexOf('{', from);
  const arrStart = text.indexOf('[', from);
  if (objStart === -1) return arrStart;
  if (arrStart === -1) return objStart;
  return Math.min(objStart, arrStart);
}

/**
 * Returns the index of the bracket that closes the opener at `start`, or -1
 * if the text ends first. Only the opener's own bracket type is counted, and
 * brackets inside double-quoted strings (including escaped quotes) are ignored.
 */
function findBalancedJsonEnd(text: string, start: number): number {
  const openChar = text.charAt(start);
  const closeChar = openChar === '{' ? '}' : ']';
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text.charAt(i);
    if (escaped) {
      // Character following a backslash inside a string: never structural.
      escaped = false;
      continue;
    }
    if (inString) {
      if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === openChar) {
      depth++;
    } else if (ch === closeChar) {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}
/**
 * Safe wrapper around `extractJson`: instead of throwing when nothing can be
 * extracted or parsed, it reports the failure as `null`.
 *
 * @param raw - The full LLM response text
 * @returns The parsed value, or `null` when `extractJson` throws
 */
export function tryExtractJson<T = unknown>(raw: string): T | null {
  let parsed: T | null = null;
  try {
    parsed = extractJson<T>(raw);
  } catch {
    // Any extraction/parsing failure is deliberately mapped to `null`.
    parsed = null;
  }
  return parsed;
}
| // ─── Private Helpers ────────────────────────────────────────────────────────── | |
/**
 * Attempts common repairs to malformed LLM JSON, then parses the result:
 *  - Trailing commas before `}` or `]` are dropped
 *  - Single-quoted strings are rewritten as double-quoted strings
 *  - Unquoted identifier keys (`{name: 1}`) are quoted
 *
 * The repair is a single string-aware pass: the contents of double-quoted
 * strings are copied verbatim, so apostrophes, commas, and brackets inside
 * valid string values are never touched.
 *
 * @param json - A candidate JSON text that failed `JSON.parse`
 * @returns The parsed value of the repaired text, or `null` if it still
 *   does not parse
 */
function attemptRepair(json: string): unknown | null {
  const identStart = /[A-Za-z_$]/;
  const identPart = /[A-Za-z0-9_$]/;
  const isJsonWhitespace = (c: string): boolean =>
    c === ' ' || c === '\t' || c === '\n' || c === '\r';

  const n = json.length;
  let out = '';
  // Last non-whitespace character emitted outside a string; tells us whether
  // a bare identifier sits in key position (right after `{` or `,`).
  let lastSignificant = '';
  let i = 0;

  while (i < n) {
    const ch = json.charAt(i);

    // Double-quoted string: copy verbatim, honouring backslash escapes.
    if (ch === '"') {
      let j = i + 1;
      while (j < n && json.charAt(j) !== '"') {
        j += json.charAt(j) === '\\' ? 2 : 1;
      }
      out += json.slice(i, j + 1);
      i = j + 1;
      lastSignificant = '"';
      continue;
    }

    // Single-quoted string: re-emit as a double-quoted string.
    if (ch === "'") {
      let body = '';
      let j = i + 1;
      while (j < n && json.charAt(j) !== "'") {
        const c = json.charAt(j);
        if (c === '\\' && j + 1 < n) {
          const next = json.charAt(j + 1);
          // `\'` is not a valid JSON escape; other escapes pass through.
          body += next === "'" ? "'" : c + next;
          j += 2;
        } else {
          // A bare `"` must be escaped once the delimiters become `"`.
          body += c === '"' ? '\\"' : c;
          j += 1;
        }
      }
      out += `"${body}"`;
      i = j + 1;
      lastSignificant = '"';
      continue;
    }

    // Trailing comma: drop it when the next significant char is a closer.
    if (ch === ',') {
      let j = i + 1;
      while (j < n && isJsonWhitespace(json.charAt(j))) j++;
      const next = json.charAt(j);
      if (next === '}' || next === ']') {
        i += 1;
        continue;
      }
    }

    // Unquoted key: identifier in key position followed by `:`.
    if ((lastSignificant === '{' || lastSignificant === ',') && identStart.test(ch)) {
      let j = i + 1;
      while (j < n && identPart.test(json.charAt(j))) j++;
      let k = j;
      while (k < n && isJsonWhitespace(json.charAt(k))) k++;
      if (json.charAt(k) === ':') {
        out += `"${json.slice(i, j)}"`;
        i = j;
        lastSignificant = '"';
        continue;
      }
    }

    out += ch;
    if (!isJsonWhitespace(ch)) lastSignificant = ch;
    i += 1;
  }

  try {
    return JSON.parse(out);
  } catch {
    return null;
  }
}