File size: 6,506 Bytes
1dbc34b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/**
 * JSON Extraction Utilities
 *
 * Robust JSON extraction from AI responses that may contain markdown,
 * code blocks, or other text mixed with JSON content.
 *
 * Used by various routes that parse structured output from Cursor or
 * Claude responses when structured output is not available.
 */

import { createLogger } from '@automaker/utils';

// Module-level default logger; extractJson falls back to it when the caller
// does not pass a custom logger in its options.
const logger = createLogger('JsonExtractor');

/**
 * Minimal logger contract that callers can supply to the extraction helpers
 * in place of the module's default logger.
 */
export interface JsonExtractorLogger {
  /** Receives progress messages (which strategy is being used, success/failure). */
  debug: (message: string, ...args: unknown[]) => void;
  /** Optional warning channel; extractJson in this file only calls `debug`. */
  warn?: (message: string, ...args: unknown[]) => void;
}

/**
 * Options for JSON extraction: control logging and how a parsed candidate is
 * validated before it is accepted.
 */
export interface ExtractJsonOptions {
  /** Custom logger (defaults to the module-level 'JsonExtractor' logger) */
  logger?: JsonExtractorLogger;
  /** Key that must be present (checked with `in`) on the extracted JSON value */
  requiredKey?: string;
  /**
   * Whether the required key's value must be an array.
   * Defaults to false and has no effect unless `requiredKey` is set.
   */
  requireArray?: boolean;
}

/**
 * Extract JSON from response text using multiple strategies.
 *
 * Strategies are tried in order; the first one whose parsed value passes
 * validation is returned:
 * 1. JSON in ```json code block
 * 2. JSON in ``` code block (no language), only if its content starts with { or [
 * 3. Find the JSON object that starts with `{"<requiredKey>"` by matching braces
 *    (skipped when no requiredKey is specified)
 * 4. Find the first JSON object by matching braces
 * 5. Slice from the first { to the last } (may be less accurate)
 * 6. Parse the entire trimmed response as JSON
 *
 * A strategy that throws (for example because JSON.parse rejects its
 * candidate text) is skipped and the next strategy is tried.
 *
 * A parsed value is accepted when it is a non-null object (arrays included),
 * contains `requiredKey` if one was given, and - when `requireArray` is true -
 * holds an array under that key.
 *
 * Brace matching skips over JSON string literals, so braces that appear inside
 * string values (e.g. `{"code": "if (x) { y }"}`) do not confuse the matcher.
 *
 * @param responseText - The raw response text that may contain JSON
 * @param options - Optional extraction options
 * @returns Parsed JSON object or null if extraction fails
 */
export function extractJson<T = Record<string, unknown>>(
  responseText: string,
  options: ExtractJsonOptions = {}
): T | null {
  const log = options.logger ?? logger;
  const requiredKey = options.requiredKey;
  const requireArray = options.requireArray ?? false;

  /**
   * Validate that the result has the required key/structure
   */
  const validateResult = (result: unknown): result is T => {
    if (!result || typeof result !== 'object') return false;
    if (requiredKey) {
      const obj = result as Record<string, unknown>;
      if (!(requiredKey in obj)) return false;
      if (requireArray && !Array.isArray(obj[requiredKey])) return false;
    }
    return true;
  };

  /**
   * Find the index just past the brace that closes the one at startIdx.
   *
   * Braces inside double-quoted string literals are ignored, and backslash
   * escapes are honored so that an escaped quote does not end the string.
   * Returns -1 when no balanced closing brace exists.
   */
  const findMatchingBrace = (text: string, startIdx: number): number => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = startIdx; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          // Character following a backslash is consumed verbatim.
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i + 1;
        }
      }
    }
    return -1;
  };

  const strategies: Array<() => unknown> = [
    // Strategy 1: JSON in ```json code block
    () => {
      const match = responseText.match(/```json\s*([\s\S]*?)```/);
      if (match) {
        log.debug('Extracting JSON from ```json code block');
        return JSON.parse(match[1].trim());
      }
      return null;
    },

    // Strategy 2: JSON in ``` code block (no language specified)
    () => {
      const match = responseText.match(/```\s*([\s\S]*?)```/);
      if (match) {
        const content = match[1].trim();
        // Only try if it looks like JSON (starts with { or [)
        if (content.startsWith('{') || content.startsWith('[')) {
          log.debug('Extracting JSON from ``` code block');
          return JSON.parse(content);
        }
      }
      return null;
    },

    // Strategy 3: Find JSON object containing the required key (if specified)
    () => {
      if (!requiredKey) return null;

      const searchPattern = `{"${requiredKey}"`;
      const startIdx = responseText.indexOf(searchPattern);
      if (startIdx === -1) return null;

      const endIdx = findMatchingBrace(responseText, startIdx);
      if (endIdx > startIdx) {
        log.debug(`Extracting JSON with required key "${requiredKey}"`);
        return JSON.parse(responseText.slice(startIdx, endIdx));
      }
      return null;
    },

    // Strategy 4: Find any JSON object by matching braces
    () => {
      const startIdx = responseText.indexOf('{');
      if (startIdx === -1) return null;

      const endIdx = findMatchingBrace(responseText, startIdx);
      if (endIdx > startIdx) {
        log.debug('Extracting JSON by brace matching');
        return JSON.parse(responseText.slice(startIdx, endIdx));
      }
      return null;
    },

    // Strategy 5: Find JSON using first { to last } (may be less accurate)
    () => {
      const firstBrace = responseText.indexOf('{');
      const lastBrace = responseText.lastIndexOf('}');
      if (firstBrace !== -1 && lastBrace > firstBrace) {
        log.debug('Extracting JSON from first { to last }');
        return JSON.parse(responseText.slice(firstBrace, lastBrace + 1));
      }
      return null;
    },

    // Strategy 6: Try parsing the entire response as JSON
    () => {
      const trimmed = responseText.trim();
      if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        log.debug('Parsing entire response as JSON');
        return JSON.parse(trimmed);
      }
      return null;
    },
  ];

  for (const strategy of strategies) {
    try {
      const result = strategy();
      if (validateResult(result)) {
        log.debug('Successfully extracted JSON');
        return result;
      }
    } catch {
      // Deliberate best-effort: a strategy whose candidate is not valid JSON
      // simply yields to the next strategy.
    }
  }

  log.debug('Failed to extract JSON from response');
  return null;
}

/**
 * Convenience wrapper around extractJson that insists on a particular key.
 *
 * The supplied options are copied and `requiredKey` is set on the copy before
 * delegating, so the caller's options object is not modified.
 *
 * @param responseText - The raw response text
 * @param requiredKey - Key that must be present in the extracted JSON
 * @param options - Additional options (everything except `requiredKey`)
 * @returns Parsed JSON object or null
 */
export function extractJsonWithKey<T = Record<string, unknown>>(
  responseText: string,
  requiredKey: string,
  options: Omit<ExtractJsonOptions, 'requiredKey'> = {}
): T | null {
  const extractionOptions: ExtractJsonOptions = {
    ...options,
    requiredKey,
  };
  return extractJson<T>(responseText, extractionOptions);
}

/**
 * Convenience wrapper around extractJson for payloads whose given key must
 * hold an array, e.g. { "suggestions": [...] }.
 *
 * Delegates with `requiredKey` set to `arrayKey` and `requireArray` set to
 * true, layered over a copy of the caller's options.
 *
 * @param responseText - The raw response text
 * @param arrayKey - Key that must contain an array
 * @param options - Additional options (everything except `requiredKey` and `requireArray`)
 * @returns Parsed JSON object or null
 */
export function extractJsonWithArray<T = Record<string, unknown>>(
  responseText: string,
  arrayKey: string,
  options: Omit<ExtractJsonOptions, 'requiredKey' | 'requireArray'> = {}
): T | null {
  const extractionOptions: ExtractJsonOptions = {
    ...options,
    requiredKey: arrayKey,
    requireArray: true,
  };
  return extractJson<T>(responseText, extractionOptions);
}