// File size: 9,783 Bytes (revision 5da4770)
// toolParser.ts
// A robust, agnostic parser to extract success flag, tool name, and JSON output from a ToolResult-like string.
/** Outcome of {@link parseToolResult}. Every field is `null` until it has been determined. */
interface ParseResult {
  /** Value of the `success=` flag, or `null` when the flag is absent. */
  success: boolean | null;
  /** `full_tool_name` (preferred) or `tool_name` taken from `output.mcp_metadata`, if present. */
  toolName: string | null;
  /** The JSON-parsed `output=` value, or the raw extracted text when it is not valid JSON. */
  output: any;
  /** Description of the last problem encountered, or `null` when everything parsed cleanly. */
  error: string | null;
}

/**
 * Parses a raw tool-result string (e.g. `ToolResult(success=True, output={...})`) and extracts:
 * - success: the boolean after `success=` (case-insensitive `true`/`false`)
 * - toolName: taken from `output.mcp_metadata` when the output is an object
 * - output: parsed JSON, or the raw extracted text when `JSON.parse` rejects it
 * - error: why extraction or JSON parsing failed, if it did
 *
 * The value after `output=` is located with a small hand-written scanner so that nested
 * brackets, quoted strings and escaped quotes inside the payload do not end it early.
 * This function never throws; failures are reported through `error`.
 *
 * @param raw - The tool-result text to inspect.
 * @returns A populated {@link ParseResult}.
 */
export function parseToolResult(raw: string): ParseResult {
  const result: ParseResult = {
    success: null,
    toolName: null,
    output: null,
    error: null,
  };

  // Escapes that stand for a control character inside a quoted output value.
  // Any other escaped character (quote, backslash, ...) simply stands for itself.
  const quotedEscapes: Record<string, string> = { n: '\n', r: '\r', t: '\t' };

  try {
    // 1. Extract success flag via regex
    const successMatch = raw.match(/\bsuccess\s*=\s*(true|false)\b/i);
    if (successMatch) {
      result.success = successMatch[1].toLowerCase() === 'true';
    }

    // 2. Locate output= and step to the first character of its value
    const outIdx = raw.search(/\boutput\s*=/i);
    if (outIdx === -1) throw new Error('No output= found');
    let i = raw.indexOf('=', outIdx) + 1;
    while (i < raw.length && /\s/.test(raw[i])) i++;
    if (i >= raw.length) throw new Error('Output value missing');

    const startChar = raw[i];
    let extracted: string;

    if (startChar === '"' || startChar === "'") {
      // Quoted string: collect up to the matching unescaped quote, undoing one level of
      // backslash escaping. An unterminated string yields everything up to end of input.
      const pieces: string[] = [];
      let escaped = false;
      for (i++; i < raw.length; i++) {
        const ch = raw[i];
        if (escaped) {
          pieces.push(quotedEscapes[ch] ?? ch);
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === startChar) {
          break;
        } else {
          pieces.push(ch);
        }
      }
      extracted = pieces.join('');
    } else if (startChar === '{' || startChar === '[') {
      // JSON object or array: find the bracket that balances the opening one. Brackets
      // inside double-quoted strings are ignored, and an escaped character inside a
      // string is skipped as a unit so `\"` cannot terminate the string. The text is
      // taken verbatim with slice(), so escape sequences reach JSON.parse untouched.
      const closeChar = startChar === '{' ? '}' : ']';
      const start = i;
      let end = raw.length; // unterminated value: take the rest of the input
      let depth = 0;
      let inString = false;
      for (; i < raw.length; i++) {
        const ch = raw[i];
        if (inString) {
          if (ch === '\\') i++;
          else if (ch === '"') inString = false;
        } else if (ch === '"') {
          inString = true;
        } else if (ch === startChar) {
          depth++;
        } else if (ch === closeChar) {
          depth--;
          if (depth === 0) {
            end = i + 1;
            break;
          }
        }
      }
      extracted = raw.slice(start, end);
    } else {
      // Unquoted primitive: read until comma or closing parenthesis
      const start = i;
      while (i < raw.length && raw[i] !== ',' && raw[i] !== ')') i++;
      extracted = raw.slice(start, i).trim();
    }

    // 3. Parse extracted value; on failure keep the raw text and record why
    try {
      result.output = JSON.parse(extracted);
    } catch (e) {
      result.error = `Output JSON.parse error: ${e instanceof Error ? e.message : String(e)}`;
      result.output = extracted;
    }

    // 4. Extract toolName from metadata if it exists
    if (result.output !== null && typeof result.output === 'object') {
      const md: unknown = (result.output as Record<string, unknown>).mcp_metadata;
      if (md !== null && typeof md === 'object') {
        const { full_tool_name, tool_name } = md as Record<string, unknown>;
        const name = [full_tool_name, tool_name].find(
          (v): v is string => typeof v === 'string' && v !== '',
        );
        result.toolName = name ?? null;
      }
    }
  } catch (err: unknown) {
    result.error = (err instanceof Error && err.message) || String(err);
  }
  return result;
}
/** Kinds of token the lenient tokenizer hands to the parser. */
type TokenType =
  | 'LBRACE' | 'RBRACE'
  | 'LBRACK' | 'RBRACK'
  | 'COLON' | 'COMMA'
  | 'STRING' | 'NUMBER'
  | 'IDENT' | 'EOF';

/** A lexical token: its kind, raw text, and 1-based line/column where it starts. */
interface Token { type: TokenType; value: string; line: number; col: number; }

/**
 * Leniently parses JSON-like text and re-serialises it as strict, 2-space-indented JSON.
 *
 * Tolerated deviations from JSON: `//` and block comments, single-quoted strings,
 * unquoted keys, unquoted identifier values (lower-cased; `true`/`false`/`null` in any
 * letter case become the JSON literals), trailing or repeated commas, missing colons,
 * unknown characters (skipped) and unbalanced brackets. Problems are reported with
 * `console.warn` and the offending value becomes `null`; the function does not throw
 * on malformed input.
 *
 * Comments and commas are handled by the tokenizer/parser rather than by rewriting
 * the raw text, so the contents of string literals (URLs containing `//`, text
 * containing `,]`, ...) are never altered.
 *
 * @param messy - The JSON-like text to clean up.
 * @returns The parsed value serialised with `JSON.stringify(value, null, 2)`.
 */
export function cleanAndParse(messy: string): string {
  // -----------------------------------------
  // 1) Pre-process
  // -----------------------------------------
  // Each pattern also matches a backslash pair and leaves it untouched, so an escaped
  // backslash followed by a letter (e.g. "C:\\bin", "C:\\repo") is not mistaken for
  // a `\b` or `\r` escape.
  const text = messy
    // Remove spurious `\b` escapes ...
    .replace(/\\\\|\\b/g, (m) => (m === '\\b' ? '' : m))
    // ... and literal backspace characters.
    .replace(/\x08/g, '')
    // Normalise escaped line endings (`\r\n`, `\r`) to the two-character escape `\n`.
    .replace(/\\\\|\\r\\n|\\r/g, (m) => (m === '\\\\' ? m : '\\n'));

  // -----------------------------------------
  // 2) Tokenization (regex with sticky /y)
  // -----------------------------------------
  // Order matters: the first pattern that matches at the current offset wins.
  const TOKS = [
    ['WHITESPACE', /\s+/y],
    // JS-style comments; only recognised between tokens, never inside a string.
    ['COMMENT', /\/\/.*|\/\*[\s\S]*?\*\//y],
    ['LBRACE', /\{/y],
    ['RBRACE', /\}/y],
    ['LBRACK', /\[/y],
    ['RBRACK', /\]/y],
    ['COLON', /:/y],
    ['COMMA', /,/y],
    // Single- or double-quoted strings, allowing escaped chars
    ['STRING', /"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'/y],
    ['NUMBER', /-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/y],
    ['IDENT', /[A-Za-z_]\w*/y],
    // Anything else is consumed one character at a time and dropped.
    ['MISMATCH', /./y],
  ] as const;

  const tokens: Token[] = [];
  let line = 1;
  let col = 1;
  let idx = 0;
  while (idx < text.length) {
    let matched = false;
    for (const [type, rx] of TOKS) {
      rx.lastIndex = idx;
      const m = rx.exec(text);
      if (!m) continue;
      matched = true;
      const lexeme = m[0];
      if (type !== 'WHITESPACE' && type !== 'COMMENT' && type !== 'MISMATCH') {
        tokens.push({ type, value: lexeme, line, col });
      }
      // Update line/col to point just past the lexeme.
      const lastNewline = lexeme.lastIndexOf('\n');
      if (lastNewline === -1) {
        col += lexeme.length;
      } else {
        line += lexeme.split('\n').length - 1;
        col = lexeme.length - lastNewline;
      }
      idx = rx.lastIndex;
      break;
    }
    if (!matched) {
      // Should never happen, but skip one char to avoid an infinite loop.
      idx++;
      col++;
    }
  }
  tokens.push({ type: 'EOF', value: '', line, col });

  // -----------------------------------------
  // 3) Recursive-descent parser w/ error recovery
  // -----------------------------------------
  let pos = 0;
  // Clamped to the EOF token so reads past the end stay well-defined.
  const current = (): Token => tokens[Math.min(pos, tokens.length - 1)];
  const peek = (): TokenType => current().type;
  const advance = (): Token => {
    const tk = current();
    pos++;
    return tk;
  };

  const SIMPLE_ESCAPES: Record<string, string> = {
    n: '\n', t: '\t', r: '\r', f: '\f',
    '/': '/', '\\': '\\', '"': '"', "'": "'",
  };

  // Turns a STRING token (including its quotes) into the text it denotes. Standard
  // JSON escapes and \' are decoded; unrecognised escapes are kept verbatim so that
  // e.g. "C:\Users" loses nothing. Raw control characters are kept as they are;
  // JSON.stringify escapes them on output.
  function decodeString(literal: string): string {
    return literal
      .slice(1, -1)
      .replace(/\\(u[0-9a-fA-F]{4}|[\s\S])/g, (whole: string, esc: string) => {
        if (esc.length === 5) {
          return String.fromCharCode(parseInt(esc.slice(1), 16));
        }
        return SIMPLE_ESCAPES[esc] ?? whole;
      });
  }

  // Parses one value. On an unexpected token it warns, skips ahead to the next comma
  // or closing delimiter (without consuming it) and yields null.
  function parseValue(): unknown {
    const tk = current();
    try {
      switch (tk.type) {
        case 'LBRACE': return parseObject();
        case 'LBRACK': return parseArray();
        case 'STRING': return decodeString(advance().value);
        case 'NUMBER': {
          const v = advance().value;
          return /[.eE]/.test(v) ? parseFloat(v) : parseInt(v, 10);
        }
        case 'IDENT': {
          const lower = advance().value.toLowerCase();
          // `truen` is a known corruption of `true` in the inputs this cleans up.
          if (lower === 'true' || lower === 'truen') return true;
          if (lower === 'false') return false;
          if (lower === 'null') return null;
          return lower; // fallback: bare word becomes a (lower-cased) string
        }
        default:
          throw new Error(`Unexpected ${tk.type}`);
      }
    } catch (e) {
      console.warn(`Parse error at ${tk.line}:${tk.col}: ${e instanceof Error ? e.message : String(e)}`);
      // Error recovery: skip to next comma or closing delimiter.
      while (!['COMMA', 'RBRACE', 'RBRACK', 'EOF'].includes(peek())) {
        advance();
      }
      return null;
    }
  }

  // Parses `{ key: value, ... }`; keys may be quoted or bare identifiers.
  function parseObject(): Record<string, unknown> {
    const obj: Record<string, unknown> = {};
    advance(); // '{'
    while (peek() !== 'RBRACE' && peek() !== 'EOF') {
      if (peek() === 'COMMA') { advance(); continue; }
      const keyTok = current();
      if (keyTok.type !== 'STRING' && keyTok.type !== 'IDENT') {
        console.warn(`Expected key at ${keyTok.line}:${keyTok.col}`);
        advance();
        continue;
      }
      advance();
      const key = keyTok.type === 'STRING' ? decodeString(keyTok.value) : keyTok.value;
      if (peek() === 'COLON') advance();
      else console.warn(`Missing ':' after key at ${keyTok.line}:${keyTok.col}`);
      const val = parseValue();
      // defineProperty rather than assignment so a "__proto__" key becomes ordinary
      // data instead of replacing the object's prototype.
      Object.defineProperty(obj, key, {
        value: val, writable: true, enumerable: true, configurable: true,
      });
      if (peek() === 'COMMA') advance();
    }
    if (peek() === 'RBRACE') advance();
    return obj;
  }

  // Parses `[ value, ... ]`.
  function parseArray(): unknown[] {
    const arr: unknown[] = [];
    advance(); // '['
    while (peek() !== 'RBRACK' && peek() !== 'EOF') {
      if (peek() === 'COMMA') { advance(); continue; }
      if (peek() === 'RBRACE') {
        // A '}' cannot start a value and parseValue's recovery would not consume it,
        // so looping on it would never terminate. Treat it as a missing ']' and let
        // the enclosing object (if any) consume the brace.
        const tk = current();
        console.warn(`Missing ']' before '}' at ${tk.line}:${tk.col}`);
        break;
      }
      arr.push(parseValue());
    }
    if (peek() === 'RBRACK') advance();
    return arr;
  }

  const result = parseValue();
  return JSON.stringify(result, null, 2);
}
|