/**
* v2.0.72 (#115 #120 root-cause workaround) — NLU intent extractor.
*
* Cascade upstream's `SendUserCascadeMessage` proto has no OpenAI
* `tools[]` field. The proxy injects tool definitions into the system
* prompt (additional_instructions_section), but GPT / GLM / Kimi
* weren't trained on prompt-level tool-calling protocols — they see the
* `{"name":...}` instructions, decide to call
* the tool, but emit it as natural-language NARRATION instead of the
* exact markup we asked for. v2.0.71 fabricate detection just flagged
* these as failures; v2.0.72 actually RECOVERS the call.
*
* Real probe captures (from scripts/probes/v2071-glm-kimi-tool-probe):
*
* GLM-4.7 → "I should call the shell_exec function with the command
* 'echo HELLO_FROM_PROBE'."
* GLM-5.1 → "I'll run the shell command as requested." (no args!)
* GPT-5.5 → "PROBE_V0270_1777751588" (pure fabricated output)
*
* The first one carries enough signal to reconstruct the call; the
* second has the intent but no args; the third is hopeless. Layered
* extraction:
*
* Layer 1 (highest confidence) — explicit invocation syntax:
* "Let me run shell_command(command='echo HELLO')"
* "function_call: shell_exec(\"echo HELLO\")"
*
* Layer 2 — backtick-quoted name + value:
* "I'll call `shell_exec` with command `echo HELLO`"
* "use the `Read` function with file_path `/etc/hosts`"
*
* Layer 3 — natural narrative (model "thinking out loud"):
* "I should call the shell_exec function with the command 'echo HI'"
* "Let me invoke the Read tool to read /etc/hosts"
*
* Each layer requires the extracted name to match a caller-declared
* tool. Layer 3 also requires the user prompt to plausibly want a
* tool call (shell-style verbs in the most recent user message).
*
* Conservative by design: false-positive tool_calls drive agent loops
* to execute things the model didn't actually decide on. When in
* doubt, return [].
*/
import { log } from '../config.js';
/**
* @typedef {Object} ExtractedToolCall
* @property {string} name OpenAI tool name (matches caller's tools[])
* @property {string} argumentsJson JSON-stringified args
* @property {'explicit-syntax'|'backtick-quoted'|'narrative'} layer
* @property {number} confidence 0..1
*/
/**
 * Index the caller-declared tools for the extraction layers.
 *
 * Only entries with `type === 'function'` and a non-empty string
 * `function.name` are indexed. For tools whose `parameters` schema is
 * an object with `properties`, a "primary" parameter is recorded so
 * single-value shorthands ("with command 'echo X'") can be mapped to
 * `arguments.<primary> = 'echo X'`.
 *
 * @param {Array<object>} tools OpenAI-style `tools[]`; a non-array yields empty indexes.
 * @returns {{names: Set<string>, primaryParam: Map<string, string>}}
 */
function indexTools(tools) {
  const names = new Set();
  const primaryParam = new Map(); // tool name → primary param name
  if (!Array.isArray(tools)) return { names, primaryParam };

  // Selection order: first required param typed `string`, else the first
  // required param, else the first declared `string` property, else the
  // first declared property.
  const choosePrimary = (schema) => {
    const props = schema.properties;
    const required = Array.isArray(schema.required) ? schema.required : [];
    const stringIdx = required.findIndex((key) => props[key]?.type === 'string');
    const fromRequired = required[stringIdx >= 0 ? stringIdx : 0];
    if (fromRequired) return fromRequired;
    const declared = Object.keys(props || {});
    return declared.find((key) => props[key]?.type === 'string') || declared[0];
  };

  tools.forEach((tool) => {
    if (tool?.type !== 'function') return;
    const fnName = tool.function?.name;
    if (typeof fnName !== 'string' || !fnName) return;
    names.add(fnName);
    const schema = tool.function?.parameters;
    if (schema?.type !== 'object' || !schema.properties) return;
    const primary = choosePrimary(schema);
    if (primary) primaryParam.set(fnName, primary);
  });

  return { names, primaryParam };
}
/**
 * Escape a (caller-controlled) tool name so it can be embedded in a
 * dynamically built RegExp and match only itself, literally.
 *
 * @param {*} s Value to escape; coerced with `String()`.
 * @returns {string} The input with every regex metacharacter backslash-escaped.
 */
function escapeRe(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return String(s).replace(metaChars, (ch) => `\\${ch}`);
}
// v2.0.78 (#120 follow-up + audit H-2): values extracted from narrative
// can easily be a generic noun phrase ("a shell command", "the file",
// "your input") or a literal placeholder keyword ("command",
// "argument"). Both produce garbage tool_calls — the agent loop will
// then try to execute `command` as a literal command, fail, and recurse.
// Reject these uniformly across all three layers.
const PLACEHOLDER_KEYWORDS = new Set([
  'command', 'argument', 'arguments', 'param', 'parameter',
  'parameters', 'input', 'value', 'file_path', 'filepath', 'path',
  'query', 'string', 'text', 'name', 'arg', 'output',
  // v2.0.81 (#125 — GLM-5.1 Chinese narrate): models echo Chinese
  // param-name keywords as the value too. "调用 shell_exec 命令 '命令'"
  // would otherwise produce a real tool_call with command='命令'.
  '命令', '参数', '文件', '路径', '输入', '值', '字符串', '文本', '名称', '查询', '输出',
]);
const ARTICLE_PREFIX_RE = /^(?:a|an|the|this|that|these|those|your|my|our|some|any|each|every)\s+/i;
// Chinese article-led / vague phrase prefixes — "某个命令" / "一个命令"
// / "某种参数" — same idea as ARTICLE_PREFIX_RE but for CJK.
const CN_VAGUE_PREFIX_RE = /^(?:某个?|一个|这个|那个|某种|什么|任何|每个|所有的?)/;

/**
 * Decide whether an extracted argument value is a placeholder rather
 * than a real value the model intended to pass.
 *
 * A value is treated as a placeholder when it is not a string, is
 * blank, equals one of PLACEHOLDER_KEYWORDS (case-insensitively, after
 * trailing punctuation is removed), or starts with an English article /
 * determiner or a Chinese vague prefix.
 *
 * @param {*} value Candidate argument value.
 * @returns {boolean} `true` when the value should be rejected.
 */
function looksLikePlaceholderValue(value) {
  if (typeof value !== 'string' || !value.trim()) return true;
  const v = value.trim();
  // Strip trailing punctuation before comparison: ASCII `. , ; : ! ?`
  // plus the ideographic full stop (U+3002) and the full-width forms of
  // `, ; : ! ?`. The full-width characters are written as \u escapes so
  // they cannot be silently folded to their ASCII look-alikes; without
  // them a value such as "命令" followed by a full-width comma slips
  // past the keyword check.
  const stripped = v.replace(/[.,;:!?\u3002\uFF0C\uFF1B\uFF1A\uFF01\uFF1F]+$/, '');
  if (PLACEHOLDER_KEYWORDS.has(stripped.toLowerCase())) return true;
  // Article-led phrase ("a shell command", "the file") — model
  // narrating about the call rather than supplying the call value.
  if (ARTICLE_PREFIX_RE.test(stripped)) return true;
  // Chinese vague prefix — "某个命令", "一个文件", "这个参数".
  if (CN_VAGUE_PREFIX_RE.test(stripped)) return true;
  return false;
}
/**
 * Try to parse a JSON object that starts at `text[start]` (a `{`).
 *
 * Candidate end points are each successive `}` within the same
 * 2002-character window the previous regex capture allowed; the first
 * slice that parses as JSON wins. This handles nested objects and `}`
 * characters inside string values, which a "shortest `{...}`" capture
 * truncates.
 *
 * @param {string} text Full text being scanned.
 * @param {number} start Index of the opening `{`.
 * @returns {{args: object, end: number}|null} Parsed object and the index
 *   just past its closing brace, or `null` when nothing parses.
 */
function parseLeadingJsonObject(text, start) {
  const limit = Math.min(text.length, start + 2002);
  for (
    let close = text.indexOf('}', start + 1);
    close !== -1 && close < limit;
    close = text.indexOf('}', close + 1)
  ) {
    try {
      return { args: JSON.parse(text.slice(start, close + 1)), end: close + 1 };
    } catch {
      // Slice is not a complete JSON object yet — extend to the next `}`.
    }
  }
  return null;
}

/**
 * Layer 1: explicit invocation syntax.
 *
 * shell_command(command="echo X")
 * shell_exec("echo X")
 * function_call: name=shell_exec args={"command":"echo X"}
 *
 * Call-style matches yield `{ [param]: value }` (confidence 0.95) or
 * `{ _value: value }` when no parameter name was written (0.85).
 * `function_call:` matches yield the parsed JSON args (0.9). A
 * `function_call:` whose args blob cannot be parsed as JSON is dropped
 * rather than emitted with empty arguments: a call with fabricated
 * `{}` args is exactly the false positive this module tries to avoid.
 *
 * @param {string} text Model output to scan.
 * @param {Set<string>} names Declared tool names; other names are ignored.
 * @returns {Array<{name: string, argumentsJson: string, layer: string, confidence: number}>}
 */
function extractLayer1(text, names) {
  const out = [];
  // function_name(arg=value) or function_name("value")
  const reExplicit = /\b([A-Za-z_][A-Za-z0-9_]*)\s*\(\s*(?:([A-Za-z_][A-Za-z0-9_]*)\s*=\s*)?["'`]([^"'`)]{1,2000})["'`]\s*\)/g;
  let m;
  while ((m = reExplicit.exec(text)) !== null) {
    const [, fn, paramName, value] = m;
    if (!names.has(fn)) continue;
    if (looksLikePlaceholderValue(value)) continue;
    const args = paramName ? { [paramName]: value } : { _value: value };
    out.push({
      name: fn,
      argumentsJson: JSON.stringify(args),
      layer: 'explicit-syntax',
      confidence: paramName ? 0.95 : 0.85,
    });
  }
  // function_call: name=X args={...}
  // The regex only locates the tool name and the opening brace; the
  // object itself is delimited by actually parsing it.
  const reFc = /function[_\s]?call\s*[:=][^{]*?\bname\s*[:=]\s*["'`]?([A-Za-z_][A-Za-z0-9_]*)["'`]?[^{]*?\{/g;
  while ((m = reFc.exec(text)) !== null) {
    const fn = m[1];
    if (!names.has(fn)) continue;
    const parsed = parseLeadingJsonObject(text, reFc.lastIndex - 1);
    if (!parsed) continue;
    // Resume scanning after the object so its contents are not re-matched.
    reFc.lastIndex = parsed.end;
    out.push({
      name: fn,
      argumentsJson: JSON.stringify(parsed.args),
      layer: 'explicit-syntax',
      confidence: 0.9,
    });
  }
  return out;
}
/**
 * Layer 2: backtick-quoted name + later backtick-quoted value.
 *
 * "I'll call `shell_exec` with command `echo HELLO`"
 * "use the `Read` function with file_path `/etc/hosts`"
 *
 * For every backtick-quoted occurrence of a declared tool name, the
 * next backtick-quoted token within 200 characters is taken as the
 * value of that tool's primary parameter (falling back to `input`).
 * The value is rejected when it is a placeholder or when it is itself
 * a declared tool name — prose that merely lists tools ("`Read` and
 * `Write`") must not become Read(file_path="Write").
 *
 * @param {string} text Model output to scan.
 * @param {Set<string>} names Declared tool names.
 * @param {Map<string, string>} primaryParam Tool name → primary parameter name.
 * @returns {Array<{name: string, argumentsJson: string, layer: string, confidence: number}>}
 */
function extractLayer2(text, names, primaryParam) {
  const out = [];
  // Capture optional "with PARAM `value`" or just "`value`". Not global,
  // so it carries no state between uses and can be built once.
  const argRe = /(?:with\s+)?(?:the\s+)?(?:argument|param|parameter|input|command|file[_-]?path|path|query)?\s*[:=]?\s*`([^`]{1,1000})`/i;
  for (const fn of names) {
    const fnRe = new RegExp(`\\\`${escapeRe(fn)}\\\``, 'g');
    let m;
    while ((m = fnRe.exec(text)) !== null) {
      // Look for the next backtick-quoted token within 200 chars.
      const tailStart = m.index + m[0].length;
      const a = text.slice(tailStart, tailStart + 200).match(argRe);
      if (!a) continue;
      const value = a[1];
      // The "value" is just another tool being mentioned, not an argument.
      if (names.has(value.trim())) continue;
      if (looksLikePlaceholderValue(value)) continue;
      const param = primaryParam.get(fn) || 'input';
      out.push({
        name: fn,
        argumentsJson: JSON.stringify({ [param]: value }),
        layer: 'backtick-quoted',
        confidence: 0.8,
      });
    }
  }
  return out;
}
/**
 * Layer 3: natural narrative.
 *
 * "I should call the shell_exec function with the command 'echo HI'"
 * "Let me invoke the Read tool to read /etc/hosts"
 * "I'll run shell_command with command echo HELLO"
 *
 * For each declared tool, finds "<verb> [the] [function|tool] <name>
 * [function|tool]" (English or Chinese verbs), then searches the next
 * 300 characters for an argument value using an ordered list of
 * patterns. The value becomes the tool's primary parameter (falling
 * back to `input`).
 *
 * @param {string} text Model output to scan.
 * @param {Set<string>} names Declared tool names.
 * @param {Map<string, string>} primaryParam Tool name → primary parameter name.
 * @returns {Array<{name: string, argumentsJson: string, layer: string, confidence: number}>}
 */
function extractLayer3(text, names, primaryParam) {
  const out = [];
  // v2.0.81 (#125 DuZunTianXia): GLM-5.1 narrates in Chinese — logs
  // showed "让我用 Bash 来列出..." / "我会调用 X 工具", none of which an
  // English-only verb list picked up, so Chinese verbs sit alongside the
  // English ones. The tool name itself must still appear literally.
  //
  // `\b` does not work at a Chinese/Latin junction, so no word boundary
  // is placed in front of the whole group. The Latin verbs do get an
  // identifier lookbehind: without it "use" matches inside "because" and
  // "run" inside "rerun", turning "…because Bash 'rm -rf x'…" into a call.
  const verbs = '(?:(?<![A-Za-z0-9_])(?:call|invoke|run|use|execute|exec|trigger|fire)'
    + '|调用|使用|运行|执行|触发|启动|让我用|让我使用|我会用|我将用|通过|借助|采用)';
  const articles = '(?:the\\s+)?';
  // Suffix matches ONLY tool/function meta-words (not arg labels like
  // "command" / "命令") so the latter stay in the tail and feed the
  // argument patterns below.
  const suffix = '(?:\\s+(?:function|tool|method|函数|工具|方法))?';
  // Argument patterns, ordered by specificity. None is global, so they
  // hold no state and are built once rather than per match.
  const argPatterns = [
    // with the command 'echo X' / with command "echo X" / with command `echo X`
    /\bwith\s+(?:the\s+)?(?:command|argument|param(?:eter)?|input|file[_-]?path|path|query)\s+["'`]([^"'`\n]{1,500})["'`]/i,
    // bare keyword + value (no "with"): command 'echo X' / argument "X"
    /(?:^|\s)(?:command|argument|param(?:eter)?|input|file[_-]?path|path|query)\s+["'`]([^"'`\n]{1,500})["'`]/i,
    // Chinese: 用命令 'X' / 传入 'X' / 参数 'X' / 命令 'X' / 路径 'X'
    /(?:用|使用|传入|输入|参数(?:为)?|命令(?:为)?|路径(?:为)?|文件(?:为)?|查询(?:为)?)\s*["'`「『]([^"'`\n「」『』]{1,500})["'`」』]/,
    // with 'echo X' (no param keyword)
    /\bwith\s+["'`]([^"'`\n]{1,500})["'`]/i,
    // to read /etc/hosts (positional after action verb)
    /\bto\s+(?:read|run|execute|view|search|find|cat|ls)\s+([\S][^\n]{0,200})/i,
    // : 'echo X' / = 'echo X'
    /[:=]\s*["'`]([^"'`\n]{1,500})["'`]/,
    // last resort: very first quoted string in the tail, allowing leading
    // whitespace, ASCII or full-width comma, and ASCII or ideographic stop
    /^[\s,\uFF0C\u3002.]*["'`「『]([^"'`\n「」『』]{1,500})["'`」』]/,
  ];
  for (const fn of names) {
    // The lookahead after the name stops `Bash` from matching the front
    // of a longer identifier such as "Bashful" or "Bash_v2".
    const namePat = new RegExp(
      `${verbs}\\s*${articles}(?:function|tool|method|函数|工具|方法)?\\s*\\\`?${escapeRe(fn)}(?![A-Za-z0-9_])\\\`?${suffix}`,
      'gi',
    );
    let m;
    while ((m = namePat.exec(text)) !== null) {
      // Hunt for a value within the next 300 chars.
      const tailStart = m.index + m[0].length;
      const tail = text.slice(tailStart, tailStart + 300);
      let value = null;
      for (const pat of argPatterns) {
        const a = tail.match(pat);
        if (a && a[1]) { value = a[1].trim(); break; }
      }
      if (!value) continue;
      // v2.0.76 + v2.0.78 (audit H-2): reject placeholder keywords
      // (`command` / `argument` / ...) AND article-led prose phrases
      // (`a shell command` / `the file` / `your input`).
      if (looksLikePlaceholderValue(value)) continue;
      const param = primaryParam.get(fn) || 'input';
      out.push({
        name: fn,
        argumentsJson: JSON.stringify({ [param]: value }),
        layer: 'narrative',
        confidence: 0.65,
      });
    }
  }
  return out;
}
/**
 * Heuristic gate for Layer 3: does the latest user message read like a
 * request a tool could fulfil? Narrative extraction is only attempted
 * when this returns true, which keeps casual chat from producing
 * tool_calls.
 *
 * @param {string} lastUserText Most recent user message (falsy → not actionable).
 * @returns {boolean} `true` when any action/tooling signal is present.
 */
function userPromptLooksActionable(lastUserText) {
  if (!lastUserText) return false;
  // v2.0.81 (#125): English and Chinese signals are both covered so a
  // Chinese system prompt + Chinese user turn still reaches Layer 3.
  const actionableSignals = [
    // English action verbs
    /\b(?:run|exec|execute|cat|ls|echo|grep|find|read|search|list|invoke|call|fetch|get|fix|edit|write|patch)\b/i,
    // English tooling nouns
    /\b(?:shell|bash|terminal|command|tool|function|file|path)\b/i,
    // Chinese action verbs
    /(?:运行|执行|读取|查看|列出|查找|搜索|获取|修改|编辑|写入|修复|分析|调用|使用|拉取|下载|找到|看一下|看看|检查)/,
    // Chinese tooling nouns
    /(?:文件|目录|路径|命令|工具|函数|参数|项目|代码|配置)/,
  ];
  return actionableSignals.some((signal) => signal.test(lastUserText));
}
/**
 * Detect whether the model's narrative looks like it INTENDED to call
 * a tool but never produced a usable extraction. Per the original
 * notes this gates the retry-with-correction loop in chat.js, so an
 * extra round-trip is only spent when there is clear tool intent.
 *
 * Returns one of:
 * - the matched declared tool name (when the model named it inline)
 * - the FIRST declared tool name (when the user prompt is actionable
 *   and the narrative has both an intent verb and an action keyword,
 *   even if no specific tool was named — GLM-5.1 will say "Let me
 *   list the files" without saying "Bash")
 * - null when there's no usable signal
 *
 * v2.0.82 (#125 — proper translator layer beyond NLU).
 *
 * @param {string} text Model output to inspect.
 * @param {Array<object>} tools OpenAI-style `tools[]` declared by the caller.
 * @param {{lastUserText?: string}} [opts] `lastUserText` is the latest user message.
 * @returns {string|null} A declared tool name, or `null`.
 */
export function detectToolIntentInNarrative(text, tools, opts = {}) {
  if (typeof text !== 'string' || !text.trim()) return null;
  if (!Array.isArray(tools) || !tools.length) return null;
  const lastUserText = opts.lastUserText || '';
  if (!userPromptLooksActionable(lastUserText)) return null;
  const { names } = indexTools(tools);
  if (!names.size) return null;
  // Verb forms (English + Chinese) that signal "I'm about to call X".
  const verbPattern = /\b(?:call|invoke|run|use|execute|exec|trigger|fire|going to|will|let me|i'?ll|i'?m going|need to|should)\b|(?:调用|使用|运行|执行|触发|启动|让我|我会|我将|准备|打算|想要|需要|应该)/i;
  if (!verbPattern.test(text)) return null;
  // Pass 1: specific tool name in narrative (most precise).
  for (const fn of names) {
    const fnRe = new RegExp(`\\b${escapeRe(fn)}\\b|\\\`${escapeRe(fn)}\\\``);
    if (fnRe.test(text)) return fn;
  }
  // Pass 2: action keyword present (model said "let me list..." but
  // didn't name the tool). Return the first declared tool — caller's
  // correction prompt will name it explicitly so the retry knows
  // which tool to emit.
  //
  // Every alternative sits between `\b…\b`, so a bare stem cannot match
  // an inflected word. `analy[sz]\w*` therefore spells the stem with its
  // endings (analyze / analyse / analyzing / analysis); the previous
  // literal `analyz` could never match any real word.
  const actionVerbPattern = /\b(?:list|show|read|cat|grep|find|search|view|fetch|get|create|write|edit|run|execute|check|inspect|examine|analy[sz]\w*|browse|explore)\b|(?:列出|展示|读取|查看|查找|搜索|获取|拉取|下载|创建|写入|编辑|运行|执行|检查|检视|分析|浏览|探索|看一下|看看)/i;
  if (actionVerbPattern.test(text)) return [...names][0];
  return null;
}
/**
 * Top-level extractor: runs the three layers over `text` and returns
 * the recovered tool_calls, deduplicated and sorted by descending
 * confidence. Returns an empty array when nothing is recoverable.
 *
 * Setting the `WINDSURFAPI_NLU_RECOVERY=0` environment variable turns
 * recovery off entirely (default ON).
 *
 * @param {string} text Model output to scan.
 * @param {Array<object>} tools OpenAI-style `tools[]` declared by the caller.
 * @param {{lastUserText?: string, minConfidence?: number, markers?: string[]}} [opts]
 *   `lastUserText` gates Layer 3; `minConfidence` (default 0.65) drops
 *   weaker candidates; `markers` lists structural markers seen upstream.
 * @returns {Array<{name: string, argumentsJson: string, layer: string, confidence: number}>}
 */
export function extractIntentFromNarrative(text, tools, opts = {}) {
  if (process.env.WINDSURFAPI_NLU_RECOVERY === '0') return [];
  const hasText = typeof text === 'string' && Boolean(text.trim());
  if (!hasText) return [];
  const hasTools = Array.isArray(tools) && tools.length > 0;
  if (!hasTools) return [];

  const lastUserText = opts.lastUserText || '';
  let minConfidence = 0.65;
  if (typeof opts.minConfidence === 'number') minConfidence = opts.minConfidence;

  // v2.0.78 (audit H-4) / v2.0.79: Layer 3 is skipped only when an
  // `xml_tag` marker was seen without a `natural_lang` marker. Per the
  // original notes that is where a parser failure followed by Layer 3
  // most often produced false positives, while fenced_json / bare_json /
  // openai_native outputs (GLM, Kimi, some GPT) usually come with a
  // legitimate narrated call that Layer 3 is meant to catch.
  const markers = Array.isArray(opts.markers) ? opts.markers : [];
  const skipLayer3 = markers.includes('xml_tag') && !markers.includes('natural_lang');

  const { names, primaryParam } = indexTools(tools);
  if (names.size === 0) return [];

  const candidates = [];
  candidates.push(...extractLayer1(text, names));
  candidates.push(...extractLayer2(text, names, primaryParam));
  if (!skipLayer3 && userPromptLooksActionable(lastUserText)) {
    candidates.push(...extractLayer3(text, names, primaryParam));
  }
  if (candidates.length === 0) return [];

  // Dedupe by (name, argumentsJson), keeping the highest-confidence pick.
  const bestByKey = new Map();
  candidates.forEach((candidate) => {
    if (candidate.confidence < minConfidence) return;
    const key = `${candidate.name}::${candidate.argumentsJson}`;
    const current = bestByKey.get(key);
    if (current && !(candidate.confidence > current.confidence)) return;
    bestByKey.set(key, candidate);
  });

  const recovered = Array.from(bestByKey.values());
  recovered.sort((left, right) => right.confidence - left.confidence);
  if (recovered.length) {
    const summary = recovered
      .map((t) => `${t.name}@${t.layer}/${t.confidence.toFixed(2)}`)
      .join(', ');
    const skipNote = skipLayer3 ? ' (layer3-skipped: structural markers seen)' : '';
    log.info(`NLU recovery: extracted ${recovered.length} tool_call(s) from narrative — ${summary}${skipNote}`);
  }
  return recovered;
}