W
File size: 7,938 Bytes
2b64d42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// v2.0.72 (#115 #120) — NLU intent extractor tests.
//
// Covers real captures from probe runs against GLM-4.7 / GLM-5.1 / GPT-5.5 /
// Kimi-K2 in the cascade backend, plus synthetic patterns we expect future
// models to use. Layer 1 (explicit syntax) → Layer 3 (narrative), ordered
// by confidence.

import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { extractIntentFromNarrative } from '../src/handlers/intent-extractor.js';

// Test helper: builds a `{ type: 'function', function: { ... } }` tool
// definition.
//   name     - tool name; also used to derive the description text.
//   props    - map of parameter name -> JSON-schema type string.
//   required - list of required parameter names (passed through as-is).
const fnTool = (name, props = { command: 'string' }, required = ['command']) => {
  // Turn each `paramName: typeName` pair into `paramName: { type: typeName }`.
  const schemaPairs = [];
  for (const [paramName, typeName] of Object.entries(props)) {
    schemaPairs.push([paramName, { type: typeName }]);
  }

  const parameters = {
    type: 'object',
    properties: Object.fromEntries(schemaPairs),
    required,
  };

  return {
    type: 'function',
    function: {
      name,
      description: `${name} description`,
      parameters,
    },
  };
};

// Shared fixtures used by the suites below.
// SHELL_TOOL relies on fnTool's defaults: one required string parameter `command`.
const SHELL_TOOL = fnTool('shell_exec');
// READ_TOOL declares a single required string parameter `file_path`.
const READ_TOOL = fnTool('Read', { file_path: 'string' }, ['file_path']);
// Options object whose lastUserText asks for a tool run. The Layer 3 suite
// checks that a non-actionable lastUserText yields no narrative extraction.
const ACTIONABLE = { lastUserText: 'run shell_exec to echo something' };

// Layer 1: the model text already contains an explicit call form, either
// `name(arg="value")` or `function_call: name=... args={...}`. These are
// expected to be tagged 'explicit-syntax' with confidence >= 0.9, and to be
// dropped when the called name is not among the offered tools.
describe('Layer 1 — explicit invocation syntax', () => {
  it('extracts shell_exec(command="echo HI")', () => {
    const r = extractIntentFromNarrative(
      'shell_exec(command="echo HI")',
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].name, 'shell_exec');
    // argumentsJson is a JSON string, so parse it before comparing.
    assert.deepEqual(JSON.parse(r[0].argumentsJson), { command: 'echo HI' });
    assert.equal(r[0].layer, 'explicit-syntax');
    assert.ok(r[0].confidence >= 0.9);
  });

  it('extracts function_call: name=shell_exec args={"command":"X"}', () => {
    const r = extractIntentFromNarrative(
      'function_call: name=shell_exec args={"command":"echo X"}',
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].name, 'shell_exec');
    assert.equal(JSON.parse(r[0].argumentsJson).command, 'echo X');
  });

  it('rejects fn name not in tools[]', () => {
    // `os_command` is not one of the offered tools, so nothing may be extracted.
    const r = extractIntentFromNarrative(
      'os_command(cmd="echo X")', [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 0);
  });
});

// Layer 2: the tool name and the argument value both appear in backticks
// inside otherwise free-form prose. Results are expected to carry the
// 'backtick-quoted' layer tag.
describe('Layer 2 — backtick-quoted name + value', () => {
  it("extracts I'll call `shell_exec` with command `echo HI`", () => {
    const r = extractIntentFromNarrative(
      "I'll call `shell_exec` with command `echo HI`",
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].layer, 'backtick-quoted');
    assert.deepEqual(JSON.parse(r[0].argumentsJson), { command: 'echo HI' });
  });

  it('extracts use the `Read` function with file_path `/etc/hosts`', () => {
    // Uses a different tool and parameter name (`file_path`) than the
    // shell_exec case above.
    const r = extractIntentFromNarrative(
      'use the `Read` function with file_path `/etc/hosts`',
      [READ_TOOL], { lastUserText: 'read the file at /etc/hosts' },
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].name, 'Read');
    assert.deepEqual(JSON.parse(r[0].argumentsJson), { file_path: '/etc/hosts' });
  });
});

// Layer 3: plain-language statements of intent with no call syntax and no
// backticks. Results are expected to carry the 'narrative' layer tag, and
// to be produced only when the last user message is actionable.
describe('Layer 3 — natural narrative (live GLM-4.7 reproducer)', () => {
  it("LIVE: 'I should call the shell_exec function with the command \"echo HELLO_FROM_PROBE\"'", () => {
    // This is the actual emit captured from glm-4.7 probe before v2.0.72.
    const r = extractIntentFromNarrative(
      'I should call the shell_exec function with the command "echo HELLO_FROM_PROBE".',
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].name, 'shell_exec');
    assert.deepEqual(JSON.parse(r[0].argumentsJson), { command: 'echo HELLO_FROM_PROBE' });
    assert.equal(r[0].layer, 'narrative');
  });

  it("'Let me run shell_exec with command echo HI'", () => {
    const r = extractIntentFromNarrative(
      "Let me run shell_exec with command 'echo HI'",
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].name, 'shell_exec');
  });

  it("'I'll invoke the Read tool to read /etc/hosts'", () => {
    // The value is not quoted here; the expected argument is the bare path.
    const r = extractIntentFromNarrative(
      "I'll invoke the Read tool to read /etc/hosts",
      [READ_TOOL], { lastUserText: 'read /etc/hosts' },
    );
    assert.equal(r.length, 1);
    assert.equal(r[0].name, 'Read');
    assert.deepEqual(JSON.parse(r[0].argumentsJson), { file_path: '/etc/hosts' });
  });

  it('Layer 3 only fires when user prompt is actionable', () => {
    const r = extractIntentFromNarrative(
      'I should call the shell_exec function with the command "echo HI".',
      [SHELL_TOOL],
      { lastUserText: 'tell me about your day' }, // NOT actionable
    );
    assert.equal(r.length, 0);
  });

  // v2.0.76 follow-up — caught in v2.0.75 e2e probe against glm-4.7.
  // GLM emitted "...with command 'command'" (the literal word) which
  // made the regex bind value="command". Filter placeholder values.
  it("rejects placeholder values ('command' / 'argument' / 'input' / etc.)", () => {
    const r = extractIntentFromNarrative(
      "I'll call shell_exec with command 'command'.",
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 0);
  });

  it("dedupes when narrative says the real command then echoes 'with command command'", () => {
    // Real GLM-4.7 v2.0.75 probe pattern that produced 2 tool_calls,
    // one valid and one bogus. Now should produce just 1.
    const r = extractIntentFromNarrative(
      `I'll call shell_exec with command 'echo HELLO'. The user wants me to use the shell_exec function with command 'command' as the parameter name.`,
      [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(r.length, 1);
    assert.deepEqual(JSON.parse(r[0].argumentsJson), { command: 'echo HELLO' });
  });
});

// Edge cases: unusable input, de-duplication across layers, multiple
// distinct calls, and the environment kill switch.
describe('robustness', () => {
  it('returns [] for hopeless fabricated output (just a number)', () => {
    const calls = extractIntentFromNarrative('1777751588', [SHELL_TOOL], ACTIONABLE);
    assert.equal(calls.length, 0);
  });

  it('returns [] for empty / null input', () => {
    const emptyText = extractIntentFromNarrative('', [SHELL_TOOL], ACTIONABLE);
    assert.deepEqual(emptyText, []);

    const nullText = extractIntentFromNarrative(null, [SHELL_TOOL], ACTIONABLE);
    assert.deepEqual(nullText, []);

    // Non-empty text but no tools offered.
    const noTools = extractIntentFromNarrative('text', [], ACTIONABLE);
    assert.deepEqual(noTools, []);
  });

  it('dedupes identical extractions', () => {
    const narrativePart = 'I should call the shell_exec function with the command "echo X". ';
    const explicitPart = 'shell_exec(command="echo X")';
    const calls = extractIntentFromNarrative(
      `${narrativePart}${explicitPart}`, [SHELL_TOOL], ACTIONABLE,
    );
    // Same (name, args) → 1 entry. Layer 1 wins on confidence.
    assert.equal(calls.length, 1);
    assert.equal(calls[0].layer, 'explicit-syntax');
  });

  it('keeps multiple distinct tool_calls', () => {
    const calls = extractIntentFromNarrative(
      'shell_exec(command="ls")\nshell_exec(command="pwd")', [SHELL_TOOL], ACTIONABLE,
    );
    assert.equal(calls.length, 2);
    // Sort so the assertion does not depend on extraction order.
    const commands = calls.map((call) => JSON.parse(call.argumentsJson).command);
    commands.sort();
    assert.deepEqual(commands, ['ls', 'pwd']);
  });

  it('env WINDSURFAPI_NLU_RECOVERY=0 disables extractor entirely', () => {
    const saved = process.env.WINDSURFAPI_NLU_RECOVERY;
    process.env.WINDSURFAPI_NLU_RECOVERY = '0';
    try {
      const calls = extractIntentFromNarrative(
        'shell_exec(command="echo HI")', [SHELL_TOOL], ACTIONABLE,
      );
      assert.equal(calls.length, 0);
    } finally {
      // Put the variable back exactly as it was, including "unset".
      if (saved === undefined) {
        delete process.env.WINDSURFAPI_NLU_RECOVERY;
      } else {
        process.env.WINDSURFAPI_NLU_RECOVERY = saved;
      }
    }
  });
});

// Checks that the `minConfidence` option filters extractions by confidence.
describe('confidence threshold opt', () => {
  it('opt.minConfidence filters layer 3 narrative-only extractions', () => {
    // Default threshold lets narrative through (0.65). With the bar raised
    // to 0.8 only Layer 1+2 survive, so this narrative-only text yields nothing.
    const narrative = 'I should call the shell_exec function with the command "echo HI".';
    const extractAt = (minConfidence) => extractIntentFromNarrative(
      narrative, [SHELL_TOOL], { ...ACTIONABLE, minConfidence },
    );

    const strict = extractAt(0.8);
    assert.equal(strict.length, 0);

    const lenient = extractAt(0.5);
    assert.equal(lenient.length, 1);
  });
});