incognitolm commited on
Commit
fb79c98
Β·
1 Parent(s): cad44cf
Files changed (2) hide show
  1. server/Searchworker.js +31 -0
  2. server/chatStream.js +54 -35
server/Searchworker.js ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // searchWorker.js β€” runs inside a worker_thread, fully isolated from the
2
+ // main WS server. Receives a query via workerData, performs the Gradio
3
+ // search, posts the result back via parentPort, then exits.
4
+ //
5
+ // Because this runs in its own V8 context / event loop, the Gradio client's
6
+ // internal SSE fetch stream cannot interfere with the main thread's ws server.
7
+
8
+ import { workerData, parentPort } from 'worker_threads';
9
+ import { Client } from '@gradio/client';
10
+
11
+ async function run() {
12
+ const { query } = workerData;
13
+ let client = null;
14
+ try {
15
+ client = await Client.connect('incognitolm/Web-Search');
16
+ const result = await client.predict('/perform_search', { query });
17
+ const raw = Array.isArray(result.data) ? result.data[0] : result.data;
18
+ if (!raw) throw new Error('Empty response from search endpoint');
19
+ const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
20
+ parentPort.postMessage({ ok: true, result: text });
21
+ } catch (err) {
22
+ parentPort.postMessage({ ok: false, error: String(err) });
23
+ } finally {
24
+ try { client?.close?.(); } catch (_) {}
25
+ // Force-exit so the worker doesn't hang on lingering async handles
26
+ // (the SSE response body reader, heartbeat timer, etc.)
27
+ setTimeout(() => process.exit(0), 0);
28
+ }
29
+ }
30
+
31
+ run();
server/chatStream.js CHANGED
@@ -1,33 +1,55 @@
1
  import OpenAI from "openai";
2
- import { Client } from "@gradio/client";
 
 
3
  import { LIGHTNING_BASE } from "./config.js";
4
 
5
- // ── Web Search via Gradio client (fresh connection per call) ──────────────
6
  //
7
- // The original code kept a singleton Gradio client whose persistent internal
8
- // WebSocket would emit events that bled into the Node WS server, causing the
9
- // browser to see spurious disconnects after every search.
 
 
 
 
10
  //
11
- // Fix: create a brand-new Client.connect() for each search call, then
12
- // immediately destroy it when done. This matches the pattern in the working
13
- // Electron app (see tools.ts: ollamaSearch). There is no performance penalty
14
- // worth worrying about because searches are infrequent and the Space cold-
15
- // start cost dwarfs the connect overhead.
16
-
17
- async function gradioSearch(query) {
18
- let client = null;
19
- try {
20
- client = await Client.connect("incognitolm/Web-Search");
21
- const result = await client.predict("/perform_search", { query });
22
- // result.data is an array; search results are in the first element.
23
- const raw = Array.isArray(result.data) ? result.data[0] : result.data;
24
- if (!raw) throw new Error("Empty response from search endpoint");
25
- return typeof raw === "string" ? raw : JSON.stringify(raw);
26
- } finally {
27
- // Tear down the Gradio client's internal WebSocket so it doesn't linger
28
- // and interfere with the server's own WS connections.
29
- try { client?.close?.(); } catch (_) {}
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
32
 
33
  const SYSTEM_PROMPT =
@@ -172,14 +194,11 @@ export async function streamChat(ws, {
172
  const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
173
 
174
  function normalizeMessage(msg) {
175
- // Drop asset entries (role: "image"/"video"/"audio") β€” these are UI-only
176
- // and sending them to the LLM causes invalid-role rejections / blank responses
177
  if (!VALID_ROLES.has(msg.role)) return null;
178
 
179
  if (msg.role === "assistant" && msg.tool_calls) {
180
  return { role: "assistant", content: "", tool_calls: msg.tool_calls };
181
  }
182
- // Flatten multipart content arrays (e.g. image attachments) to text-only for history
183
  if (Array.isArray(msg.content)) {
184
  const textOnly = msg.content
185
  .filter(b => b.type === "text")
@@ -277,8 +296,8 @@ function buildToolList(tools) {
277
  async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abortSignal, onToolCall, onNewAsset) {
278
  const toolResults = [];
279
  const authHeaders = {};
280
- if (accessToken) {
281
- authHeaders["Authorization"] = `Bearer ${accessToken}`;
282
  } else {
283
  console.log("No access token");
284
  }
@@ -335,9 +354,9 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
335
  onNewAsset({ role: "image", content: dataUrl });
336
  result = "Image generated successfully and shown to the user.";
337
  } else if (res.status == 402) {
338
- result = "An upgraded plan is required for higher limits."
339
  } else if (res.status == 429) {
340
- result = "Too many requests. Try again later."
341
  } else {
342
  result = `Image generation failed: ${res.status}`;
343
  }
@@ -363,9 +382,9 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
363
  onNewAsset({ role: "video", content: dataUrl });
364
  result = "Video generated successfully and shown to the user.";
365
  } else if (res.status == 402) {
366
- result = "An upgraded plan is required for higher limits."
367
  } else if (res.status == 429) {
368
- result = "Too many requests. Try again later."
369
  } else {
370
  result = `Video generation failed: ${res.status}`;
371
  }
@@ -385,7 +404,7 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
385
  onNewAsset({ role: "audio", content: dataUrl });
386
  result = "Audio generated successfully and shown to the user.";
387
  } else if (res.status == 429) {
388
- result = "Too many requests. Try again later."
389
  } else {
390
  result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
391
  }
 
1
  import OpenAI from "openai";
2
+ import { Worker } from "worker_threads";
3
+ import { fileURLToPath } from "url";
4
+ import path from "path";
5
  import { LIGHTNING_BASE } from "./config.js";
6
 
7
+ // ── Web Search via an isolated Worker thread ──────────────────────────────
8
  //
9
+ // The @gradio/client library opens a persistent SSE (Server-Sent Events)
10
+ // fetch stream for its session queue. Even after client.close() is called,
11
+ // the SSE response-body reader keeps an async iterator alive in the current
12
+ // event loop. When that iterator eventually settles (stream closed/errored
13
+ // by the remote server), it triggers internal callbacks that emit events
14
+ // on objects the Node `ws` library also watches β€” causing the browser to
15
+ // see a "connection lost" message immediately after every web search.
16
  //
17
+ // The only reliable fix is to run the Gradio client in a worker_thread so
18
+ // it gets its own V8 context and event loop. When the worker exits (via
19
+ // process.exit(0) in searchWorker.js), every handle it opened β€” SSE stream,
20
+ // heartbeat timer, etc. β€” is torn down with it, leaving the main thread's
21
+ // WS server completely untouched.
22
+
23
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
24
+ const WORKER_PATH = path.join(__dirname, "searchWorker.js");
25
+
26
+ function gradioSearch(query) {
27
+ return new Promise((resolve, reject) => {
28
+ const worker = new Worker(WORKER_PATH, { workerData: { query } });
29
+
30
+ const timeout = setTimeout(() => {
31
+ worker.terminate();
32
+ reject(new Error("Search timed out after 45s"));
33
+ }, 45_000);
34
+
35
+ worker.on("message", (msg) => {
36
+ clearTimeout(timeout);
37
+ if (msg.ok) resolve(msg.result);
38
+ else reject(new Error(msg.error));
39
+ });
40
+
41
+ worker.on("error", (err) => {
42
+ clearTimeout(timeout);
43
+ reject(err);
44
+ });
45
+
46
+ worker.on("exit", (code) => {
47
+ // By the time this fires the promise is already settled via "message".
48
+ // Only reject if the worker crashed without posting anything.
49
+ clearTimeout(timeout);
50
+ if (code !== 0) reject(new Error(`Search worker exited with code ${code}`));
51
+ });
52
+ });
53
  }
54
 
55
  const SYSTEM_PROMPT =
 
194
  const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
195
 
196
  function normalizeMessage(msg) {
 
 
197
  if (!VALID_ROLES.has(msg.role)) return null;
198
 
199
  if (msg.role === "assistant" && msg.tool_calls) {
200
  return { role: "assistant", content: "", tool_calls: msg.tool_calls };
201
  }
 
202
  if (Array.isArray(msg.content)) {
203
  const textOnly = msg.content
204
  .filter(b => b.type === "text")
 
296
  async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abortSignal, onToolCall, onNewAsset) {
297
  const toolResults = [];
298
  const authHeaders = {};
299
+ if (accessToken) {
300
+ authHeaders["Authorization"] = `Bearer ${accessToken}`;
301
  } else {
302
  console.log("No access token");
303
  }
 
354
  onNewAsset({ role: "image", content: dataUrl });
355
  result = "Image generated successfully and shown to the user.";
356
  } else if (res.status == 402) {
357
+ result = "An upgraded plan is required for higher limits.";
358
  } else if (res.status == 429) {
359
+ result = "Too many requests. Try again later.";
360
  } else {
361
  result = `Image generation failed: ${res.status}`;
362
  }
 
382
  onNewAsset({ role: "video", content: dataUrl });
383
  result = "Video generated successfully and shown to the user.";
384
  } else if (res.status == 402) {
385
+ result = "An upgraded plan is required for higher limits.";
386
  } else if (res.status == 429) {
387
+ result = "Too many requests. Try again later.";
388
  } else {
389
  result = `Video generation failed: ${res.status}`;
390
  }
 
404
  onNewAsset({ role: "audio", content: dataUrl });
405
  result = "Audio generated successfully and shown to the user.";
406
  } else if (res.status == 429) {
407
+ result = "Too many requests. Try again later.";
408
  } else {
409
  result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
410
  }