Spaces:

incognitolm
/

chat

Running

App Files Files Community

incognitolm committed on Apr 3

Commit

fb79c98

1 Parent(s): cad44cf

Search

Browse files

Files changed (2) hide show

server/Searchworker.js +31 -0
server/chatStream.js +54 -35

server/Searchworker.js ADDED Viewed

	@@ -0,0 +1,31 @@

+// Searchworker.js — runs inside a worker_thread, fully isolated from the
+// main WS server.  Receives a query via workerData, performs the Gradio
+// search, posts the result back via parentPort, then exits.
+//
+// Because this runs in its own V8 context / event loop, the Gradio client's
+// internal SSE fetch stream cannot interfere with the main thread's ws server.
+import { workerData, parentPort } from 'worker_threads';
+import { Client } from '@gradio/client';
+async function run() {
+  const { query } = workerData;
+  let client = null;
+  try {
+    client = await Client.connect('incognitolm/Web-Search');
+    const result = await client.predict('/perform_search', { query });
+    const raw = Array.isArray(result.data) ? result.data[0] : result.data;
+    if (!raw) throw new Error('Empty response from search endpoint');
+    const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
+    parentPort.postMessage({ ok: true, result: text });
+  } catch (err) {
+    parentPort.postMessage({ ok: false, error: String(err) });
+  } finally {
+    try { client?.close?.(); } catch (_) {}
+    // Force-exit so the worker doesn't hang on lingering async handles
+    // (the SSE response body reader, heartbeat timer, etc.)
+    setTimeout(() => process.exit(0), 0);
+  }
+}
+run();

server/chatStream.js CHANGED Viewed

@@ -1,33 +1,55 @@
 import OpenAI from "openai";
-import { Client } from "@gradio/client";
 import { LIGHTNING_BASE } from "./config.js";
-// ── Web Search via Gradio client (fresh connection per call) ──────────────
 //
-// The original code kept a singleton Gradio client whose persistent internal
-// WebSocket would emit events that bled into the Node WS server, causing the
-// browser to see spurious disconnects after every search.
 //
-// Fix: create a brand-new Client.connect() for each search call, then
-// immediately destroy it when done.  This matches the pattern in the working
-// Electron app (see tools.ts: ollamaSearch).  There is no performance penalty
-// worth worrying about because searches are infrequent and the Space cold-
-// start cost dwarfs the connect overhead.
-async function gradioSearch(query) {
-  let client = null;
-  try {
-    client = await Client.connect("incognitolm/Web-Search");
-    const result = await client.predict("/perform_search", { query });
-    // result.data is an array; search results are in the first element.
-    const raw = Array.isArray(result.data) ? result.data[0] : result.data;
-    if (!raw) throw new Error("Empty response from search endpoint");
-    return typeof raw === "string" ? raw : JSON.stringify(raw);
-  } finally {
-    // Tear down the Gradio client's internal WebSocket so it doesn't linger
-    // and interfere with the server's own WS connections.
-    try { client?.close?.(); } catch (_) {}
-  }
 }
 const SYSTEM_PROMPT =
@@ -172,14 +194,11 @@ export async function streamChat(ws, {
 const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
 function normalizeMessage(msg) {
-  // Drop asset entries (role: "image"/"video"/"audio") — these are UI-only
-  // and sending them to the LLM causes invalid-role rejections / blank responses
   if (!VALID_ROLES.has(msg.role)) return null;
   if (msg.role === "assistant" && msg.tool_calls) {
     return { role: "assistant", content: "", tool_calls: msg.tool_calls };
   }
-  // Flatten multipart content arrays (e.g. image attachments) to text-only for history
   if (Array.isArray(msg.content)) {
     const textOnly = msg.content
       .filter(b => b.type === "text")
@@ -277,8 +296,8 @@ function buildToolList(tools) {
 async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abortSignal, onToolCall, onNewAsset) {
   const toolResults = [];
   const authHeaders = {};
-  if (accessToken) {
-    authHeaders["Authorization"] = `Bearer ${accessToken}`;
   } else {
     console.log("No access token");
   }
@@ -335,9 +354,9 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
           onNewAsset({ role: "image", content: dataUrl });
           result = "Image generated successfully and shown to the user.";
         } else if (res.status == 402) {
-          result = "An upgraded plan is required for higher limits."
         } else if (res.status == 429) {
-          result = "Too many requests. Try again later."
         } else {
           result = `Image generation failed: ${res.status}`;
         }
@@ -363,9 +382,9 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
           onNewAsset({ role: "video", content: dataUrl });
           result = "Video generated successfully and shown to the user.";
         } else if (res.status == 402) {
-          result = "An upgraded plan is required for higher limits."
         } else if (res.status == 429) {
-          result = "Too many requests. Try again later."
         } else {
           result = `Video generation failed: ${res.status}`;
         }
@@ -385,7 +404,7 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
           onNewAsset({ role: "audio", content: dataUrl });
           result = "Audio generated successfully and shown to the user.";
         } else if (res.status == 429) {
-          result = "Too many requests. Try again later."
         } else {
           result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
         }

 import OpenAI from "openai";
+import { Worker } from "worker_threads";
+import { fileURLToPath } from "url";
+import path from "path";
 import { LIGHTNING_BASE } from "./config.js";
+// ── Web Search via an isolated Worker thread ──────────────────────────────
 //
+// The @gradio/client library opens a persistent SSE (Server-Sent Events)
+// fetch stream for its session queue.  Even after client.close() is called,
+// the SSE response-body reader keeps an async iterator alive in the current
+// event loop.  When that iterator eventually settles (stream closed/errored
+// by the remote server), it triggers internal callbacks that emit events
+// on objects the Node `ws` library also watches — causing the browser to
+// see a "connection lost" message immediately after every web search.
 //
+// The only reliable fix is to run the Gradio client in a worker_thread so
+// it gets its own V8 context and event loop.  When the worker exits (via
+// process.exit(0) in Searchworker.js), every handle it opened — SSE stream,
+// heartbeat timer, etc. — is torn down with it, leaving the main thread's
+// WS server completely untouched.
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const WORKER_PATH = path.join(__dirname, "searchWorker.js");
+function gradioSearch(query) {
+  return new Promise((resolve, reject) => {
+    const worker = new Worker(WORKER_PATH, { workerData: { query } });
+    const timeout = setTimeout(() => {
+      worker.terminate();
+      reject(new Error("Search timed out after 45s"));
+    }, 45_000);
+    worker.on("message", (msg) => {
+      clearTimeout(timeout);
+      if (msg.ok) resolve(msg.result);
+      else reject(new Error(msg.error));
+    });
+    worker.on("error", (err) => {
+      clearTimeout(timeout);
+      reject(err);
+    });
+    worker.on("exit", (code) => {
+      // By the time this fires the promise is already settled via "message".
+      // Only reject if the worker crashed without posting anything.
+      clearTimeout(timeout);
+      if (code !== 0) reject(new Error(`Search worker exited with code ${code}`));
+    });
+  });
 }
 const SYSTEM_PROMPT =
 const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
 function normalizeMessage(msg) {
   if (!VALID_ROLES.has(msg.role)) return null;
   if (msg.role === "assistant" && msg.tool_calls) {
     return { role: "assistant", content: "", tool_calls: msg.tool_calls };
   }
   if (Array.isArray(msg.content)) {
     const textOnly = msg.content
       .filter(b => b.type === "text")
 async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abortSignal, onToolCall, onNewAsset) {
   const toolResults = [];
   const authHeaders = {};
+  if (accessToken) {
+    authHeaders["Authorization"] = `Bearer ${accessToken}`;
   } else {
     console.log("No access token");
   }
           onNewAsset({ role: "image", content: dataUrl });
           result = "Image generated successfully and shown to the user.";
         } else if (res.status == 402) {
+          result = "An upgraded plan is required for higher limits.";
         } else if (res.status == 429) {
+          result = "Too many requests. Try again later.";
         } else {
           result = `Image generation failed: ${res.status}`;
         }
           onNewAsset({ role: "video", content: dataUrl });
           result = "Video generated successfully and shown to the user.";
         } else if (res.status == 402) {
+          result = "An upgraded plan is required for higher limits.";
         } else if (res.status == 429) {
+          result = "Too many requests. Try again later.";
         } else {
           result = `Video generation failed: ${res.status}`;
         }
           onNewAsset({ role: "audio", content: dataUrl });
           result = "Audio generated successfully and shown to the user.";
         } else if (res.status == 429) {
+          result = "Too many requests. Try again later.";
         } else {
           result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
         }