Spaces:
Running
Running
incognitolm commited on
Commit Β·
fb79c98
1
Parent(s): cad44cf
Search
Browse files- server/Searchworker.js +31 -0
- server/chatStream.js +54 -35
server/Searchworker.js
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// searchWorker.js β runs inside a worker_thread, fully isolated from the
|
| 2 |
+
// main WS server. Receives a query via workerData, performs the Gradio
|
| 3 |
+
// search, posts the result back via parentPort, then exits.
|
| 4 |
+
//
|
| 5 |
+
// Because this runs in its own V8 context / event loop, the Gradio client's
|
| 6 |
+
// internal SSE fetch stream cannot interfere with the main thread's ws server.
|
| 7 |
+
|
| 8 |
+
import { workerData, parentPort } from 'worker_threads';
|
| 9 |
+
import { Client } from '@gradio/client';
|
| 10 |
+
|
| 11 |
+
async function run() {
|
| 12 |
+
const { query } = workerData;
|
| 13 |
+
let client = null;
|
| 14 |
+
try {
|
| 15 |
+
client = await Client.connect('incognitolm/Web-Search');
|
| 16 |
+
const result = await client.predict('/perform_search', { query });
|
| 17 |
+
const raw = Array.isArray(result.data) ? result.data[0] : result.data;
|
| 18 |
+
if (!raw) throw new Error('Empty response from search endpoint');
|
| 19 |
+
const text = typeof raw === 'string' ? raw : JSON.stringify(raw);
|
| 20 |
+
parentPort.postMessage({ ok: true, result: text });
|
| 21 |
+
} catch (err) {
|
| 22 |
+
parentPort.postMessage({ ok: false, error: String(err) });
|
| 23 |
+
} finally {
|
| 24 |
+
try { client?.close?.(); } catch (_) {}
|
| 25 |
+
// Force-exit so the worker doesn't hang on lingering async handles
|
| 26 |
+
// (the SSE response body reader, heartbeat timer, etc.)
|
| 27 |
+
setTimeout(() => process.exit(0), 0);
|
| 28 |
+
}
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
run();
|
server/chatStream.js
CHANGED
|
@@ -1,33 +1,55 @@
|
|
| 1 |
import OpenAI from "openai";
|
| 2 |
-
import {
|
|
|
|
|
|
|
| 3 |
import { LIGHTNING_BASE } from "./config.js";
|
| 4 |
|
| 5 |
-
// ββ Web Search via
|
| 6 |
//
|
| 7 |
-
// The
|
| 8 |
-
//
|
| 9 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
//
|
| 11 |
-
//
|
| 12 |
-
//
|
| 13 |
-
//
|
| 14 |
-
//
|
| 15 |
-
//
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
}
|
| 32 |
|
| 33 |
const SYSTEM_PROMPT =
|
|
@@ -172,14 +194,11 @@ export async function streamChat(ws, {
|
|
| 172 |
const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
|
| 173 |
|
| 174 |
function normalizeMessage(msg) {
|
| 175 |
-
// Drop asset entries (role: "image"/"video"/"audio") β these are UI-only
|
| 176 |
-
// and sending them to the LLM causes invalid-role rejections / blank responses
|
| 177 |
if (!VALID_ROLES.has(msg.role)) return null;
|
| 178 |
|
| 179 |
if (msg.role === "assistant" && msg.tool_calls) {
|
| 180 |
return { role: "assistant", content: "", tool_calls: msg.tool_calls };
|
| 181 |
}
|
| 182 |
-
// Flatten multipart content arrays (e.g. image attachments) to text-only for history
|
| 183 |
if (Array.isArray(msg.content)) {
|
| 184 |
const textOnly = msg.content
|
| 185 |
.filter(b => b.type === "text")
|
|
@@ -277,8 +296,8 @@ function buildToolList(tools) {
|
|
| 277 |
async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abortSignal, onToolCall, onNewAsset) {
|
| 278 |
const toolResults = [];
|
| 279 |
const authHeaders = {};
|
| 280 |
-
if (accessToken) {
|
| 281 |
-
authHeaders["Authorization"] = `Bearer ${accessToken}`;
|
| 282 |
} else {
|
| 283 |
console.log("No access token");
|
| 284 |
}
|
|
@@ -335,9 +354,9 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
|
|
| 335 |
onNewAsset({ role: "image", content: dataUrl });
|
| 336 |
result = "Image generated successfully and shown to the user.";
|
| 337 |
} else if (res.status == 402) {
|
| 338 |
-
result = "An upgraded plan is required for higher limits."
|
| 339 |
} else if (res.status == 429) {
|
| 340 |
-
result = "Too many requests. Try again later."
|
| 341 |
} else {
|
| 342 |
result = `Image generation failed: ${res.status}`;
|
| 343 |
}
|
|
@@ -363,9 +382,9 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
|
|
| 363 |
onNewAsset({ role: "video", content: dataUrl });
|
| 364 |
result = "Video generated successfully and shown to the user.";
|
| 365 |
} else if (res.status == 402) {
|
| 366 |
-
result = "An upgraded plan is required for higher limits."
|
| 367 |
} else if (res.status == 429) {
|
| 368 |
-
result = "Too many requests. Try again later."
|
| 369 |
} else {
|
| 370 |
result = `Video generation failed: ${res.status}`;
|
| 371 |
}
|
|
@@ -385,7 +404,7 @@ async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abo
|
|
| 385 |
onNewAsset({ role: "audio", content: dataUrl });
|
| 386 |
result = "Audio generated successfully and shown to the user.";
|
| 387 |
} else if (res.status == 429) {
|
| 388 |
-
result = "Too many requests. Try again later."
|
| 389 |
} else {
|
| 390 |
result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
|
| 391 |
}
|
|
|
|
| 1 |
import OpenAI from "openai";
|
| 2 |
+
import { Worker } from "worker_threads";
|
| 3 |
+
import { fileURLToPath } from "url";
|
| 4 |
+
import path from "path";
|
| 5 |
import { LIGHTNING_BASE } from "./config.js";
|
| 6 |
|
| 7 |
+
// ββ Web Search via an isolated Worker thread ββββββββββββββββββββββββββββββ
|
| 8 |
//
|
| 9 |
+
// The @gradio/client library opens a persistent SSE (Server-Sent Events)
|
| 10 |
+
// fetch stream for its session queue. Even after client.close() is called,
|
| 11 |
+
// the SSE response-body reader keeps an async iterator alive in the current
|
| 12 |
+
// event loop. When that iterator eventually settles (stream closed/errored
|
| 13 |
+
// by the remote server), it triggers internal callbacks that emit events
|
| 14 |
+
// on objects the Node `ws` library also watches β causing the browser to
|
| 15 |
+
// see a "connection lost" message immediately after every web search.
|
| 16 |
//
|
| 17 |
+
// The only reliable fix is to run the Gradio client in a worker_thread so
|
| 18 |
+
// it gets its own V8 context and event loop. When the worker exits (via
|
| 19 |
+
// process.exit(0) in searchWorker.js), every handle it opened β SSE stream,
|
| 20 |
+
// heartbeat timer, etc. β is torn down with it, leaving the main thread's
|
| 21 |
+
// WS server completely untouched.
|
| 22 |
+
|
| 23 |
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
| 24 |
+
const WORKER_PATH = path.join(__dirname, "searchWorker.js");
|
| 25 |
+
|
| 26 |
+
function gradioSearch(query) {
|
| 27 |
+
return new Promise((resolve, reject) => {
|
| 28 |
+
const worker = new Worker(WORKER_PATH, { workerData: { query } });
|
| 29 |
+
|
| 30 |
+
const timeout = setTimeout(() => {
|
| 31 |
+
worker.terminate();
|
| 32 |
+
reject(new Error("Search timed out after 45s"));
|
| 33 |
+
}, 45_000);
|
| 34 |
+
|
| 35 |
+
worker.on("message", (msg) => {
|
| 36 |
+
clearTimeout(timeout);
|
| 37 |
+
if (msg.ok) resolve(msg.result);
|
| 38 |
+
else reject(new Error(msg.error));
|
| 39 |
+
});
|
| 40 |
+
|
| 41 |
+
worker.on("error", (err) => {
|
| 42 |
+
clearTimeout(timeout);
|
| 43 |
+
reject(err);
|
| 44 |
+
});
|
| 45 |
+
|
| 46 |
+
worker.on("exit", (code) => {
|
| 47 |
+
// By the time this fires the promise is already settled via "message".
|
| 48 |
+
// Only reject if the worker crashed without posting anything.
|
| 49 |
+
clearTimeout(timeout);
|
| 50 |
+
if (code !== 0) reject(new Error(`Search worker exited with code ${code}`));
|
| 51 |
+
});
|
| 52 |
+
});
|
| 53 |
}
|
| 54 |
|
| 55 |
const SYSTEM_PROMPT =
|
|
|
|
| 194 |
const VALID_ROLES = new Set(["system", "user", "assistant", "tool"]);
|
| 195 |
|
| 196 |
function normalizeMessage(msg) {
|
|
|
|
|
|
|
| 197 |
if (!VALID_ROLES.has(msg.role)) return null;
|
| 198 |
|
| 199 |
if (msg.role === "assistant" && msg.tool_calls) {
|
| 200 |
return { role: "assistant", content: "", tool_calls: msg.tool_calls };
|
| 201 |
}
|
|
|
|
| 202 |
if (Array.isArray(msg.content)) {
|
| 203 |
const textOnly = msg.content
|
| 204 |
.filter(b => b.type === "text")
|
|
|
|
| 296 |
async function processToolCalls(ws, toolCalls, tools, accessToken, clientId, abortSignal, onToolCall, onNewAsset) {
|
| 297 |
const toolResults = [];
|
| 298 |
const authHeaders = {};
|
| 299 |
+
if (accessToken) {
|
| 300 |
+
authHeaders["Authorization"] = `Bearer ${accessToken}`;
|
| 301 |
} else {
|
| 302 |
console.log("No access token");
|
| 303 |
}
|
|
|
|
| 354 |
onNewAsset({ role: "image", content: dataUrl });
|
| 355 |
result = "Image generated successfully and shown to the user.";
|
| 356 |
} else if (res.status == 402) {
|
| 357 |
+
result = "An upgraded plan is required for higher limits.";
|
| 358 |
} else if (res.status == 429) {
|
| 359 |
+
result = "Too many requests. Try again later.";
|
| 360 |
} else {
|
| 361 |
result = `Image generation failed: ${res.status}`;
|
| 362 |
}
|
|
|
|
| 382 |
onNewAsset({ role: "video", content: dataUrl });
|
| 383 |
result = "Video generated successfully and shown to the user.";
|
| 384 |
} else if (res.status == 402) {
|
| 385 |
+
result = "An upgraded plan is required for higher limits.";
|
| 386 |
} else if (res.status == 429) {
|
| 387 |
+
result = "Too many requests. Try again later.";
|
| 388 |
} else {
|
| 389 |
result = `Video generation failed: ${res.status}`;
|
| 390 |
}
|
|
|
|
| 404 |
onNewAsset({ role: "audio", content: dataUrl });
|
| 405 |
result = "Audio generated successfully and shown to the user.";
|
| 406 |
} else if (res.status == 429) {
|
| 407 |
+
result = "Too many requests. Try again later.";
|
| 408 |
} else {
|
| 409 |
result = `Audio generation failed: ${res.status}. This is most likely an upstream provider error.`;
|
| 410 |
}
|