|
|
import { request } from "undici"; |
|
|
|
|
|
/**
 * Reads an environment variable, treating unset, empty, and whitespace-only
 * values as absent so that a blank `FOO=` line cannot override a default.
 *
 * @param name - Name of the environment variable to read.
 * @param fallback - Value returned when the variable is absent or blank.
 * @returns The trimmed variable value, or `fallback`.
 */
function envOrDefault(name: string, fallback: string): string {
  const value = process.env[name]?.trim();
  return value ? value : fallback;
}

/**
 * Base URL of the LLM HTTP API. Trailing slashes are removed because request
 * paths are appended with their own leading "/".
 */
const API_BASE = envOrDefault("LLM_API_BASE", "https://api.openai.com").replace(/\/+$/, "");

/** Bearer token sent with every request; an empty string means "not configured". */
const API_KEY = envOrDefault("LLM_API_KEY", "");

/** Model identifier placed in the request payload. */
const MODEL = envOrDefault("LLM_MODEL", "gpt-4o-mini");
|
|
|
|
|
/**
 * Sends a streaming chat-completion request and forwards each generated text
 * fragment to `onToken` as it arrives.
 *
 * The request is a POST to `${API_BASE}/v1/chat/completions` with
 * `stream: true` and `temperature: 0.3`. The response is read as a
 * server-sent-event stream: every `data:` payload is parsed as JSON and the
 * string at `choices[0].delta.content`, when non-empty, is passed to `onToken`.
 * The function resolves when the `[DONE]` sentinel is seen or the stream ends.
 *
 * @param messages - Conversation history sent verbatim as the `messages` field.
 * @param onToken - Called synchronously for each non-empty content fragment.
 *   Exceptions thrown by this callback propagate to the caller.
 * @throws Error if `API_KEY` is empty, or if the response status is not 2xx
 *   (the message includes the status and the first 200 characters of the body).
 */
export async function streamChatCompletion(
  messages: Array<{ role: string; content: string }>,
  onToken: (token: string) => void
): Promise<void> {
  if (!API_KEY) {
    throw new Error("Missing LLM_API_KEY environment variable.");
  }

  const payload = {
    model: MODEL,
    messages,
    stream: true,
    temperature: 0.3
  };

  const { body, statusCode } = await request(`${API_BASE}/v1/chat/completions`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify(payload)
  });

  if (statusCode < 200 || statusCode >= 300) {
    const errorText = await body.text();
    throw new Error(`LLM request failed (${statusCode}): ${errorText.slice(0, 200)}`);
  }

  const decoder = new TextDecoder();
  let buffer = "";

  // Handles one complete SSE event; returns true once the "[DONE]" sentinel is seen.
  const consumeEvent = (rawEvent: string): boolean => {
    // An event may carry several fields (event:, id:, ":" comments); only the
    // data lines matter here, and multiple data lines are joined with "\n".
    const data = rawEvent
      .split("\n")
      .filter((line) => line.startsWith("data:"))
      .map((line) => line.slice("data:".length).trim())
      .join("\n")
      .trim();

    if (data === "") return false;
    if (data === "[DONE]") return true;

    let parsed: { choices?: Array<{ delta?: { content?: unknown } | null } | null> } | null;
    try {
      parsed = JSON.parse(data);
    } catch {
      // Best effort: a payload that is not valid JSON is skipped rather than
      // aborting the whole stream. Only the parse is guarded, so errors raised
      // by onToken are not swallowed.
      return false;
    }

    const content = parsed?.choices?.[0]?.delta?.content;
    if (typeof content === "string" && content !== "") {
      onToken(content);
    }
    return false;
  };

  // Dispatches every complete (blank-line-terminated) event currently buffered.
  const drainBuffer = (): boolean => {
    // Normalise CRLF so "\r\n\r\n" framing is recognised. A lone trailing "\r"
    // stays in the buffer and is normalised once its "\n" arrives.
    buffer = buffer.replace(/\r\n/g, "\n");
    let boundary = buffer.indexOf("\n\n");
    while (boundary !== -1) {
      const rawEvent = buffer.slice(0, boundary);
      buffer = buffer.slice(boundary + 2);
      if (consumeEvent(rawEvent)) return true;
      boundary = buffer.indexOf("\n\n");
    }
    return false;
  };

  // undici's response body is a Node.js Readable (with text()/json() mixins),
  // not a WHATWG ReadableStream, so it has no getReader(); iterate it instead.
  // Leaving the loop early (return or throw) destroys the stream.
  for await (const chunk of body) {
    const bytes: Uint8Array = chunk;
    buffer += decoder.decode(bytes, { stream: true });
    if (drainBuffer()) return;
  }

  // Flush any partially decoded character and handle a final event that was
  // not followed by a blank line before the connection closed.
  buffer += decoder.decode();
  if (drainBuffer()) return;
  if (buffer.trim() !== "") {
    consumeEvent(buffer);
  }
}
|
|
|
|