File size: 1,808 Bytes
40a9423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import { request } from "undici";

/** Base URL of the LLM HTTP API; taken from `LLM_API_BASE`, else the OpenAI host. */
const API_BASE = process.env["LLM_API_BASE"] ?? "https://api.openai.com";

/** Bearer token for the API; taken from `LLM_API_KEY`, else an empty string. */
const API_KEY = process.env["LLM_API_KEY"] ?? "";

/** Model identifier sent with each request; taken from `LLM_MODEL`, else `gpt-4o-mini`. */
const MODEL = process.env["LLM_MODEL"] ?? "gpt-4o-mini";

/**
 * Sends a streaming chat-completion request and forwards each content delta
 * to `onToken` as it arrives.
 *
 * The response is read as a server-sent-event stream: events are separated by
 * a blank line, and the `data:` lines of each event carry either a JSON
 * payload or the literal `[DONE]` terminator.
 *
 * @param messages - Conversation messages, sent verbatim in the request payload.
 * @param onToken - Called once per non-empty string found at
 *   `choices[0].delta.content`. Exceptions it throws propagate to the caller.
 * @returns A promise that resolves when `[DONE]` is received or the response
 *   stream ends.
 * @throws Error if `LLM_API_KEY` is not configured, or if the server answers
 *   with a non-2xx status (the message includes up to 200 characters of the
 *   response body).
 */
export async function streamChatCompletion(
  messages: Array<{ role: string; content: string }>,
  onToken: (token: string) => void
): Promise<void> {
  if (!API_KEY) {
    throw new Error("Missing LLM_API_KEY environment variable.");
  }

  const payload = {
    model: MODEL,
    messages,
    stream: true,
    temperature: 0.3
  };

  const { body, statusCode } = await request(`${API_BASE}/v1/chat/completions`, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify(payload)
  });

  if (statusCode < 200 || statusCode >= 300) {
    const errorText = await body.text();
    throw new Error(`LLM request failed (${statusCode}): ${errorText.slice(0, 200)}`);
  }

  type StreamPayload = { choices?: Array<{ delta?: { content?: unknown } }> } | null;

  // Handles one complete SSE event; returns true once the [DONE] terminator is seen.
  const dispatchEvent = (rawEvent: string): boolean => {
    // Only `data:` lines matter here; comment (":") and other field lines are skipped.
    const data = rawEvent
      .split("\n")
      .filter((line) => line.startsWith("data:"))
      .map((line) => line.slice("data:".length).trim())
      .join("\n")
      .trim();
    if (data === "") return false;
    if (data === "[DONE]") return true;

    let delta: unknown;
    try {
      const parsed = JSON.parse(data) as StreamPayload;
      delta = parsed?.choices?.[0]?.delta?.content;
    } catch {
      // Best effort: skip payloads that are not valid JSON.
      return false;
    }
    // Invoked outside the try so that errors raised by the callback are not swallowed.
    if (typeof delta === "string" && delta !== "") onToken(delta);
    return false;
  };

  const decoder = new TextDecoder();
  let buffer = "";

  // undici's response body is a Node.js Readable (it has no getReader()), so it is
  // consumed via async iteration. Leaving the loop early destroys the stream.
  for await (const chunk of body) {
    // Normalise CRLF so both "\n\n" and "\r\n\r\n" event separators are recognised.
    // A lone trailing "\r" is fixed up once the matching "\n" arrives.
    buffer = (buffer + decoder.decode(chunk, { stream: true })).replace(/\r\n/g, "\n");

    let boundary = buffer.indexOf("\n\n");
    while (boundary !== -1) {
      const rawEvent = buffer.slice(0, boundary);
      buffer = buffer.slice(boundary + 2);
      if (dispatchEvent(rawEvent)) return;
      boundary = buffer.indexOf("\n\n");
    }
  }

  // Flush bytes still held by the decoder and process a final event that was
  // not followed by a blank line before the stream ended.
  buffer = (buffer + decoder.decode()).replace(/\r\n/g, "\n");
  if (buffer.trim() !== "") {
    dispatchEvent(buffer);
  }
}