expertInsp / server.ts
valonys
Fix inference: switch to OpenAI-compatible /v1/chat/completions endpoint
b02f0c1
import express, { Request, Response } from 'express';
import path from 'path';
import { fileURLToPath } from 'url';
// Express application instance; every middleware and route below is registered on it.
const app = express();

// TCP port passed to app.listen() at the bottom of the file.
const PORT = 7860;

// Hugging Face model identifier. It appears both in the endpoint URL and in the
// `model` field of the JSON payload sent by the /api/chat route.
const MODEL_ID = "amiguel/qwen2.5-7b-instruct-ai_llm-sft";

// OpenAI-compatible chat completions endpoint for MODEL_ID. Per the original
// author's note, this endpoint handles PEFT adapters, applies the model's own
// chat template, and streams in standard SSE format.
const API_URL =
  "https://api-inference.huggingface.co/models/" + MODEL_ID + "/v1/chat/completions";

// ES modules do not provide __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Parse JSON request bodies so route handlers can read req.body.
app.use(express.json());

// Serve the static assets found in the `dist` directory next to this file.
app.use(express.static(path.join(__dirname, 'dist')));
/**
 * POST /api/chat — proxies a chat request to the upstream chat-completions
 * endpoint (API_URL) and relays the streamed reply to the caller.
 *
 * Request body: JSON object with a non-empty `messages` array, forwarded as-is.
 *
 * Responses:
 *  - 401 JSON error when the HF_TOKEN environment variable is not set.
 *  - 400 JSON error when `messages` is missing, not an array, or empty.
 *  - Upstream status + JSON error when the upstream call is not OK.
 *  - Otherwise a `text/event-stream` response carrying the upstream bytes unchanged.
 *  - 500 JSON error on unexpected failures that occur before streaming starts.
 */
app.post('/api/chat', async (req: Request, res: Response) => {
  const hfToken = process.env.HF_TOKEN;
  if (!hfToken) {
    res.status(401).json({ error: "HF_TOKEN environment variable not set." });
    return;
  }

  // req.body may be undefined when no JSON body was parsed, so read defensively.
  const messages: unknown = req.body?.messages;
  if (!Array.isArray(messages) || messages.length === 0) {
    res.status(400).json({ error: "Request body must include a non-empty 'messages' array." });
    return;
  }

  // Cancel the upstream request if the client goes away before we finish;
  // otherwise the upstream stream keeps running with nobody reading it.
  const upstreamAbort = new AbortController();
  const onClientClose = () => {
    // 'close' also fires after a normal end(); only abort on a premature close.
    if (!res.writableEnded) {
      upstreamAbort.abort();
    }
  };
  res.on('close', onClientClose);

  try {
    const response = await fetch(API_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${hfToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: MODEL_ID,
        messages,
        max_tokens: 1024,
        stream: true,
        temperature: 0.7,
      }),
      signal: upstreamAbort.signal,
    });

    if (!response.ok) {
      const errorText = await response.text();
      console.error("HF API Error:", errorText);
      res.status(response.status).json({ error: errorText });
      return;
    }

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    const reader = response.body?.getReader();
    if (!reader) {
      res.end();
      return;
    }

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // Forward the raw bytes: decoding and re-encoding is unnecessary and could
      // drop a trailing partial UTF-8 sequence that is never flushed.
      res.write(value);
    }
    res.end();
  } catch (error) {
    if (upstreamAbort.signal.aborted) {
      // The client disconnected and we cancelled upstream ourselves; not an error.
      res.end();
      return;
    }
    console.error("Server Error:", error);
    if (res.headersSent) {
      // The SSE stream already started, so a JSON error can no longer be sent
      // (setting the status now would throw); just terminate the stream.
      res.end();
    } else {
      res.status(500).json({ error: "Internal Server Error" });
    }
  } finally {
    res.off('close', onClientClose);
  }
});
// SPA fallback: any GET request that reaches this route is answered with the
// built index.html from the `dist` directory.
app.get('*', function (_req, res) {
  const indexHtml = path.join(__dirname, 'dist', 'index.html');
  res.sendFile(indexHtml);
});
// Start the HTTP server on PORT and log the address once it is listening.
function onListening(): void {
  console.log(`Server running on http://0.0.0.0:${PORT}`);
}

app.listen(PORT, onListening);