import express, { Request, Response } from 'express';
import path from 'path';
import { fileURLToPath } from 'url';

const app = express();
const PORT = 7860;
const MODEL_ID = "amiguel/qwen2.5-7b-instruct-ai_llm-sft";

// OpenAI-compatible chat completions endpoint — handles PEFT adapters,
// applies the model's own chat template, and streams in standard SSE format.
const API_URL = `https://api-inference.huggingface.co/models/${MODEL_ID}/v1/chat/completions`;

app.use(express.json());

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
app.use(express.static(path.join(__dirname, 'dist')));

/**
 * POST /api/chat — proxies a chat request to the upstream completions API and
 * relays the streamed response body to the client as `text/event-stream`.
 *
 * Request body: `{ messages: [...] }`; the array is forwarded upstream as-is.
 *
 * Responses:
 * - 401 when the HF_TOKEN environment variable is not set.
 * - 400 when `messages` is missing, not an array, or empty.
 * - the upstream status code with `{ error }` when the upstream call fails.
 * - 500 when an unexpected error occurs before any bytes were streamed.
 */
app.post('/api/chat', async (req: Request, res: Response): Promise<void> => {
  // req.body may be absent if no body parser handled the request, so read it defensively.
  const body: { messages?: unknown } = req.body ?? {};
  const { messages } = body;
  const hfToken = process.env.HF_TOKEN;

  if (!hfToken) {
    res.status(401).json({ error: "HF_TOKEN environment variable not set." });
    return;
  }

  if (!Array.isArray(messages) || messages.length === 0) {
    res.status(400).json({ error: "Request body must include a non-empty `messages` array." });
    return;
  }

  // Cancel the upstream request when the client connection closes so an
  // abandoned chat does not keep the upstream stream running. Aborting after a
  // normal completion is a no-op.
  const upstreamAbort = new AbortController();
  res.on('close', () => upstreamAbort.abort());

  try {
    const response = await fetch(API_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${hfToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: MODEL_ID,
        messages,
        max_tokens: 1024,
        stream: true,
        temperature: 0.7,
      }),
      signal: upstreamAbort.signal,
    });

    if (!response.ok) {
      const errorText = await response.text();
      console.error("HF API Error:", errorText);
      res.status(response.status).json({ error: errorText });
      return;
    }

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    const reader = response.body?.getReader();
    if (!reader) {
      res.end();
      return;
    }

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // Relay the raw bytes unchanged: decoding and re-encoding each chunk is
      // unnecessary and could drop a multi-byte character left incomplete at
      // the end of the stream.
      res.write(value);
    }

    res.end();
  } catch (error) {
    if (upstreamAbort.signal.aborted) {
      // The client disconnected; there is nobody left to report an error to.
      if (!res.writableEnded) res.end();
      return;
    }

    console.error("Server Error:", error);

    if (res.headersSent) {
      // The stream has already started, so a JSON error response can no longer
      // be sent; just terminate the stream.
      res.end();
      return;
    }

    res.status(500).json({ error: "Internal Server Error" });
  }
});

// SPA fallback
app.get('*', (_req, res) => {
  res.sendFile(path.join(__dirname, 'dist', 'index.html'));
});

app.listen(PORT, () => {
  console.log(`Server running on http://0.0.0.0:${PORT}`);
});