"""FastAPI app that streams chat completions from a local Ollama server."""

from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
import httpx
import json

app = FastAPI()


@app.get("/")
async def chat_page():
    # Serve the single-page chat UI.
    return FileResponse("templates/chat.html")
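
# A minimal sketch, assuming a local "static/" directory holds the page's
# JS/CSS; FastAPI's StaticFiles can serve it alongside the chat page:
#
#   from fastapi.staticfiles import StaticFiles
#   app.mount("/static", StaticFiles(directory="static"), name="static")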


@app.post("/stream_chat")
async def stream_chat(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        raise HTTPException(status_code=400, detail="Missing 'prompt'")

    # Model served by the local Ollama instance (assumes it has been pulled,
    # e.g. with `ollama pull gpt-oss:20b`).
    model = "gpt-oss:20b"

    async def event_generator():
        try:
            # Ollama's streaming chat endpoint returns newline-delimited JSON.
            url = "http://localhost:11434/api/chat"
            payload = {
                "model": model,
                "messages": [
                    {"role": "system", "content": "You are a thoughtful assistant."},
                    {"role": "user", "content": prompt}
                ],
                "stream": True,
                "options": {
                    "num_predict": 256,  # cap on generated tokens
                    "num_ctx": 4096      # context window size
                }
            }

            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, json=payload, timeout=None) as resp:
                    # In streaming mode httpx does not read the body automatically,
                    # so fetch an error body now; otherwise accessing .text in the
                    # handler below would raise httpx.ResponseNotRead.
                    if resp.is_error:
                        await resp.aread()
                    resp.raise_for_status()

                    # Each non-empty line is one JSON chunk; forward the token text.
                    async for line in resp.aiter_lines():
                        if not line.strip():
                            continue
                        try:
                            chunk = json.loads(line)
                            content = chunk.get("message", {}).get("content", "")
                            if content:
                                yield content
                        except json.JSONDecodeError:
                            # Skip malformed or partial lines rather than abort the stream.
                            continue

        except httpx.HTTPStatusError as e:
            # Surface HTTP errors in-stream so the client can display them.
            error_msg = f"Ollama API returned error: {e.response.status_code} - {e.response.text}"
            yield f"[error]\n{error_msg}"

    return StreamingResponse(
        event_generator(),
        media_type="text/plain"
    )


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
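
# Quick smoke test once the server is running (assumes Ollama is up on
# localhost:11434 with the model pulled); -N disables curl's buffering so
# tokens print as they arrive:
#
#   curl -N -X POST http://localhost:7860/stream_chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Say hello in one sentence."}'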