from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
import httpx
import json

app = FastAPI()


# Serve chat.html at root
@app.get("/")
async def chat_page():
    return FileResponse("templates/chat.html")


# Your existing streaming endpoint (simplified)
@app.post("/stream_chat")
async def stream_chat(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        raise HTTPException(status_code=400, detail="Missing 'prompt'")

    # Use gpt-oss:20b as requested
    model = "gpt-oss:20b"

    async def event_generator():
        try:
            url = "http://localhost:11434/api/chat"
            payload = {
                "model": model,
                "messages": [
                    {"role": "system", "content": "You are a thoughtful assistant."},
                    {"role": "user", "content": prompt},
                ],
                "stream": True,
                "options": {
                    "num_predict": 256,
                    "num_ctx": 4096,
                },
            }
            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, json=payload, timeout=None) as resp:
                    if resp.is_error:
                        # Read the streamed error body now so that e.response.text
                        # is available in the handler below; otherwise httpx raises
                        # ResponseNotRead once the stream has been closed.
                        await resp.aread()
                    resp.raise_for_status()
                    # Ollama streams one JSON object per line; forward only the
                    # incremental message content to the browser.
                    async for line in resp.aiter_lines():
                        if not line or not line.strip():
                            continue
                        try:
                            chunk = json.loads(line)
                            content = chunk.get("message", {}).get("content", "")
                            if content:
                                yield content
                        except json.JSONDecodeError:
                            continue
        except httpx.HTTPStatusError as e:
            # Forward the Ollama-generated error to the client
            error_msg = f"Ollama API returned error: {e.response.status_code} - {e.response.text}"
            yield f"[error]\n{error_msg}"

    return StreamingResponse(
        event_generator(),
        media_type="text/plain",
    )


# For Hugging Face Spaces compatibility (Spaces expose the app on port 7860)
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
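

# --- Usage sketch (illustrative, not part of the app) ---
# A minimal way to exercise the streaming endpoint once the server is running
# on port 7860. The "prompt" field matches what /stream_chat reads above; the
# localhost URL and the example prompt are assumptions for local testing.
#
#   curl -N -X POST http://localhost:7860/stream_chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Say hello in one sentence."}'
#
# Or from Python with httpx, printing tokens as they arrive:
#
#   import httpx
#   with httpx.stream("POST", "http://localhost:7860/stream_chat",
#                     json={"prompt": "Say hello in one sentence."},
#                     timeout=None) as resp:
#       for text in resp.iter_text():
#           print(text, end="", flush=True)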