# GPT-OSS / app.py
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
import httpx
import json

app = FastAPI()

# Serve chat.html at root
@app.get("/")
async def chat_page():
    return FileResponse("templates/chat.html")
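
# Optional: if chat.html references local CSS/JS, a static directory could be
# mounted. A minimal sketch, assuming a ./static folder exists (the folder
# name is an assumption, not part of the original app):
#
#   from fastapi.staticfiles import StaticFiles
#   app.mount("/static", StaticFiles(directory="static"), name="static")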

# Streaming chat endpoint (simplified): forwards the prompt to a local Ollama server
@app.post("/stream_chat")
async def stream_chat(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        raise HTTPException(status_code=400, detail="Missing 'prompt'")

    # Use the gpt-oss:20b model served by Ollama
    model = "gpt-oss:20b"
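    # Note: the model must already be available locally,
    # e.g. pulled beforehand with `ollama pull gpt-oss:20b`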

    async def event_generator():
        try:
            url = "http://localhost:11434/api/chat"
            payload = {
                "model": model,
                "messages": [
                    {"role": "system", "content": "You are a thoughtful assistant."},
                    {"role": "user", "content": prompt}
                ],
                "stream": True,
                "options": {
                    "num_predict": 256,  # cap on generated tokens
                    "num_ctx": 4096      # context window size
                }
            }
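            # With "stream": true, Ollama's /api/chat responds with one JSON
            # object per line (NDJSON); each chunk carries a partial
            # message.content, and the final chunk has "done": true.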
            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, json=payload, timeout=None) as resp:
                    resp.raise_for_status()
                    async for line in resp.aiter_lines():
                        if not line.strip():
                            continue
                        try:
                            chunk = json.loads(line)
                            content = chunk.get("message", {}).get("content", "")
                            if content:
                                yield content
                        except json.JSONDecodeError:
                            # Skip any malformed or partial lines
                            continue
        except httpx.HTTPStatusError as e:
            # Surface Ollama's error; a streamed response body must be
            # read before .text is accessible
            await e.response.aread()
            yield f"[error]\nOllama API returned error: {e.response.status_code} - {e.response.text}"
        except httpx.RequestError as e:
            # e.g. connection refused when Ollama is not running
            yield f"[error]\nCould not reach Ollama: {e}"

    return StreamingResponse(
        event_generator(),
        media_type="text/plain"
    )
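
# Example client, shown as a sketch: consume the plain-text stream chunk by
# chunk. Assumes the server is already running on port 7860; httpx is reused
# here, but any streaming HTTP client would do.
#
#   import httpx
#
#   with httpx.stream("POST", "http://localhost:7860/stream_chat",
#                     json={"prompt": "Hello"}, timeout=None) as r:
#       for text in r.iter_text():
#           print(text, end="", flush=True)
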
# For Hugging Face Spaces compatibility
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)