from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
import httpx
import json

app = FastAPI()

# Serve chat.html at root
@app.get("/")
async def chat_page():
    return FileResponse("templates/chat.html")

# Streaming chat endpoint: proxies the prompt to a local Ollama server
@app.post("/stream_chat")
async def stream_chat(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        raise HTTPException(status_code=400, detail="Missing 'prompt'")
    
    # Model served by the local Ollama instance
    model = "gpt-oss:20b"
    
    async def event_generator():
        try:
            url = "http://localhost:11434/api/chat"
            payload = {
                "model": model,
                "messages": [
                    {"role": "system", "content": "You are a thoughtful assistant."},
                    {"role": "user", "content": prompt}
                ],
                "stream": True,
                "options": {
                    "num_predict": 256,
                    "num_ctx": 4096
                }
            }

            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, json=payload, timeout=None) as resp:
                    resp.raise_for_status()

                    async for line in resp.aiter_lines():
                        if not line or not line.strip():
                            continue
                        try:
                            chunk = json.loads(line)
                            content = chunk.get("message", {}).get("content", "")
                            if content:
                                yield content
                            # Ollama flags the final chunk with "done": true
                            if chunk.get("done"):
                                break
                        except json.JSONDecodeError:
                            continue

        except httpx.HTTPStatusError as e:
            # The streamed body is already closed once the `async with`
            # blocks exit, so e.response.text would raise; report only
            # the status code, which is always available
            yield f"[error]\nOllama API returned error: {e.response.status_code}"
        except httpx.RequestError as e:
            # Connection problems, e.g. Ollama not running on port 11434
            yield f"[error]\nCould not reach Ollama: {e}"

    return StreamingResponse(
        event_generator(),
        media_type="text/plain"
    )

# For Hugging Face Spaces compatibility
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
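The parsing inside `event_generator` can be exercised without a running Ollama server. Below is a minimal sketch: `extract_content` is a hypothetical helper mirroring that logic, fed canned lines shaped like Ollama's newline-delimited JSON stream (assistant text at `message.content`, final chunk flagged `"done": true`).

```python
import json

def extract_content(lines):
    """Hypothetical helper mirroring event_generator's NDJSON parsing."""
    parts = []
    for line in lines:
        if not line.strip():
            continue
        try:
            chunk = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip malformed lines, as the endpoint does
        content = chunk.get("message", {}).get("content", "")
        if content:
            parts.append(content)
        if chunk.get("done"):  # terminal chunk: stop reading
            break
    return "".join(parts)

# Canned lines shaped like Ollama's /api/chat stream:
sample = [
    '{"message": {"role": "assistant", "content": "Hel"}, "done": false}',
    '{"message": {"role": "assistant", "content": "lo"}, "done": false}',
    '{"done": true}',
]
assert extract_content(sample) == "Hello"
```

Keeping this logic in a plain function like this makes the stream-decoding step unit-testable in isolation from the HTTP plumbing.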