# GPT-OSS / app.py
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, FileResponse
import httpx
import json

app = FastAPI()

# Serve chat.html at root
@app.get("/")
async def chat_page():
    return FileResponse("templates/chat.html")
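
# Optional: if chat.html references local CSS/JS, a static directory could be
# mounted. A minimal sketch, assuming a ./static folder exists (the folder
# name is an assumption, not part of the original app):
#
#   from fastapi.staticfiles import StaticFiles
#   app.mount("/static", StaticFiles(directory="static"), name="static")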

# Streaming chat endpoint (simplified): forwards the prompt to a local Ollama server
@app.post("/stream_chat")
async def stream_chat(request: Request):
    data = await request.json()
    prompt = data.get("prompt")
    if not prompt:
        raise HTTPException(status_code=400, detail="Missing 'prompt'")

    # Use the gpt-oss:20b model served by Ollama
    model = "gpt-oss:20b"
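    # Note: the model must already be available locally,
    # e.g. pulled beforehand with `ollama pull gpt-oss:20b`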

    async def event_generator():
        try:
            url = "http://localhost:11434/api/chat"
            payload = {
                "model": model,
                "messages": [
                    {"role": "system", "content": "You are a thoughtful assistant."},
                    {"role": "user", "content": prompt}
                ],
                "stream": True,
                "options": {
                    "num_predict": 256,  # cap on generated tokens
                    "num_ctx": 4096      # context window size
                }
            }
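            # With "stream": true, Ollama's /api/chat responds with one JSON
            # object per line (NDJSON); each chunk carries a partial
            # message.content, and the final chunk has "done": true.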
            async with httpx.AsyncClient() as client:
                async with client.stream("POST", url, json=payload, timeout=None) as resp:
                    resp.raise_for_status()
                    async for line in resp.aiter_lines():
                        if not line.strip():
                            continue
                        try:
                            chunk = json.loads(line)
                            content = chunk.get("message", {}).get("content", "")
                            if content:
                                yield content
                        except json.JSONDecodeError:
                            # Skip any malformed or partial lines
                            continue
        except httpx.HTTPStatusError as e:
            # Surface Ollama's error; a streamed response body must be
            # read before .text is accessible
            await e.response.aread()
            yield f"[error]\nOllama API returned error: {e.response.status_code} - {e.response.text}"
        except httpx.RequestError as e:
            # e.g. connection refused when Ollama is not running
            yield f"[error]\nCould not reach Ollama: {e}"

    return StreamingResponse(
        event_generator(),
        media_type="text/plain"
    )
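
# Example client, shown as a sketch: consume the plain-text stream chunk by
# chunk. Assumes the server is already running on port 7860; httpx is reused
# here, but any streaming HTTP client would do.
#
#   import httpx
#
#   with httpx.stream("POST", "http://localhost:7860/stream_chat",
#                     json={"prompt": "Hello"}, timeout=None) as r:
#       for text in r.iter_text():
#           print(text, end="", flush=True)
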
# For Hugging Face Spaces compatibility
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)