# ollama-proxy / app.py
# FastAPI proxy in front of a local Ollama server (Hugging Face Space).
import json
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse, StreamingResponse
import requests
import httpx
app = FastAPI()
OLLAMA_BASE = "http://localhost:11434"
@app.get("/")
def home():
return {"status": "Ollama Proxy is running."}
@app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"])
async def proxy(full_path: str, request: Request):
async with httpx.AsyncClient(timeout=1200) as client:
target_url = f"{OLLAMA_BASE}/{full_path}"
method = request.method
headers = dict(request.headers)
headers.pop("host", None) # Clean host header
headers["Authorization"] = headers.get("Authorization", "Bearer ollama")
body = await request.body()
try:
response = await client.request(
method=method,
url=target_url,
headers=headers,
content=body
)
content_type = response.headers.get("content-type", "")
if content_type.startswith("text/event-stream"):
async def stream_generator():
async for chunk in response.aiter_bytes():
yield chunk
return StreamingResponse(
stream_generator(),
media_type="text/event-stream",
status_code=response.status_code
)
else:
try:
json_data = response.json()
return JSONResponse(content=json_data, status_code=response.status_code)
except Exception:
return JSONResponse(
content={"error": "Ollama returned non-JSON response", "raw": await response.aread()},
status_code=response.status_code
)
except httpx.RequestError as e:
return JSONResponse(
content={"error": "Request failed", "details": str(e)},
status_code=500
)
@app.get('/ask')
def ask(prompt: str):
res = requests.post('http://ollama:11434/api/generate', json={
"prompt": prompt,
"stream": False,
"model": "mistral"
})
return Response(content=res.text, media_type="application/json")