aio-llm / app.py
OrganatAdminIT's picture
fix: replace Ollama with LiteLLM forward proxy — startup <15s
a83870b verified
"""
organatadminit-aio-llm — LiteLLM Forward Proxy (FastAPI)
=========================================================
Replaces Ollama-based startup (30 min model download) with instant forward proxy.
Routes /v1/* requests to organatceo inference nodes with automatic failover.
Primary: https://organatceo-aio-llm.hf.space/v1
Fallback: https://organathf3-aio-llm.hf.space/v1
Startup time: <15 seconds (was 20–30 minutes with Ollama pull)
"""
import os
import httpx
import asyncio
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
# ASGI application object; the route decorators in this module register on it.
app = FastAPI(
    title="organatadminit-aio-llm",
    version="2.0.0",
)

# CORS is fully open: every origin, method and request header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# Upstream nodes — TRIPRUT inference fleet (organatceo account).
# Listed in failover order: the forwarder tries them first to last.
# PRIMARY_URL / FALLBACK_URL override the defaults. Trailing slashes are
# stripped because the forwarder joins base URL and path with a single "/",
# so "https://host/" would otherwise produce "https://host//v1/...".
UPSTREAM_NODES = [
    os.environ.get("PRIMARY_URL", "https://organatceo-aio-llm.hf.space").rstrip("/"),
    os.environ.get("FALLBACK_URL", "https://organathf3-aio-llm.hf.space").rstrip("/"),
]
# Upstream timeouts in seconds: 10 s is the default for every phase (this
# covers connect and pool acquisition), with explicit overrides for read
# (120 s) and write (30 s).
# NOTE(review): the long read timeout presumably accommodates slow model
# responses — confirm.
TIMEOUT = httpx.Timeout(10.0, read=120.0, write=30.0)
@app.get("/health")
async def health():
    """Return a static status payload identifying this proxy node.

    No upstream node is contacted; the response is constant.
    """
    payload = dict(
        status="ok",
        node="organatadminit-aio-llm",
        mode="forward-proxy",
        version="2.0.0",
    )
    return payload
@app.get("/")
async def root():
    """Return a status payload that names the first configured upstream node."""
    primary_upstream = UPSTREAM_NODES[0]
    info = {"status": "ok", "node": "organatadminit-aio-llm"}
    info["upstream"] = primary_upstream
    return info
# Client request headers that are not copied to the upstream request.
# httpx derives Host from the target URL and Content-Length from the buffered
# body; the others describe the client<->proxy connection only. Forwarding
# Transfer-Encoding next to the Content-Length httpx sets would produce a
# contradictory request.
_SKIP_REQUEST_HEADERS = frozenset(
    {"host", "content-length", "transfer-encoding", "connection", "keep-alive"}
)

# Upstream response headers that are not copied to the client response.
# httpx hands back an already-decoded body, so the upstream Content-Encoding
# and Content-Length no longer describe the bytes being sent; Starlette
# recomputes Content-Length from the body when the header is absent.
_SKIP_RESPONSE_HEADERS = frozenset(
    {"content-encoding", "content-length", "transfer-encoding", "connection", "keep-alive"}
)


async def _forward(request: Request, path: str) -> Response:
    """Forward *request* to the first upstream node that answers.

    The nodes in ``UPSTREAM_NODES`` are tried in order. A node is skipped
    only when the HTTP call itself raises (connection error, timeout, ...);
    any HTTP response, including 4xx/5xx, is relayed to the caller as-is.

    Args:
        request: Incoming client request; its method, headers, query string
            and body are forwarded.
        path: Upstream path without a leading slash (e.g. ``"v1/models"``).

    Returns:
        The upstream response (status, filtered headers, body) tagged with
        ``X-Triprut-Node``, or a 503 JSON response carrying the last error
        when every node failed.

    NOTE: the upstream body is read completely before it is returned, so
    streamed (SSE) completions reach the client in one piece rather than
    incrementally.
    """
    body = await request.body()
    headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in _SKIP_REQUEST_HEADERS
    }
    headers["X-Forwarded-By"] = "organatadminit-aio-llm"
    # The route only captures the path, so the raw query string has to be
    # re-attached or GET parameters would be lost.
    query = request.url.query

    last_error = None
    for node_url in UPSTREAM_NODES:
        target = f"{node_url.rstrip('/')}/{path}"
        if query:
            target = f"{target}?{query}"
        try:
            async with httpx.AsyncClient(timeout=TIMEOUT) as client:
                resp = await client.request(
                    method=request.method,
                    url=target,
                    headers=headers,
                    content=body,
                )
        except Exception as exc:
            # Deliberately broad: any failure talking to this node should
            # fall through to the next one. The exception type is included
            # because many httpx timeouts have an empty message.
            last_error = f"{type(exc).__name__}: {exc}"
            continue

        resp_headers = {
            name: value
            for name, value in resp.headers.items()
            if name.lower() not in _SKIP_RESPONSE_HEADERS
        }
        resp_headers["X-Triprut-Node"] = "organatadminit-aio-llm"
        return Response(
            content=resp.content,
            status_code=resp.status_code,
            headers=resp_headers,
        )

    return JSONResponse(
        {"error": "All upstream nodes failed", "detail": last_error},
        status_code=503,
    )
@app.api_route(
    "/v1/{path:path}",
    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
)
async def proxy_v1(request: Request, path: str):
    """Relay any ``/v1/...`` request to the upstream fleet under the same path."""
    upstream_path = "v1/" + path
    return await _forward(request, upstream_path)
@app.api_route(
    "/api/{path:path}",
    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
)
async def proxy_api(request: Request, path: str):
    """Relay any ``/api/...`` request to the upstream fleet under the same path."""
    upstream_path = "api/" + path
    return await _forward(request, upstream_path)
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    # uvicorn is imported here so that merely importing this module does not
    # require it.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)