"""Ollama gateway and monitor.

Proxies ``/api/*`` requests to a local Ollama server (starting it on demand)
and exposes ``/health`` (JSON) and ``/monitor`` (HTML) uptime endpoints backed
by a periodic background health probe.
"""

import asyncio
import json
import subprocess
import time
from datetime import datetime, timedelta
from typing import Optional

import httpx
import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import (
    HTMLResponse,
    JSONResponse,
    Response,
    StreamingResponse,
)

app = FastAPI(title="Ollama Gateway + Monitor")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

OLLAMA_BASE = "http://127.0.0.1:11434"

# ─── Uptime tracking ──────────────────────────────────────────────────────────
start_time = time.time()
health_log: list[dict] = []  # entries: {"ts": <epoch seconds>, "ok": <bool>}
MAX_LOG = 100

# Strong reference to the background probe task; asyncio only keeps weak
# references to tasks, so an unreferenced task may be garbage-collected.
_health_task: Optional[asyncio.Task] = None


async def check_ollama_health() -> bool:
    """Return True if ``GET {OLLAMA_BASE}/api/tags`` answers 200 within 5s."""
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            r = await client.get(f"{OLLAMA_BASE}/api/tags")
            return r.status_code == 200
    except Exception:
        # Deliberate best-effort boundary: any failure means "down", and the
        # background loop must never die because a probe raised.
        return False


async def health_loop():
    """Probe Ollama every 30 seconds, keeping the last MAX_LOG results."""
    while True:
        ok = await check_ollama_health()
        health_log.append({"ts": time.time(), "ok": ok})
        if len(health_log) > MAX_LOG:
            health_log.pop(0)
        await asyncio.sleep(30)


@app.on_event("startup")
async def startup():
    """Start the background health probe and keep a reference to its task."""
    global _health_task
    _health_task = asyncio.create_task(health_loop())


@app.on_event("shutdown")
async def shutdown():
    """Cancel the background health probe so the event loop can exit cleanly."""
    if _health_task is not None:
        _health_task.cancel()


# ─── Monitoring endpoints ─────────────────────────────────────────────────────
@app.get("/health", response_class=JSONResponse)
async def health():
    """Return gateway uptime, a live Ollama probe and the logged uptime ratio."""
    ollama_ok = await check_ollama_health()
    uptime_sec = int(time.time() - start_time)
    total = len(health_log)
    good = sum(1 for h in health_log if h["ok"])
    return {
        "status": "ok",
        "uptime_seconds": uptime_sec,
        "uptime_human": str(timedelta(seconds=uptime_sec)),
        "ollama": "up" if ollama_ok else "down",
        "ollama_uptime_pct": round(good / total * 100, 1) if total else None,
        "checks_recorded": total,
    }


# Kept outside the f-string template so CSS braces need no escaping.
_DASHBOARD_CSS = """
body { font-family: system-ui, sans-serif; background: #0f172a; color: #e2e8f0; margin: 2rem; }
.cards { display: flex; flex-wrap: wrap; gap: 1rem; margin-bottom: 1.5rem; }
.card { background: #1e293b; border-radius: 8px; padding: 1rem 1.5rem; min-width: 10rem; }
.label { font-size: 0.8rem; color: #94a3b8; }
.value { font-size: 1.4rem; font-weight: 600; }
.dot { display: inline-block; width: 12px; height: 12px; border-radius: 50%; margin: 2px; }
"""


@app.get("/monitor", response_class=HTMLResponse)
async def monitor_dashboard():
    """Render an HTML dashboard with uptime figures and the last 50 probes.

    The page reloads itself every 30 seconds, matching the probe interval.
    """
    ollama_ok = await check_ollama_health()
    uptime_sec = int(time.time() - start_time)
    total = len(health_log)
    good = sum(1 for h in health_log if h["ok"])
    pct = round(good / total * 100, 1) if total else 0

    # One coloured dot per logged probe; the probe time is shown on hover.
    dot_spans = []
    for h in health_log[-50:]:
        color = "#22c55e" if h["ok"] else "#ef4444"
        ts = datetime.fromtimestamp(h["ts"]).strftime("%H:%M:%S")
        dot_spans.append(
            f'<span class="dot" style="background:{color}" title="{ts}"></span>'
        )
    dots = "".join(dot_spans)
    history = dots if dots else "No data yet — checks run every 30s"

    ollama_color = "#22c55e" if ollama_ok else "#ef4444"
    ollama_label = "🟢 UP" if ollama_ok else "🔴 DOWN"

    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="refresh" content="30">
<title>Server Monitor</title>
<style>{_DASHBOARD_CSS}</style>
</head>
<body>
<h1>🖥️ Server Monitor</h1>
<div class="cards">
<div class="card">
<div class="label">Server Uptime</div>
<div class="value">{str(timedelta(seconds=uptime_sec))}</div>
</div>
<div class="card">
<div class="label">Ollama Status</div>
<div class="value" style="color:{ollama_color}">{ollama_label}</div>
</div>
<div class="card">
<div class="label">Ollama Uptime %</div>
<div class="value">{pct}%</div>
</div>
<div class="card">
<div class="label">Checks Logged</div>
<div class="value">{total}</div>
</div>
</div>
<div class="label">Last {min(total,50)} checks (green=up, red=down) — auto-refreshes every 30s</div>
<div class="history">{history}</div>
</body>
</html>
"""
    return html


# ─── Ollama proxy ─────────────────────────────────────────────────────────────
async def _ensure_ollama():
    """Make sure Ollama is reachable, launching ``ollama serve`` if it is not.

    Raises:
        HTTPException: 502 if the binary cannot be launched or the server does
            not become healthy within about 10 seconds (20 probes, 0.5s apart).
    """
    if await check_ollama_health():
        return
    try:
        subprocess.Popen(
            ["ollama", "serve"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError as exc:
        # Missing or non-executable binary: report it as a gateway error
        # rather than letting it surface as an unhandled 500.
        raise HTTPException(502, "Ollama failed to start") from exc
    for _ in range(20):
        await asyncio.sleep(0.5)
        if await check_ollama_health():
            return
    raise HTTPException(502, "Ollama failed to start")


def _wants_stream(method: str, body: bytes) -> bool:
    """Return True when a request should be relayed as a streamed response.

    Only POST requests stream. The JSON body's ``stream`` field decides and is
    treated as true when absent; unparsable or non-object bodies count as an
    empty object.
    """
    if method != "POST":
        return False
    try:
        payload = json.loads(body) if body else {}
    except (ValueError, RecursionError):
        payload = {}
    if not isinstance(payload, dict):
        # A JSON list or scalar has no "stream" key to consult.
        payload = {}
    return bool(payload.get("stream", True))


@app.api_route(
    "/api/{path:path}",
    methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
)
async def ollama_proxy(path: str, request: Request):
    """Forward an ``/api/*`` request to Ollama and relay its response.

    Streaming POSTs are relayed chunk by chunk as NDJSON with the upstream
    status code; everything else is buffered and returned as JSON, or verbatim
    when the upstream body is empty or not JSON.

    Raises:
        HTTPException: 502 if Ollama cannot be started or the upstream request
            fails at the transport level.
    """
    await _ensure_ollama()
    url = f"{OLLAMA_BASE}/api/{path}"
    body = await request.body()
    # Host and Content-Length are recomputed by httpx for the upstream request.
    headers = {
        k: v
        for k, v in request.headers.items()
        if k.lower() not in ("host", "content-length")
    }
    params = dict(request.query_params)

    if _wants_stream(request.method, body):
        # Open the upstream response before building ours so that its status
        # code (e.g. 404 for an unknown model) reaches the caller.
        client = httpx.AsyncClient(timeout=None)
        try:
            upstream_request = client.build_request(
                request.method,
                url,
                content=body,
                headers=headers,
                params=params,
            )
            upstream = await client.send(upstream_request, stream=True)
        except httpx.HTTPError as exc:
            await client.aclose()
            raise HTTPException(502, "Ollama request failed") from exc

        async def stream_response():
            try:
                async for chunk in upstream.aiter_bytes():
                    yield chunk
            finally:
                await upstream.aclose()
                await client.aclose()

        return StreamingResponse(
            stream_response(),
            status_code=upstream.status_code,
            media_type="application/x-ndjson",
        )

    try:
        async with httpx.AsyncClient(timeout=120) as client:
            r = await client.request(
                request.method,
                url,
                content=body,
                headers=headers,
                params=params,
            )
    except httpx.HTTPError as exc:
        raise HTTPException(502, "Ollama request failed") from exc

    try:
        payload = r.json()
    except ValueError:
        # Empty or non-JSON upstream body: pass it through unchanged.
        return Response(
            content=r.content,
            status_code=r.status_code,
            media_type=r.headers.get("content-type"),
        )
    return JSONResponse(status_code=r.status_code, content=payload)


# ─── Run ──────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)