""" Icarus Router — FastAPI server ============================== OpenAI-compatible LLM routing gateway. Self-hosted. $0/month. Endpoints: GET / — info GET /health — probe all providers in parallel GET /providers — list providers, routing table, model map POST /command — general routing (specify task type) POST /code — shortcut: CODE_SMALL chain POST /reason — shortcut: REASONING chain POST /chat — shortcut: CHAT chain POST /v1/chat/completions — OpenAI-compatible endpoint Run: uvicorn server:app --host 0.0.0.0 --port 8000 License: Apache 2.0 Author: Llewellyn Systems — https://www.llewellynsystems.com """ from __future__ import annotations from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from typing import Optional import time from router import route, health_check, TaskType, MODEL_MAP, ROUTING_TABLE, PROVIDERS app = FastAPI( title="Icarus Router", version="1.0.0", description="Multi-provider LLM routing gateway — $0/month free-tier focused", ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) # ============================================================================ # REQUEST / RESPONSE MODELS # ============================================================================ class CommandRequest(BaseModel): prompt: str task: Optional[str] = "reasoning" max_tokens: Optional[int] = 1000 preferred: Optional[list[str]] = None # OpenAI-compatible models class OAIMessage(BaseModel): role: str content: str class OAIChatRequest(BaseModel): model: Optional[str] = "auto" messages: list[OAIMessage] max_tokens: Optional[int] = 1000 stream: Optional[bool] = False # ============================================================================ # HELPERS # ============================================================================ def _task(name: str) -> TaskType: try: return TaskType(name) except ValueError: raise HTTPException(status_code=400, detail=f"Unknown task type: '{name}'. Valid: {[t.value for t in TaskType]}") # ============================================================================ # ROUTES # ============================================================================ @app.get("/") async def root(): return { "name": "Icarus Router", "version": "1.0.0", "description": "Multi-provider LLM routing gateway — $0/month", "providers": list(PROVIDERS.keys()), "docs": "/docs", "health": "/health", "built_by": "Llewellyn Systems — https://www.llewellynsystems.com", } @app.get("/health") async def health(): """Probe all configured providers in parallel. Shows which are live.""" return await health_check() @app.get("/providers") async def providers(): """Return the full routing table and model map.""" return { "providers": list(PROVIDERS.keys()), "routing_table": {t.value: ps for t, ps in ROUTING_TABLE.items()}, "model_map": { p: {t.value: m for t, m in tm.items()} for p, tm in MODEL_MAP.items() }, } @app.post("/command") async def command(req: CommandRequest): """ General routing endpoint. Specify task to control provider chain. Returns response + provider metadata. """ return await route( req.prompt, task=_task(req.task or "reasoning"), max_tokens=req.max_tokens or 1000, preferred=req.preferred, ) @app.post("/code") async def code(req: CommandRequest): """Shortcut for CODE_SMALL task routing.""" return await route( req.prompt, task=TaskType.CODE_SMALL, max_tokens=req.max_tokens or 2000, preferred=req.preferred, ) @app.post("/reason") async def reason(req: CommandRequest): """Shortcut for REASONING task routing.""" return await route( req.prompt, task=TaskType.REASONING, max_tokens=req.max_tokens or 1500, preferred=req.preferred, ) @app.post("/chat") async def chat(req: CommandRequest): """Shortcut for CHAT task routing.""" return await route( req.prompt, task=TaskType.CHAT, max_tokens=req.max_tokens or 800, preferred=req.preferred, ) @app.post("/v1/chat/completions") async def openai_compat(req: OAIChatRequest): """ OpenAI-compatible endpoint. Drop-in replacement for openai.ChatCompletion. Usage: from openai import OpenAI client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed") response = client.chat.completions.create( model="auto", messages=[{"role": "user", "content": "Hello"}] ) """ # Flatten messages to a single prompt (simple approach — extend for multi-turn) prompt_parts = [] for msg in req.messages: if msg.role == "system": prompt_parts.append(f"[System]: {msg.content}") elif msg.role == "user": prompt_parts.append(msg.content) elif msg.role == "assistant": prompt_parts.append(f"[Assistant]: {msg.content}") prompt = "\n".join(prompt_parts) # Map model hint to task type model_hint = (req.model or "auto").lower() if "code" in model_hint: task = TaskType.CODE_SMALL elif "reason" in model_hint: task = TaskType.REASONING elif "vision" in model_hint: task = TaskType.VISION else: task = TaskType.CHAT result = await route(prompt, task=task, max_tokens=req.max_tokens or 1000) if not result.get("response"): raise HTTPException(status_code=503, detail="All providers failed. Check /health for status.") # Return in OpenAI response shape created = int(time.time()) return { "id": f"icarus-{created}", "object": "chat.completion", "created": created, "model": result.get("model", "unknown"), "choices": [{ "index": 0, "message": { "role": "assistant", "content": result["response"], }, "finish_reason": "stop", }], "usage": { "prompt_tokens": len(prompt.split()), "completion_tokens": len((result.get("response") or "").split()), "total_tokens": len(prompt.split()) + len((result.get("response") or "").split()), }, "x_icarus": { "provider": result.get("provider"), "latency_ms": result.get("latency_ms"), "task": result.get("task"), "attempts": result.get("attempts", []), }, } if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000)