Spaces:
Sleeping
Sleeping
"""
Icarus Router — FastAPI server
==============================
OpenAI-compatible LLM routing gateway. Self-hosted. $0/month.

Endpoints:
    GET  /                     — info
    GET  /health               — probe all providers in parallel
    GET  /providers            — list providers, routing table, model map
    POST /command              — general routing (specify task type)
    POST /code                 — shortcut: CODE_SMALL chain
    POST /reason               — shortcut: REASONING chain
    POST /chat                 — shortcut: CHAT chain
    POST /v1/chat/completions  — OpenAI-compatible endpoint

Run:
    uvicorn server:app --host 0.0.0.0 --port 8000

License: Apache 2.0
Author: Llewellyn Systems — https://www.llewellynsystems.com
"""
| from __future__ import annotations | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import Optional | |
| import time | |
| from router import route, health_check, TaskType, MODEL_MAP, ROUTING_TABLE, PROVIDERS | |
# ASGI application object; `uvicorn server:app` (see module docstring) and the
# `__main__` guard at the bottom of this file both serve this instance.
app = FastAPI(
    title="Icarus Router",
    version="1.0.0",
    description="Multi-provider LLM routing gateway — $0/month free-tier focused",
)

# Wildcard CORS: every origin, method, and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============================================================================
# REQUEST / RESPONSE MODELS
# ============================================================================
class CommandRequest(BaseModel):
    """Request body shared by the /command, /code, /reason and /chat handlers.

    ``max_tokens`` defaults to ``None`` rather than a fixed number so that
    each handler's own fallback (``req.max_tokens or N``) takes effect when
    the client omits the field. With a numeric schema default, those
    per-endpoint fallbacks could only trigger on an explicit ``null`` or ``0``.
    """

    # Prompt text handed to route() as its first argument.
    prompt: str
    # Task-type name; only the /command handler reads it (resolved via _task()).
    task: Optional[str] = "reasoning"
    # Completion budget; None lets the handler pick its endpoint-specific default.
    max_tokens: Optional[int] = None
    # Forwarded to route() as `preferred` — presumably provider names to try
    # first; confirm against router.route().
    preferred: Optional[list[str]] = None
# OpenAI-compatible models
class OAIMessage(BaseModel):
    """One entry of the ``messages`` list in an OpenAI-style chat request."""

    # Speaker label; openai_compat() handles "system", "user" and "assistant".
    role: str
    # Plain-text message body.
    content: str
class OAIChatRequest(BaseModel):
    """Request body for the OpenAI-compatible chat completions handler."""

    # Used only as a routing hint: openai_compat() looks for the substrings
    # "code", "reason" or "vision" in it.
    model: Optional[str] = "auto"
    # Conversation so far, flattened into one prompt by openai_compat().
    messages: list[OAIMessage]
    # Completion budget passed on to route().
    max_tokens: Optional[int] = 1000
    # Accepted for request-shape compatibility; not read anywhere in this file.
    stream: Optional[bool] = False
# ============================================================================
# HELPERS
# ============================================================================
def _task(name: str) -> TaskType:
    """Convert a task-type string into its ``TaskType`` member.

    Args:
        name: The value of a ``TaskType`` member, as sent by the client.

    Returns:
        The matching ``TaskType`` member.

    Raises:
        HTTPException: Status 400 when ``name`` is not a valid ``TaskType``
            value; the detail message lists every accepted value.
    """
    try:
        return TaskType(name)
    except ValueError as exc:
        valid = [t.value for t in TaskType]
        # Chain the original ValueError so the lookup failure stays visible
        # in tracebacks and logs.
        raise HTTPException(
            status_code=400,
            detail=f"Unknown task type: '{name}'. Valid: {valid}",
        ) from exc
# ============================================================================
# ROUTES
# ============================================================================
@app.get("/")
async def root():
    """Return static service information plus the configured provider names.

    Registered as ``GET /``, matching the endpoint list in the module
    docstring; without the decorator the handler is never attached to ``app``.
    """
    return {
        "name": "Icarus Router",
        "version": "1.0.0",
        "description": "Multi-provider LLM routing gateway — $0/month",
        "providers": list(PROVIDERS.keys()),
        "docs": "/docs",
        "health": "/health",
        "built_by": "Llewellyn Systems — https://www.llewellynsystems.com",
    }
@app.get("/health")
async def health():
    """Probe all configured providers in parallel. Shows which are live.

    Registered as ``GET /health`` (see the module docstring's endpoint list).
    The response is whatever ``router.health_check()`` returns, unmodified.
    """
    return await health_check()
@app.get("/providers")
async def providers():
    """Return the provider names, the routing table and the model map.

    Registered as ``GET /providers`` (see the module docstring's endpoint
    list). ``TaskType`` keys are replaced by their ``.value`` so the
    mappings serialize with plain keys.
    """
    routing_table = {
        task_type.value: chain for task_type, chain in ROUTING_TABLE.items()
    }
    model_map = {
        provider: {task_type.value: model for task_type, model in per_task.items()}
        for provider, per_task in MODEL_MAP.items()
    }
    return {
        "providers": list(PROVIDERS.keys()),
        "routing_table": routing_table,
        "model_map": model_map,
    }
@app.post("/command")
async def command(req: CommandRequest):
    """General routing endpoint: the caller picks the task type.

    Registered as ``POST /command`` (see the module docstring's endpoint
    list).

    Args:
        req: Prompt, task-type name, token budget and optional ``preferred``
            list.

    Returns:
        The result of ``route()``, unmodified.

    Raises:
        HTTPException: Status 400 (raised by ``_task``) when ``req.task`` is
            not a valid ``TaskType`` value.
    """
    # A falsy task (None or "") and a falsy max_tokens (None or 0) both fall
    # back to this endpoint's defaults.
    task_type = _task(req.task or "reasoning")
    token_budget = req.max_tokens or 1000
    return await route(
        req.prompt,
        task=task_type,
        max_tokens=token_budget,
        preferred=req.preferred,
    )
@app.post("/code")
async def code(req: CommandRequest):
    """Shortcut for CODE_SMALL task routing.

    Registered as ``POST /code`` (see the module docstring's endpoint list).
    ``req.task`` is ignored; the task is always ``TaskType.CODE_SMALL``.

    Returns:
        The result of ``route()``, unmodified.
    """
    # Falls back to 2000 whenever req.max_tokens is falsy (None or 0).
    token_budget = req.max_tokens or 2000
    return await route(
        req.prompt,
        task=TaskType.CODE_SMALL,
        max_tokens=token_budget,
        preferred=req.preferred,
    )
@app.post("/reason")
async def reason(req: CommandRequest):
    """Shortcut for REASONING task routing.

    Registered as ``POST /reason`` (see the module docstring's endpoint
    list). ``req.task`` is ignored; the task is always
    ``TaskType.REASONING``.

    Returns:
        The result of ``route()``, unmodified.
    """
    # Falls back to 1500 whenever req.max_tokens is falsy (None or 0).
    token_budget = req.max_tokens or 1500
    return await route(
        req.prompt,
        task=TaskType.REASONING,
        max_tokens=token_budget,
        preferred=req.preferred,
    )
@app.post("/chat")
async def chat(req: CommandRequest):
    """Shortcut for CHAT task routing.

    Registered as ``POST /chat`` (see the module docstring's endpoint list).
    ``req.task`` is ignored; the task is always ``TaskType.CHAT``.

    Returns:
        The result of ``route()``, unmodified.
    """
    # Falls back to 800 whenever req.max_tokens is falsy (None or 0).
    token_budget = req.max_tokens or 800
    return await route(
        req.prompt,
        task=TaskType.CHAT,
        max_tokens=token_budget,
        preferred=req.preferred,
    )
@app.post("/v1/chat/completions")
async def openai_compat(req: OAIChatRequest):
    """
    OpenAI-compatible endpoint. Drop-in replacement for openai.ChatCompletion.

    Registered as ``POST /v1/chat/completions`` (see the module docstring's
    endpoint list).

    Usage:
        from openai import OpenAI
        client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
        response = client.chat.completions.create(
            model="auto",
            messages=[{"role": "user", "content": "Hello"}]
        )

    Args:
        req: OpenAI-style chat request. ``req.model`` is only a routing hint
            and ``req.stream`` is not consulted: the reply is always a single
            JSON body.

    Returns:
        A dict shaped like an OpenAI ``chat.completion`` object, plus an
        ``x_icarus`` section carrying routing metadata from ``route()``.

    Raises:
        HTTPException: Status 503 when ``route()`` yields no response text.
    """
    # Flatten messages to a single prompt (simple approach — extend for
    # multi-turn). Messages whose role is not listed here are skipped.
    role_prefix = {
        "system": "[System]: ",
        "user": "",
        "assistant": "[Assistant]: ",
    }
    prompt = "\n".join(
        f"{role_prefix[msg.role]}{msg.content}"
        for msg in req.messages
        if msg.role in role_prefix
    )

    # Map model hint to task type. Order matters: the first keyword found in
    # the hint wins, and anything else routes as CHAT.
    hint_tasks = (
        ("code", TaskType.CODE_SMALL),
        ("reason", TaskType.REASONING),
        ("vision", TaskType.VISION),
    )
    model_hint = (req.model or "auto").lower()
    task = next(
        (task_type for keyword, task_type in hint_tasks if keyword in model_hint),
        TaskType.CHAT,
    )

    result = await route(prompt, task=task, max_tokens=req.max_tokens or 1000)
    answer = result.get("response")
    if not answer:
        raise HTTPException(
            status_code=503,
            detail="All providers failed. Check /health for status.",
        )

    # Return in OpenAI response shape. Usage figures are whitespace-separated
    # word counts, not tokenizer counts; each is computed once and reused.
    created = int(time.time())
    prompt_tokens = len(prompt.split())
    completion_tokens = len(answer.split())
    return {
        "id": f"icarus-{created}",
        "object": "chat.completion",
        "created": created,
        "model": result.get("model", "unknown"),
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": answer,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
        "x_icarus": {
            "provider": result.get("provider"),
            "latency_ms": result.get("latency_ms"),
            "task": result.get("task"),
            "attempts": result.get("attempts", []),
        },
    }
if __name__ == "__main__":
    # Imported here so uvicorn is only required when this file is run directly.
    import uvicorn

    # Same host/port as the `uvicorn server:app` command in the module docstring.
    uvicorn.run(app, host="0.0.0.0", port=8000)