# icarus-router / server.py
# staff
# Upload server.py with huggingface_hub
# 779e956 verified
"""
Icarus Router — FastAPI server
==============================
OpenAI-compatible LLM routing gateway. Self-hosted. $0/month.
Endpoints:
    GET  /                     — info
    GET  /health               — probe all providers in parallel
    GET  /providers            — list providers, routing table, model map
    POST /command              — general routing (specify task type)
    POST /code                 — shortcut: CODE_SMALL chain
    POST /reason               — shortcut: REASONING chain
    POST /chat                 — shortcut: CHAT chain
    POST /v1/chat/completions  — OpenAI-compatible endpoint

Run:
    uvicorn server:app --host 0.0.0.0 --port 8000
License: Apache 2.0
Author: Llewellyn Systems — https://www.llewellynsystems.com
"""
from __future__ import annotations
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
import time
from router import route, health_check, TaskType, MODEL_MAP, ROUTING_TABLE, PROVIDERS
# The ASGI application object; every route below is registered on it.
app = FastAPI(
    title="Icarus Router",
    version="1.0.0",
    description="Multi-provider LLM routing gateway — $0/month free-tier focused",
)

# CORS is configured with wildcards for origins, methods and headers.
# NOTE(review): confirm that a wildcard origin is intended outside local use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============================================================================
# REQUEST / RESPONSE MODELS
# ============================================================================
class CommandRequest(BaseModel):
    """Request body shared by the /command, /code, /reason and /chat endpoints."""

    # Text handed to route() as the prompt.
    prompt: str
    # Task type name. Only /command reads it; the shortcut endpoints fix
    # their own task type.
    task: Optional[str] = "reasoning"
    # Passed to route() as max_tokens. The default is None (not a number) so
    # that each endpoint's own fallback -- `req.max_tokens or <default>` --
    # takes effect when the client omits the field. With a numeric default
    # here, those per-endpoint fallbacks could never be reached.
    max_tokens: Optional[int] = None
    # Optional list of provider names, forwarded to route() unchanged.
    preferred: Optional[list[str]] = None
# OpenAI-compatible models
class OAIMessage(BaseModel):
    """One entry of the ``messages`` array in an OpenAI-style chat request."""

    # Speaker role. The /v1/chat/completions handler in this file acts on
    # "system", "user" and "assistant".
    role: str
    # Message text. Declared as a plain string, so non-string content does
    # not match this field's type.
    content: str
class OAIChatRequest(BaseModel):
    """Request body for POST /v1/chat/completions."""

    # Model name. The handler only inspects it for the substrings "code",
    # "reason" or "vision" to choose a task type.
    model: Optional[str] = "auto"
    # Conversation so far, in order.
    messages: list[OAIMessage]
    # Passed to route() as max_tokens (the handler falls back to 1000).
    max_tokens: Optional[int] = 1000
    # Accepted in the request body; the handler in this file does not read it.
    stream: Optional[bool] = False
# ============================================================================
# HELPERS
# ============================================================================
def _task(name: str) -> TaskType:
    """Convert a task-type name into the matching ``TaskType`` member.

    Args:
        name: A ``TaskType`` value, as supplied by the client.

    Returns:
        The ``TaskType`` member whose value equals ``name``.

    Raises:
        HTTPException: Status 400 when ``name`` is not a valid ``TaskType``
            value. The detail message lists the accepted values.
    """
    try:
        return TaskType(name)
    except ValueError as exc:
        valid = [t.value for t in TaskType]
        # Chain explicitly so the failed lookup is kept as __cause__ rather
        # than being reported as an error raised while handling another one.
        raise HTTPException(
            status_code=400,
            detail=f"Unknown task type: '{name}'. Valid: {valid}",
        ) from exc
# ============================================================================
# ROUTES
# ============================================================================
@app.get("/")
async def root():
    """Return static service information plus the configured provider names."""
    provider_names = list(PROVIDERS.keys())
    info = {
        "name": "Icarus Router",
        "version": "1.0.0",
        "description": "Multi-provider LLM routing gateway — $0/month",
        "providers": provider_names,
        # Relative paths to the docs page and the health probe.
        "docs": "/docs",
        "health": "/health",
        "built_by": "Llewellyn Systems — https://www.llewellynsystems.com",
    }
    return info
@app.get("/health")
async def health():
    """Probe the configured providers and report which are live.

    Delegates entirely to ``health_check()`` from the router module and
    returns its result unchanged.
    """
    status = await health_check()
    return status
@app.get("/providers")
async def providers():
    """Describe the router configuration: providers, routing table, model map.

    Keys that are ``TaskType`` members are replaced by their ``.value`` in
    the returned mappings.
    """
    provider_names = list(PROVIDERS.keys())

    # Task value -> whatever ROUTING_TABLE stores for that task.
    routing = {}
    for task_type, chain in ROUTING_TABLE.items():
        routing[task_type.value] = chain

    # Provider -> {task value -> model}.
    models = {}
    for provider, per_task in MODEL_MAP.items():
        models[provider] = {
            task_type.value: model for task_type, model in per_task.items()
        }

    return {
        "providers": provider_names,
        "routing_table": routing,
        "model_map": models,
    }
@app.post("/command")
async def command(req: CommandRequest):
    """
    General routing endpoint: the caller picks the task type.

    The ``task`` field is resolved with ``_task`` (falling back to
    "reasoning" when empty), which raises HTTP 400 for an unknown name.
    ``max_tokens`` falls back to 1000 when it is ``None`` or 0. The value
    returned by ``route()`` is sent back unchanged.
    """
    task_type = _task(req.task or "reasoning")
    token_limit = req.max_tokens or 1000
    result = await route(
        req.prompt,
        task=task_type,
        max_tokens=token_limit,
        preferred=req.preferred,
    )
    return result
@app.post("/code")
async def code(req: CommandRequest):
    """Route the prompt with the CODE_SMALL task type.

    The request's ``task`` field is not consulted. ``max_tokens`` falls back
    to 2000 when it is ``None`` or 0.
    """
    token_limit = req.max_tokens or 2000
    result = await route(
        req.prompt,
        task=TaskType.CODE_SMALL,
        max_tokens=token_limit,
        preferred=req.preferred,
    )
    return result
@app.post("/reason")
async def reason(req: CommandRequest):
    """Route the prompt with the REASONING task type.

    The request's ``task`` field is not consulted. ``max_tokens`` falls back
    to 1500 when it is ``None`` or 0.
    """
    token_limit = req.max_tokens or 1500
    result = await route(
        req.prompt,
        task=TaskType.REASONING,
        max_tokens=token_limit,
        preferred=req.preferred,
    )
    return result
@app.post("/chat")
async def chat(req: CommandRequest):
    """Route the prompt with the CHAT task type.

    The request's ``task`` field is not consulted. ``max_tokens`` falls back
    to 800 when it is ``None`` or 0.
    """
    token_limit = req.max_tokens or 800
    result = await route(
        req.prompt,
        task=TaskType.CHAT,
        max_tokens=token_limit,
        preferred=req.preferred,
    )
    return result
@app.post("/v1/chat/completions")
async def openai_compat(req: OAIChatRequest):
    """
    OpenAI-compatible endpoint. Drop-in replacement for openai.ChatCompletion.

    Usage:
        from openai import OpenAI
        client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
        response = client.chat.completions.create(
            model="auto",
            messages=[{"role": "user", "content": "Hello"}]
        )

    The messages are flattened into a single newline-joined prompt. The task
    type is chosen from substrings of the requested model name ("code",
    "reason", "vision"; anything else means CHAT).

    Raises:
        HTTPException: 400 when the request contains no system, user or
            assistant message; 503 when ``route()`` yields no response.
    """
    # NOTE(review): req.stream is accepted but never acted on -- a single
    # non-streaming JSON body is always returned.

    # Flatten messages to a single prompt (simple approach — extend for
    # multi-turn). Roles missing from this table are skipped.
    role_prefix = {
        "system": "[System]: ",
        "user": "",
        "assistant": "[Assistant]: ",
    }
    prompt_parts = [
        f"{role_prefix[msg.role]}{msg.content}"
        for msg in req.messages
        if msg.role in role_prefix
    ]
    if not prompt_parts:
        # Without this guard an empty prompt would be sent to the providers
        # and any failure would surface as a misleading 503.
        raise HTTPException(
            status_code=400,
            detail="'messages' must contain at least one system, user, or assistant message.",
        )
    prompt = "\n".join(prompt_parts)

    # Map model hint to task type; the first matching substring wins.
    model_hint = (req.model or "auto").lower()
    if "code" in model_hint:
        task = TaskType.CODE_SMALL
    elif "reason" in model_hint:
        task = TaskType.REASONING
    elif "vision" in model_hint:
        task = TaskType.VISION
    else:
        task = TaskType.CHAT

    result = await route(prompt, task=task, max_tokens=req.max_tokens or 1000)
    answer = result.get("response")
    if not answer:
        raise HTTPException(status_code=503, detail="All providers failed. Check /health for status.")

    # Usage figures are whitespace-separated word counts, not tokenizer
    # counts. Each is computed once and reused for the total.
    prompt_tokens = len(prompt.split())
    completion_tokens = len(answer.split())

    # Return in OpenAI response shape
    created = int(time.time())
    return {
        "id": f"icarus-{created}",
        "object": "chat.completion",
        "created": created,
        "model": result.get("model", "unknown"),
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": answer,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
        # Router metadata, outside the OpenAI response fields.
        "x_icarus": {
            "provider": result.get("provider"),
            "latency_ms": result.get("latency_ms"),
            "task": result.get("task"),
            "attempts": result.get("attempts", []),
        },
    }
if __name__ == "__main__":
    # Direct execution (`python server.py`): serve the app with uvicorn on
    # every interface, port 8000. uvicorn is imported here so that importing
    # this module does not need it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)