Spaces:

LlewellynSystemsInc
/

icarus-router

Sleeping

staff

Upload server.py with huggingface_hub

779e956 verified about 1 month ago

6.94 kB

	"""
	Icarus Router — FastAPI server
	==============================
	OpenAI-compatible LLM routing gateway. Self-hosted. $0/month.

	Endpoints:
	GET / — info
	GET /health — probe all providers in parallel
	GET /providers — list providers, routing table, model map
	POST /command — general routing (specify task type)
	POST /code — shortcut: CODE_SMALL chain
	POST /reason — shortcut: REASONING chain
	POST /chat — shortcut: CHAT chain
	POST /v1/chat/completions — OpenAI-compatible endpoint

	Run:
	uvicorn server:app --host 0.0.0.0 --port 8000

	License: Apache 2.0
	Author: Llewellyn Systems — https://www.llewellynsystems.com
	"""
	from __future__ import annotations

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from typing import Optional
	import time

	from router import route, health_check, TaskType, MODEL_MAP, ROUTING_TABLE, PROVIDERS

	# Application object that every route below is registered on.
	app = FastAPI(
	    title="Icarus Router",
	    version="1.0.0",
	    description="Multi-provider LLM routing gateway — $0/month free-tier focused",
	)

	# CORS is left fully open: any origin, any method and any header is accepted.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	)


	# ============================================================================
	# REQUEST / RESPONSE MODELS
	# ============================================================================

	class CommandRequest(BaseModel):
	    # Request body shared by the /command, /code, /reason and /chat endpoints.

	    # Text to send to the selected provider.
	    prompt: str
	    # Task type name; only /command reads it (the shortcuts fix the task).
	    task: Optional[str] = "reasoning"
	    # Deliberately unset by default: each endpoint applies its own fallback
	    # via `req.max_tokens or N`. A truthy default of 1000 here made the
	    # 2000 / 1500 / 800 fallbacks of /code, /reason and /chat unreachable
	    # unless the client explicitly sent null or 0.
	    max_tokens: Optional[int] = None
	    # Optional list of provider names forwarded to route() as `preferred`.
	    preferred: Optional[list[str]] = None


	# OpenAI-compatible models
	class OAIMessage(BaseModel):
	    # One entry of an OpenAI-style `messages` array.

	    # Speaker of the message; the completions endpoint looks for
	    # "system", "user" and "assistant".
	    role: str
	    # Plain-text body of the message.
	    content: str

	class OAIChatRequest(BaseModel):
	    # Request body accepted by POST /v1/chat/completions.

	    # Model name; used by the endpoint as a hint for choosing a task type.
	    model: Optional[str] = "auto"
	    # Conversation so far, in order.
	    messages: list[OAIMessage]
	    # Completion token budget passed on to the router.
	    max_tokens: Optional[int] = 1000
	    # Streaming flag as sent by OpenAI clients.
	    stream: Optional[bool] = False


	# ============================================================================
	# HELPERS
	# ============================================================================

	def _task(name: str) -> TaskType:
	    """
	    Convert a task name into its TaskType member.

	    Args:
	        name: Value of a TaskType member, as sent by the client.

	    Returns:
	        The matching TaskType member.

	    Raises:
	        HTTPException: 400 listing the valid values when `name` does not
	            match any TaskType member.
	    """
	    try:
	        return TaskType(name)
	    except ValueError as exc:
	        valid = [t.value for t in TaskType]
	        # Chain explicitly so the original lookup failure stays attached as
	        # the cause instead of showing up as an error "during handling".
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unknown task type: '{name}'. Valid: {valid}",
	        ) from exc


	# ============================================================================
	# ROUTES
	# ============================================================================

	@app.get("/")
	async def root():
	    """Return static service metadata plus the configured provider names."""
	    provider_names = [*PROVIDERS.keys()]
	    info = {
	        "name": "Icarus Router",
	        "version": "1.0.0",
	        "description": "Multi-provider LLM routing gateway — $0/month",
	        "providers": provider_names,
	        "docs": "/docs",
	        "health": "/health",
	        "built_by": "Llewellyn Systems — https://www.llewellynsystems.com",
	    }
	    return info


	@app.get("/health")
	async def health():
	    """
	    Return the router's health report unchanged.

	    All the work is delegated to `health_check()`; per the module header it
	    probes every provider in parallel.
	    """
	    report = await health_check()
	    return report


	@app.get("/providers")
	async def providers():
	    """
	    Describe the routing configuration.

	    Returns a dict with the provider names, the routing table keyed by task
	    value, and each provider's model map keyed by task value.
	    """
	    provider_names = [*PROVIDERS.keys()]
	    # Enum members are replaced by their `.value` so the keys are plain values.
	    routing_by_task = {
	        task_type.value: chain for task_type, chain in ROUTING_TABLE.items()
	    }
	    models_by_provider = {
	        provider: {task_type.value: model for task_type, model in per_task.items()}
	        for provider, per_task in MODEL_MAP.items()
	    }
	    return {
	        "providers": provider_names,
	        "routing_table": routing_by_task,
	        "model_map": models_by_provider,
	    }


	@app.post("/command")
	async def command(req: CommandRequest):
	    """
	    Route a prompt using the task type named in the request.

	    A missing/empty task falls back to "reasoning" and a falsy max_tokens
	    (None or 0) falls back to 1000. An unknown task name is rejected with a
	    400 by `_task`. The router's result is returned as-is.
	    """
	    task_type = _task(req.task or "reasoning")
	    token_budget = req.max_tokens or 1000
	    return await route(
	        req.prompt,
	        task=task_type,
	        max_tokens=token_budget,
	        preferred=req.preferred,
	    )


	@app.post("/code")
	async def code(req: CommandRequest):
	    """
	    Route a prompt with the task fixed to CODE_SMALL.

	    `req.task` is ignored; a falsy max_tokens (None or 0) falls back to 2000.
	    """
	    token_budget = req.max_tokens or 2000
	    return await route(
	        req.prompt,
	        task=TaskType.CODE_SMALL,
	        max_tokens=token_budget,
	        preferred=req.preferred,
	    )


	@app.post("/reason")
	async def reason(req: CommandRequest):
	    """
	    Route a prompt with the task fixed to REASONING.

	    `req.task` is ignored; a falsy max_tokens (None or 0) falls back to 1500.
	    """
	    token_budget = req.max_tokens or 1500
	    return await route(
	        req.prompt,
	        task=TaskType.REASONING,
	        max_tokens=token_budget,
	        preferred=req.preferred,
	    )


	@app.post("/chat")
	async def chat(req: CommandRequest):
	    """
	    Route a prompt with the task fixed to CHAT.

	    `req.task` is ignored; a falsy max_tokens (None or 0) falls back to 800.
	    """
	    token_budget = req.max_tokens or 800
	    return await route(
	        req.prompt,
	        task=TaskType.CHAT,
	        max_tokens=token_budget,
	        preferred=req.preferred,
	    )


	@app.post("/v1/chat/completions")
	async def openai_compat(req: OAIChatRequest):
	    """
	    OpenAI-compatible chat completions endpoint (non-streaming).

	    The message list is flattened into a single prompt, the ``model`` field
	    is used only as a hint for picking a task type, and the routed result is
	    wrapped in the OpenAI ``chat.completion`` response shape with an extra
	    ``x_icarus`` section carrying routing metadata.

	    Usage:
	        from openai import OpenAI
	        client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
	        response = client.chat.completions.create(
	            model="auto",
	            messages=[{"role": "user", "content": "Hello"}]
	        )

	    Raises:
	        HTTPException: 503 when the routed result has no response text.
	    """
	    import uuid  # function-scope import: only needed to mint response ids

	    # NOTE(review): req.stream is accepted by the model but never read here,
	    # so the reply is always one JSON body — confirm streaming clients are
	    # not expected to work against this endpoint.

	    # Flatten messages to a single prompt (simple approach — extend for multi-turn)
	    known_prefixes = {
	        "system": "[System]: ",
	        "user": "",
	        "assistant": "[Assistant]: ",
	    }
	    prompt_parts = []
	    for msg in req.messages:
	        prefix = known_prefixes.get(msg.role)
	        if prefix is None:
	            # Any other role (e.g. "tool") used to be dropped silently,
	            # losing its content; keep it, labelled like the others.
	            prefix = f"[{msg.role.capitalize()}]: "
	        prompt_parts.append(f"{prefix}{msg.content}")
	    prompt = "\n".join(prompt_parts)

	    # Map model hint to task type (first matching keyword wins)
	    model_hint = (req.model or "auto").lower()
	    if "code" in model_hint:
	        task = TaskType.CODE_SMALL
	    elif "reason" in model_hint:
	        task = TaskType.REASONING
	    elif "vision" in model_hint:
	        task = TaskType.VISION
	    else:
	        task = TaskType.CHAT

	    result = await route(prompt, task=task, max_tokens=req.max_tokens or 1000)

	    completion = result.get("response")
	    if not completion:
	        raise HTTPException(status_code=503, detail="All providers failed. Check /health for status.")

	    # Usage figures are whitespace-separated word counts, not tokenizer counts.
	    prompt_tokens = len(prompt.split())
	    completion_tokens = len(completion.split())

	    # Return in OpenAI response shape
	    return {
	        # A timestamp-only id collided for requests within the same second.
	        "id": f"icarus-{uuid.uuid4().hex}",
	        "object": "chat.completion",
	        "created": int(time.time()),
	        "model": result.get("model", "unknown"),
	        "choices": [{
	            "index": 0,
	            "message": {
	                "role": "assistant",
	                "content": completion,
	            },
	            "finish_reason": "stop",
	        }],
	        "usage": {
	            "prompt_tokens": prompt_tokens,
	            "completion_tokens": completion_tokens,
	            "total_tokens": prompt_tokens + completion_tokens,
	        },
	        "x_icarus": {
	            "provider": result.get("provider"),
	            "latency_ms": result.get("latency_ms"),
	            "task": result.get("task"),
	            "attempts": result.get("attempts", []),
	        },
	    }


	if __name__ == "__main__":
	    # Direct-execution entry point: serve the app on all interfaces, port 8000.
	    # uvicorn is imported here so importing this module does not require it.
	    import uvicorn

	    uvicorn.run(app, host="0.0.0.0", port=8000)