from fastapi import FastAPI, Request, HTTPException from pydantic import BaseModel from typing import Annotated, Any, Optional import asyncio from redis.asyncio import Redis import httpx import os app = FastAPI() @app.post("/") def health_check(): return "Healthy" password = os.environ.get("PASSWORD") r = Redis( host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com', port=15562, decode_responses=True, username="default", password=password, ) Model_links = { "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama", "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen", "vicuna:7b":"https://sonuramashish22028704-vicuna7b.hf.space/vicuna", "gemma3:12b":"https://npmaiecosystem-gemma312b.hf.space/gemma312b", "internlm2:7b":"https://sonuramashish22028704-internlm27b.hf.space/internlm", "falcon:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/falcon", "codellama:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/codellama", "mistral:7b":"https://sonuramashish22028704-mistral7b.hf.space/mistral", "phi3:medium":"https://sonuramashish22028704-phi3medium.hf.space/phi3medium", "qwen3.5:9b":"https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb", "gemma2:9b":"https://sonuramashish22028704-internlm27b.hf.space/gemma29b", "llama3.2_fall":"https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_llama", "qwen2.5-coder:7b_fall":"https://sonuramashish22028704-mistral7b.hf.space/llm_fall_qwen2", "vicuna:7b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna", "gemma3:12b_fall":"https://npmaiecosystem-gemma312b_fall.hf.space/llm_fall_gemma312b", "internlm2:7b_fall":"https://sonuramashishnpm-model2.hf.space/llm_fall_interlm", "falcon:7b-instruct_fall":"https://sonuramashishnpm-model1.hf.space/llm_fall_falcon", "codellama:7b-instruct_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_codellama", "mistral:7b_fall":"https://sonuramashishnpm-model2.hf.space/fall_llm_mistral", "phi3:medium_fall":"https://sonuramashishnpm-model1.hf.space/llama_fall_phi", "qwen3.5:9b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359gb", "gemma2:9b_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b" } # Updated Lua Script LUA_CHECK_AND_INC = """ local key = KEYS[1] local status = tonumber(redis.call('HGET', key, 'status') or '0') if status < 1 then redis.call('HSET', key, 'status', status + 1) return status end return -1 """ LUA_REMOVAL_STATUS = """ local key = KEYS[1] local status = tonumber(redis.call('HGET', key, 'status') or '0') if status > 0 then redis.call('HSET', key, 'status', status -1) return status -1 end return 0 """ async def check_cond(model_link: str, fall_model: Optional[list] = None): status = await r.eval(LUA_CHECK_AND_INC, 1, model_link) if status != -1: return {"link": model_link, "statusno": status} if fall_model: for model in fall_model: status = await r.eval(LUA_CHECK_AND_INC, 1, model) if status != -1: return {"link": model, "statusno": status} else: for model in Model_links.values(): status = await r.eval(LUA_CHECK_AND_INC, 1, model) if status != -1: return {"link": model, "statusno": status} return None class Input(BaseModel): model: str temperature: float = 0.5 prompt: str change: bool = True Models: Optional[list] = None @app.post("/load_balancer") async def llm_router(inputs: Input): if not inputs.model or not inputs.prompt: raise HTTPException(status_code=400, detail="Model name and Prompt are required.") if inputs.model not in Model_links: raise HTTPException(status_code=444, detail="Model not found.") model_link = Model_links[inputs.model] fall_links = [] fall_models = inputs.Models if inputs.change and fall_models: for m in fall_models: model_name = f"{m}_fall" if model_name in Model_links.keys(): link = Model_links[model_name] fall_links.append(link) else: raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary") model_cond = await check_cond(model_link=model_link, fall_model=fall_links) if model_cond and model_cond.get("link") and model_cond.get("statusno") is not None: return await router( model_url=model_cond["link"], prompt=inputs.prompt, temp=inputs.temperature ) else: raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.") async def router(model_url, prompt, temp): error_log = "" process= "" payload = {"prompt": prompt, "temperature": temp} timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0) try: async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post(model_url, json=payload) response.raise_for_status() f_response = response.json()["response"] if f_response is not None and str(f_response).strip() != "": process += f_response else: raise ValueError("Empty string or None returned in response from LLM") except Exception as e: error_log += f"LLM backend error: {str(e)}" finally: await r.eval(LUA_REMOVAL_STATUS, 1, model_url) if error_log: raise HTTPException(status_code=502, detail=error_log) else: return {"response": process}