Spaces:
Running
Running
| from fastapi import FastAPI, Request, HTTPException | |
| from pydantic import BaseModel | |
| from typing import Annotated, Any, Optional | |
| import asyncio | |
| from redis.asyncio import Redis | |
| import httpx | |
| import os | |
# FastAPI application object for this service.
app = FastAPI()
def health_check() -> str:
    """Return the fixed liveness marker ``"Healthy"``.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app`` elsewhere.
    """
    liveness_marker = "Healthy"
    return liveness_marker
# The Redis password is read from the PASSWORD environment variable; it is
# None when the variable is unset.
password = os.getenv("PASSWORD")

# Shared async Redis client used by the slot acquire/release Lua scripts.
# decode_responses=True asks redis-py to return str instead of bytes.
r = Redis(
    host="redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com",
    port=15562,
    username="default",
    password=password,
    decode_responses=True,
)
# Maps a model name to the HTTP endpoint that serves it.
# Keys ending in "_fall" are the fallback endpoints looked up by llm_router
# (it appends "_fall" to each requested fallback model name).
# Insertion order matters: check_cond scans the values in this order when no
# explicit fallback list is given.
Model_links = {
    # --- primary endpoints ---
    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
    "qwen2.5-coder:7b": "https://sonuramashishnpm-npmai.hf.space/qwen",
    "vicuna:7b": "https://sonuramashish22028704-vicuna7b.hf.space/vicuna",
    "gemma3:12b": "https://npmaiecosystem-gemma312b.hf.space/gemma312b",
    "internlm2:7b": "https://sonuramashish22028704-internlm27b.hf.space/internlm",
    "falcon:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/falcon",
    "codellama:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
    "mistral:7b": "https://sonuramashish22028704-mistral7b.hf.space/mistral",
    "phi3:medium": "https://sonuramashish22028704-phi3medium.hf.space/phi3medium",
    "qwen3.5:9b": "https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
    "gemma2:9b": "https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
    # --- fallback endpoints ---
    "llama3.2_fall": "https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_llama",
    "qwen2.5-coder:7b_fall": "https://sonuramashish22028704-mistral7b.hf.space/llm_fall_qwen2",
    "vicuna:7b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
    # NOTE(review): this is the only hostname containing an underscore
    # ("gemma312b_fall") -- confirm the Space URL is correct.
    "gemma3:12b_fall": "https://npmaiecosystem-gemma312b_fall.hf.space/llm_fall_gemma312b",
    "internlm2:7b_fall": "https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
    "falcon:7b-instruct_fall": "https://sonuramashishnpm-model1.hf.space/llm_fall_falcon",
    "codellama:7b-instruct_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_codellama",
    "mistral:7b_fall": "https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
    "phi3:medium_fall": "https://sonuramashishnpm-model1.hf.space/llama_fall_phi",
    "qwen3.5:9b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359gb",
    "gemma2:9b_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b",
}
# Lua script: try to claim the single slot tracked for an endpoint.
# KEYS[1] is a Redis hash whose 'status' field counts in-flight requests
# (a missing field is treated as 0).
#   - status < 1: store status + 1 and return the previous value
#   - otherwise:  return -1 (endpoint busy)
# Redis runs a script as one unit, so the read and the write cannot be
# interleaved with another client's commands.
# NOTE(review): the script sets no expiry on the key; if a worker stops
# before the release script runs, the slot appears to stay claimed --
# confirm whether a TTL is wanted.
LUA_CHECK_AND_INC = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status < 1 then
redis.call('HSET', key, 'status', status + 1)
return status
end
return -1
"""
# Lua script: release a slot previously claimed with LUA_CHECK_AND_INC.
# KEYS[1] is the same Redis hash; a missing 'status' field is treated as 0.
#   - status > 0: store status - 1 and return the new value
#   - otherwise:  leave the hash untouched and return 0 (never goes negative)
LUA_REMOVAL_STATUS = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status > 0 then
redis.call('HSET', key, 'status', status -1)
return status -1
end
return 0
"""
async def check_cond(model_link: str, fall_model: Optional[list] = None):
    """Claim the first free endpoint slot and report which one was taken.

    Candidates are tried in order: ``model_link`` first, then every entry of
    ``fall_model`` when that list is non-empty, otherwise every value of
    ``Model_links``. Each attempt runs ``LUA_CHECK_AND_INC`` against the
    candidate, which is used directly as the Redis key.

    Args:
        model_link: Endpoint URL of the preferred model.
        fall_model: Optional list of fallback endpoint URLs. ``None`` or an
            empty list means "scan all of ``Model_links``".

    Returns:
        ``{"link": <claimed URL>, "statusno": <script result>}`` for the first
        candidate whose script result is not ``-1``; ``None`` when every
        candidate reports busy.
    """

    async def _claim(candidate):
        # The script returns -1 for a busy endpoint, otherwise the prior count.
        previous = await r.eval(LUA_CHECK_AND_INC, 1, candidate)
        if previous == -1:
            return None
        return {"link": candidate, "statusno": previous}

    backups = fall_model if fall_model else Model_links.values()
    for candidate in (model_link, *backups):
        claimed = await _claim(candidate)
        if claimed is not None:
            return claimed
    return None
class Input(BaseModel):
    # Request payload consumed by llm_router.

    # Name of the preferred model; must be a key of Model_links.
    model: str
    # Sampling temperature forwarded unchanged to the backend.
    temperature: float = 0.5
    # Prompt text forwarded unchanged to the backend.
    prompt: str
    # When true (and Models is non-empty) the names in Models are used as the
    # fallback candidates.
    change: bool = True
    # Optional fallback model names; llm_router appends "_fall" to each one
    # and looks the result up in Model_links.
    Models: Optional[list] = None
async def llm_router(inputs: Input):
    """Validate a request, claim a free endpoint and forward the prompt.

    Args:
        inputs: Request payload (model name, prompt, temperature and the
            optional fallback settings).

    Returns:
        Whatever ``router`` returns for the claimed endpoint.

    Raises:
        HTTPException: 400 when the model name or the prompt is empty;
            444 when the model name is not a key of ``Model_links``;
            402 when a requested fallback has no ``"<name>_fall"`` entry;
            503 when ``check_cond`` could not claim any endpoint.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm it is registered with ``app`` elsewhere.
    """
    if not (inputs.model and inputs.prompt):
        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

    try:
        primary_url = Model_links[inputs.model]
    except KeyError:
        # NOTE(review): 444 is not a registered HTTP status code; 404 may be
        # what was intended -- confirm with API clients before changing.
        raise HTTPException(status_code=444, detail="Model not found.") from None

    # Resolve the caller's fallback names to their "_fall" endpoints.
    fallback_urls = []
    if inputs.change and inputs.Models:
        for requested in inputs.Models:
            fallback_key = f"{requested}_fall"
            if fallback_key not in Model_links:
                # NOTE(review): 402 means "Payment Required"; a 4xx such as
                # 404/422 looks more fitting -- confirm before changing.
                raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary")
            fallback_urls.append(Model_links[fallback_key])

    # NOTE(review): with an empty fallback list (change is false or no Models
    # given) check_cond appears to scan every entry of Model_links -- confirm
    # that this is the intended meaning of change=False.
    slot = await check_cond(model_link=primary_url, fall_model=fallback_urls)

    if not slot or not slot.get("link") or slot.get("statusno") is None:
        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

    return await router(
        model_url=slot["link"],
        prompt=inputs.prompt,
        temp=inputs.temperature,
    )
async def router(model_url, prompt, temp):
    """POST the prompt to one backend endpoint and return its reply.

    The slot tracked in Redis under ``model_url`` is released with
    ``LUA_REMOVAL_STATUS`` once the call finishes, whether it succeeded or
    failed. The caller is expected to have claimed that slot beforehand.

    Args:
        model_url: Backend endpoint URL; also the Redis key of its slot.
        prompt: Prompt text sent as ``"prompt"`` in the JSON body.
        temp: Temperature sent as ``"temperature"`` in the JSON body.

    Returns:
        ``{"response": <reply text>}``. A non-string reply value is converted
        with ``str()``.

    Raises:
        HTTPException: 502 when the request fails, the backend answers with
            an error status, the body lacks a usable ``"response"`` value, or
            that value is ``None`` / blank.
    """
    payload = {"prompt": prompt, "temperature": temp}
    timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
    error_log = ""
    process = ""
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(model_url, json=payload)
            response.raise_for_status()
            f_response = response.json()["response"]
        if f_response is None or str(f_response).strip() == "":
            raise ValueError("Empty string or None returned in response from LLM")
        # The emptiness check above already goes through str(), so non-string
        # payloads are expected; convert explicitly instead of concatenating,
        # which raised TypeError and turned a valid reply into a 502.
        process = str(f_response)
    except Exception as e:
        # Boundary handler: every backend failure is reported as a 502 below.
        error_log = f"LLM backend error: {str(e)}"
    finally:
        # Always give the slot back so the endpoint can be claimed again.
        await r.eval(LUA_REMOVAL_STATUS, 1, model_url)

    if error_log:
        raise HTTPException(status_code=502, detail=error_log)
    return {"response": process}