# NOTE: file-viewer residue (file size, commit hashes, line-number gutter) removed from this header.
from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
from typing import Annotated, Any, Optional
import asyncio
from redis.asyncio import Redis
import httpx
import os
# ASGI application object; the @app.post route decorators in this file register handlers on it.
app = FastAPI()
@app.post("/")
def health_check():
    # Liveness probe: POST / always answers with the fixed string below.
    # (Plain comments are used instead of a docstring so the generated
    # OpenAPI description of this route stays untouched.)
    status_text = "Healthy"
    return status_text
# Redis password is read from the PASSWORD environment variable
# (the value is None when the variable is not set).
password = os.getenv("PASSWORD")

# Module-wide async Redis client; the rest of this file uses it to run the
# Lua scripts. decode_responses=True makes replies come back as str.
r = Redis(
    username="default",
    password=password,
    host="redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com",
    port=15562,
    decode_responses=True,
)
# Maps a model name to the HTTP endpoint that serves it.
# Keys ending in "_fall" are the fallback endpoints for the model named by the
# part before the suffix. Insertion order is significant: code that iterates
# Model_links.values() visits endpoints in exactly this order.
Model_links = {
    # --- primary endpoints ---
    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
    "qwen2.5-coder:7b": "https://sonuramashishnpm-npmai.hf.space/qwen",
    "vicuna:7b": "https://sonuramashish22028704-vicuna7b.hf.space/vicuna",
    "gemma3:12b": "https://npmaiecosystem-gemma312b.hf.space/gemma312b",
    "internlm2:7b": "https://sonuramashish22028704-internlm27b.hf.space/internlm",
    "falcon:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/falcon",
    "codellama:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
    "mistral:7b": "https://sonuramashish22028704-mistral7b.hf.space/mistral",
    "phi3:medium": "https://sonuramashish22028704-phi3medium.hf.space/phi3medium",
    "qwen3.5:9b": "https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
    "gemma2:9b": "https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
    # --- fallback endpoints ("<model>_fall") ---
    "llama3.2_fall": "https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_llama",
    "qwen2.5-coder:7b_fall": "https://sonuramashish22028704-mistral7b.hf.space/llm_fall_qwen2",
    "vicuna:7b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
    "gemma3:12b_fall": "https://npmaiecosystem-gemma312b_fall.hf.space/llm_fall_gemma312b",
    "internlm2:7b_fall": "https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
    "falcon:7b-instruct_fall": "https://sonuramashishnpm-model1.hf.space/llm_fall_falcon",
    "codellama:7b-instruct_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_codellama",
    "mistral:7b_fall": "https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
    "phi3:medium_fall": "https://sonuramashishnpm-model1.hf.space/llama_fall_phi",
    "qwen3.5:9b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359gb",
    "gemma2:9b_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b",
}
# Lua script that tries to claim the slot tracked under the hash key KEYS[1].
# It reads the hash field 'status' (a missing field is treated as 0).
#   - status < 1: writes status + 1 back and returns the PREVIOUS value.
#   - otherwise:  writes nothing and returns -1.
# NOTE(review): the script sets no expiry on the key, so nothing in this
# script resets a claimed slot on its own.
LUA_CHECK_AND_INC = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status < 1 then
redis.call('HSET', key, 'status', status + 1)
return status
end
return -1
"""
# Lua script that releases one unit of the slot tracked under the hash key KEYS[1].
# It reads the hash field 'status' (a missing field is treated as 0).
#   - status > 0: writes status - 1 back and returns the new value.
#   - otherwise:  writes nothing and returns 0, so the counter never goes negative here.
LUA_REMOVAL_STATUS = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status > 0 then
redis.call('HSET', key, 'status', status -1)
return status -1
end
return 0
"""
async def check_cond(model_link: str, fall_model: Optional[list] = None):
    """Claim the first endpoint whose slot is free and report which one it was.

    The primary ``model_link`` is tried first. After that the candidates are
    the entries of ``fall_model`` when it is non-empty, otherwise every URL in
    ``Model_links`` in insertion order. Each candidate is tried by running
    ``LUA_CHECK_AND_INC`` with the URL as the Redis key; a reply other than
    -1 means the slot was claimed.

    Args:
        model_link: Endpoint URL of the requested model.
        fall_model: Optional list of fallback endpoint URLs.

    Returns:
        ``{"link": <url>, "statusno": <script reply>}`` for the first claimed
        endpoint, or ``None`` when every candidate replied -1.
    """
    # An empty or missing fallback list means "try everything in Model_links".
    backups = fall_model if fall_model else Model_links.values()

    for candidate in (model_link, *backups):
        reply = await r.eval(LUA_CHECK_AND_INC, 1, candidate)
        if reply != -1:
            return {"link": candidate, "statusno": reply}

    return None
# Request body accepted by the POST /load_balancer route.
# (Documented with comments rather than a docstring so the generated JSON
# schema description is not changed.)
class Input(BaseModel):
    # Name of the requested model; looked up as a key in Model_links.
    model: str
    # Sampling temperature forwarded to the model endpoint.
    temperature: float = 0.5
    # Prompt text forwarded to the model endpoint.
    prompt: str
    # When true (and Models is non-empty), the names in Models are resolved
    # to fallback endpoints.
    change: bool = True
    # Optional list of model names; each is suffixed with "_fall" and looked
    # up in Model_links to find its fallback endpoint.
    Models: Optional[list] = None
@app.post("/load_balancer")
async def llm_router(inputs: Input):
    # Entry point of the load balancer: validate the request, resolve the
    # primary and fallback endpoint URLs, claim a free endpoint via
    # check_cond, then forward the prompt to it via router.
    #
    # Responses raised here:
    #   400 - model or prompt is empty
    #   444 - model is not a key of Model_links
    #   402 - a requested fallback has no "<name>_fall" entry in Model_links
    #   503 - check_cond could not claim any endpoint
    # NOTE(review): 444 and 402 are unusual choices for these conditions;
    # they are kept as-is because clients may depend on them.
    if not (inputs.model and inputs.prompt):
        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

    if inputs.model not in Model_links:
        raise HTTPException(status_code=444, detail="Model not found.")
    primary_url = Model_links[inputs.model]

    # Fallback names are only resolved when the caller allows a change AND
    # supplied at least one name; otherwise the list stays empty.
    requested_fallbacks = inputs.Models
    fallback_urls = []
    if inputs.change and requested_fallbacks:
        for name in requested_fallbacks:
            fallback_key = f"{name}_fall"
            if fallback_key not in Model_links:
                raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary")
            fallback_urls.append(Model_links[fallback_key])

    chosen = await check_cond(model_link=primary_url, fall_model=fallback_urls)
    if not chosen or not chosen.get("link") or chosen.get("statusno") is None:
        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

    return await router(
        model_url=chosen["link"],
        prompt=inputs.prompt,
        temp=inputs.temperature,
    )
async def router(model_url, prompt, temp):
    """Forward one prompt to ``model_url`` and always release its slot afterwards.

    Posts ``{"prompt": prompt, "temperature": temp}`` as JSON, expects a JSON
    reply with a ``"response"`` entry, and returns ``{"response": <text>}``.

    Args:
        model_url: Endpoint URL to call; also the Redis key whose slot is released.
        prompt: Prompt text to send.
        temp: Temperature value to send.

    Raises:
        HTTPException: 502 with detail ``"LLM backend error: ..."`` when the
            request fails, the endpoint answers with an error status, the
            reply lacks a usable ``"response"`` value, or that value is
            empty/None.
    """
    request_body = {"prompt": prompt, "temperature": temp}
    limits = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)

    failure = None
    answer = ""
    try:
        async with httpx.AsyncClient(timeout=limits) as client:
            reply = await client.post(model_url, json=request_body)
            reply.raise_for_status()
            text = reply.json()["response"]
            if text is None or str(text).strip() == "":
                raise ValueError("Empty string or None returned in response from LLM")
            answer += text
    except Exception as exc:
        # Any failure is recorded and turned into a 502 after the slot has
        # been released below.
        failure = f"LLM backend error: {exc}"
    finally:
        # Runs on success and on failure so the endpoint's slot is given back.
        await r.eval(LUA_REMOVAL_STATUS, 1, model_url)

    if failure is not None:
        raise HTTPException(status_code=502, detail=failure)
    return {"response": answer}