from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
from typing import Annotated, Any, Optional
import asyncio
from redis.asyncio import Redis
import httpx
import os
# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()
@app.post("/")
def health_check():
    """Answer a POST on the root path with a fixed string.

    Returns:
        The literal string "Healthy".
    """
    status_text = "Healthy"
    return status_text
# Redis credentials come from the PASSWORD environment variable.
# NOTE(review): os.environ.get returns None when PASSWORD is unset, so the
# client below would be built with password=None — confirm that is acceptable.
password = os.environ.get("PASSWORD")
# Shared async Redis client used by check_cond() and router() to run the Lua
# scripts that track the per-endpoint "status" counter.
r = Redis(
    host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com',
    port=15562,
    decode_responses=True,
    username="default",
    password=password,
)
# Maps a model name to the HTTP endpoint that serves it.
#
# Two kinds of keys live in this one dictionary:
#   * "<model>"       - the endpoint llm_router() uses for the requested model.
#   * "<model>_fall"  - the endpoint llm_router() looks up (by appending
#                       "_fall" to each name in Input.Models) as a fallback.
# check_cond() also iterates over *all* values here when it is given no
# fallback list, so every URL in this table can end up serving a request.
#
# NOTE(review): the "gemma3:12b_fall" hostname contains an underscore, unlike
# every other entry, and a few paths ("llm_fall_interlm", "fall_llm_mistral",
# "llama_fall_phi") deviate from the "llm_fall_<model>" pattern — verify these
# against the deployed endpoints.
Model_links = {
    # Primary endpoints.
    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
    "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
    "vicuna:7b":"https://sonuramashish22028704-vicuna7b.hf.space/vicuna",
    "gemma3:12b":"https://npmaiecosystem-gemma312b.hf.space/gemma312b",
    "internlm2:7b":"https://sonuramashish22028704-internlm27b.hf.space/internlm",
    "falcon:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/falcon",
    "codellama:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
    "mistral:7b":"https://sonuramashish22028704-mistral7b.hf.space/mistral",
    "phi3:medium":"https://sonuramashish22028704-phi3medium.hf.space/phi3medium",
    "qwen3.5:9b":"https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
    "gemma2:9b":"https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
    # Fallback endpoints (key = model name + "_fall").
    "llama3.2_fall":"https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_llama",
    "qwen2.5-coder:7b_fall":"https://sonuramashish22028704-mistral7b.hf.space/llm_fall_qwen2",
    "vicuna:7b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
    "gemma3:12b_fall":"https://npmaiecosystem-gemma312b_fall.hf.space/llm_fall_gemma312b",
    "internlm2:7b_fall":"https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
    "falcon:7b-instruct_fall":"https://sonuramashishnpm-model1.hf.space/llm_fall_falcon",
    "codellama:7b-instruct_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_codellama",
    "mistral:7b_fall":"https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
    "phi3:medium_fall":"https://sonuramashishnpm-model1.hf.space/llama_fall_phi",
    "qwen3.5:9b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359gb",
    "gemma2:9b_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b"
}
# Lua script: try to reserve the endpoint whose Redis hash is KEYS[1].
#
# Reads the hash field 'status' (treated as 0 when absent). If it is below 1,
# the script stores status + 1 and returns the value it read; otherwise it
# changes nothing and returns -1. Callers treat -1 as "busy", so each endpoint
# admits one request at a time.
# The read and the write happen inside one script invocation.
# NOTE(review): a stored status of exactly -1 would be incremented yet still
# return -1, which callers cannot tell apart from "busy".
LUA_CHECK_AND_INC = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status < 1 then
redis.call('HSET', key, 'status', status + 1)
return status
end
return -1
"""
# Lua script: release a reservation on the endpoint whose Redis hash is KEYS[1].
#
# Reads the hash field 'status' (treated as 0 when absent). If it is above 0,
# the script stores status - 1 and returns the new value; otherwise it leaves
# the hash untouched and returns 0, so the counter never drops below zero
# through this script.
LUA_REMOVAL_STATUS = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status > 0 then
redis.call('HSET', key, 'status', status -1)
return status -1
end
return 0
"""
async def check_cond(model_link: str, fall_model: Optional[list] = None):
    """Reserve the first free endpoint among the primary and its alternatives.

    The primary URL is tried first. If it is busy, the URLs in ``fall_model``
    are tried in order; when ``fall_model`` is ``None`` or empty, every URL in
    ``Model_links`` is tried instead (in dictionary order).

    Args:
        model_link: Endpoint URL of the requested model; also the Redis key.
        fall_model: Optional list of fallback endpoint URLs.

    Returns:
        ``{"link": <url>, "statusno": <value returned by the Lua script>}``
        for the first endpoint that was reserved, or ``None`` when every
        candidate reported busy (-1).
    """
    # An empty/None fallback list means "anything in the table will do".
    alternatives = fall_model if fall_model else Model_links.values()

    for candidate in (model_link, *alternatives):
        slot = await r.eval(LUA_CHECK_AND_INC, 1, candidate)
        if slot != -1:
            return {"link": candidate, "statusno": slot}

    return None
# Request body accepted by the /load_balancer endpoint.
class Input(BaseModel):
    # Name of the requested model; must be a key of Model_links.
    model: str
    # Sampling temperature forwarded to the backend in the JSON payload.
    temperature: float = 0.5
    # Prompt text forwarded to the backend in the JSON payload.
    prompt: str
    # When true (and Models is non-empty) the names in Models are resolved
    # to fallback endpoints; when false Models is ignored.
    change: bool = True
    # Optional list of model names; llm_router appends "_fall" to each one
    # and looks the result up in Model_links.
    Models: Optional[list] = None
@app.post("/load_balancer")
async def llm_router(inputs: Input):
    """Reserve an endpoint for the requested model and forward the prompt.

    Steps:
      1. Validate that a model name and a prompt were supplied.
      2. Resolve the model name to its endpoint URL.
      3. If ``inputs.change`` is true and ``inputs.Models`` is non-empty,
         resolve each listed name (with ``"_fall"`` appended) to a fallback URL.
      4. Ask ``check_cond`` to reserve the primary or one of the alternatives.
      5. Forward the prompt through ``router`` to the reserved endpoint.

    Args:
        inputs: Parsed request body.

    Returns:
        Whatever ``router`` returns for the reserved endpoint.

    Raises:
        HTTPException: 400 when model or prompt is empty; 444 when the model
            name is unknown; 402 when a listed fallback has no ``"_fall"``
            entry; 503 when no endpoint could be reserved.
    """
    if not (inputs.model and inputs.prompt):
        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

    try:
        primary_url = Model_links[inputs.model]
    except KeyError:
        raise HTTPException(status_code=444, detail="Model not found.") from None

    # Fallbacks are only resolved when switching is allowed and names were given;
    # otherwise check_cond receives an empty list.
    backup_urls = []
    if inputs.change and inputs.Models:
        for name in inputs.Models:
            try:
                backup_urls.append(Model_links[f"{name}_fall"])
            except KeyError:
                raise HTTPException(
                    status_code=402,
                    detail="Fallback models are not found in Models Dictionary",
                ) from None

    reservation = await check_cond(model_link=primary_url, fall_model=backup_urls)
    if (
        not reservation
        or not reservation.get("link")
        or reservation.get("statusno") is None
    ):
        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

    return await router(
        model_url=reservation["link"],
        prompt=inputs.prompt,
        temp=inputs.temperature,
    )
async def router(model_url, prompt, temp):
    """POST the prompt to a reserved backend and release the reservation.

    Args:
        model_url: Endpoint URL to call; also the Redis key whose reservation
            is released afterwards.
        prompt: Prompt text sent as ``"prompt"`` in the JSON body.
        temp: Temperature sent as ``"temperature"`` in the JSON body.

    Returns:
        ``{"response": <text>}`` where the text is the ``"response"`` field
        of the backend's JSON reply.

    Raises:
        HTTPException: 502 when the request fails, the backend returns an
            error status, the reply lacks a usable ``"response"`` value, or
            any other exception occurs while talking to the backend.
    """
    body = {"prompt": prompt, "temperature": temp}
    limits = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)

    failure = None
    answer = ""
    try:
        async with httpx.AsyncClient(timeout=limits) as client:
            reply = await client.post(model_url, json=body)
            reply.raise_for_status()
            text = reply.json()["response"]
            if text is None or str(text).strip() == "":
                raise ValueError("Empty string or None returned in response from LLM")
            answer += text
    except Exception as exc:
        # Remember the failure; it is reported only after the slot is released.
        failure = f"LLM backend error: {str(exc)}"
    finally:
        # Always give the endpoint's slot back, success or failure.
        await r.eval(LUA_REMOVAL_STATUS, 1, model_url)

    if failure:
        raise HTTPException(status_code=502, detail=failure)
    return {"response": answer}