File size: 5,734 Bytes
4e422c5
 
 
 
 
 
 
 
 
 
314c40e
 
 
 
4e422c5
 
ef42d48
4e422c5
 
 
 
 
 
314c40e
 
4e422c5
ee68267
4e422c5
ee68267
bf9529f
ee68267
 
4e422c5
ee68267
 
4e422c5
 
ee68267
 
4e422c5
bf9529f
4e422c5
ee68267
 
4e422c5
ee68267
 
4e422c5
 
 
 
 
 
 
314c40e
4e422c5
 
 
 
 
 
314c40e
 
 
 
 
 
 
 
 
 
4e422c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314c40e
 
 
 
 
4e422c5
 
 
 
 
314c40e
4e422c5
 
 
 
 
 
 
314c40e
 
4e422c5
 
 
 
 
 
 
314c40e
 
 
 
 
 
4e422c5
314c40e
4e422c5
314c40e
 
4e422c5
314c40e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
from typing import Annotated, Any, Optional
import asyncio
from redis.asyncio import Redis
import httpx
import os

# ASGI application object; the route decorators in this file register handlers on it.
app = FastAPI()

@app.get("/")
@app.post("/")
def health_check():
    """Report that the service is up.

    Registered for both GET and POST on "/". POST is kept so existing
    callers keep working; GET is added because a route registered only for
    POST answers GET requests with 405 Method Not Allowed.

    Returns:
        The literal string "Healthy".
    """
    return "Healthy"

# Redis password comes from the PASSWORD environment variable;
# os.environ.get returns None when the variable is unset.
password = os.environ.get("PASSWORD")
# Module-level asyncio Redis client used by every r.eval(...) call in this file.
r = Redis(
    host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com',
    port=15562,
    decode_responses=True,  # have redis-py decode replies to str instead of bytes
    username="default",
    password=password,
)



# Maps a model name to the HTTP endpoint URL that serves it.
# The URL doubles as the Redis hash key whose 'status' field is read and
# written by the Lua scripts (check_cond and router pass it as KEYS[1]).
# Keys ending in "_fall" are fallback endpoints: llm_router looks them up as
# f"{name}_fall" for each name supplied in the request's Models list.
# NOTE(review): "codellama:7b-instruct" points at the falcon7binstruct space
# (path /codellama) -- confirm this is intentional.
# NOTE(review): the "gemma3:12b_fall" host contains an underscore
# ("gemma312b_fall"); confirm that URL actually resolves.
Model_links = {
    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
    "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
    "vicuna:7b":"https://sonuramashish22028704-vicuna7b.hf.space/vicuna",
    "gemma3:12b":"https://npmaiecosystem-gemma312b.hf.space/gemma312b",
    "internlm2:7b":"https://sonuramashish22028704-internlm27b.hf.space/internlm",
    "falcon:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/falcon",
    "codellama:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
    "mistral:7b":"https://sonuramashish22028704-mistral7b.hf.space/mistral",
    "phi3:medium":"https://sonuramashish22028704-phi3medium.hf.space/phi3medium",
    "qwen3.5:9b":"https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
    "gemma2:9b":"https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
    # Fallback endpoints ("<model>_fall").
    "llama3.2_fall":"https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_llama",
    "qwen2.5-coder:7b_fall":"https://sonuramashish22028704-mistral7b.hf.space/llm_fall_qwen2",
    "vicuna:7b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
    "gemma3:12b_fall":"https://npmaiecosystem-gemma312b_fall.hf.space/llm_fall_gemma312b",
    "internlm2:7b_fall":"https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
    "falcon:7b-instruct_fall":"https://sonuramashishnpm-model1.hf.space/llm_fall_falcon",
    "codellama:7b-instruct_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_codellama",
    "mistral:7b_fall":"https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
    "phi3:medium_fall":"https://sonuramashishnpm-model1.hf.space/llama_fall_phi",
    "qwen3.5:9b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359gb",
    "gemma2:9b_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b"
}

# Lua script (run through Redis EVAL) that tries to claim an endpoint.
# KEYS[1] is a hash key; its 'status' field is read as a number, with a
# missing field treated as 0.
#   - status < 1: the field is set to status + 1 and the previous value is returned.
#   - otherwise:  nothing is written and -1 is returned (endpoint busy).
# check_cond treats any return value other than -1 as a successful claim.
LUA_CHECK_AND_INC = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status < 1 then
    redis.call('HSET', key, 'status', status + 1)
    return status
end
return -1
"""

# Lua script (run through Redis EVAL) that releases a previously claimed endpoint.
# KEYS[1] is the same hash key used when claiming; 'status' defaults to 0
# when the field is missing.
#   - status > 0: the field is set to status - 1 and the new value is returned.
#   - otherwise:  nothing is written and 0 is returned, so the counter is
#                 never pushed below zero by this script.
LUA_REMOVAL_STATUS = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status > 0 then
    redis.call('HSET', key, 'status', status -1)
    return status -1
end
return 0
"""

async def check_cond(model_link: str, fall_model: Optional[list] = None):
    """Claim the first available endpoint, preferring ``model_link``.

    Each candidate is offered to the LUA_CHECK_AND_INC script, which either
    claims it (returning the counter value seen before the increment) or
    answers -1 when the endpoint is busy.

    Args:
        model_link: Endpoint URL to try first.
        fall_model: Endpoint URLs to try next, in order. When it is None or
            empty, every value of Model_links is tried instead.

    Returns:
        ``{"link": <claimed url>, "statusno": <value returned by the script>}``
        for the first endpoint claimed, or None when every candidate is busy.
    """

    async def _claim(endpoint):
        # One EVAL per candidate; -1 is the script's "busy" answer.
        previous = await r.eval(LUA_CHECK_AND_INC, 1, endpoint)
        if previous == -1:
            return None
        return {"link": endpoint, "statusno": previous}

    claimed = await _claim(model_link)
    if claimed is not None:
        return claimed

    # An empty or missing fallback list means "try every known endpoint".
    candidates = fall_model if fall_model else Model_links.values()
    for endpoint in candidates:
        claimed = await _claim(endpoint)
        if claimed is not None:
            return claimed

    return None


# Request body accepted by POST /load_balancer (see llm_router).
class Input(BaseModel):
    # Name of the requested model; must be a key of Model_links.
    model: str
    # Forwarded to the backend under the "temperature" key.
    temperature: float = 0.5
    # Forwarded to the backend under the "prompt" key.
    prompt: str
    # When true and Models is non-empty, llm_router resolves each entry of
    # Models to its "<name>_fall" endpoint and uses those as the fallback list.
    change: bool = True
    # Optional list of model names to fall back to; llm_router rejects the
    # request if any "<name>_fall" key is missing from Model_links.
    Models: Optional[list] = None

# Pick an available endpoint for the requested model and forward the prompt.
#
# Steps:
#   1. Reject requests with an empty model or prompt (400) or a model that is
#      not a key of Model_links (444).
#   2. When inputs.change is true and inputs.Models is non-empty, translate
#      each listed name into its "<name>_fall" URL; an unknown name is a 402.
#   3. Ask check_cond to claim the primary endpoint or one of the fallbacks.
#   4. Forward the prompt through router(); if nothing could be claimed,
#      answer 503.
#
# Documented with comments rather than a docstring so the OpenAPI description
# FastAPI derives from handler docstrings stays as it was.
# NOTE(review): when inputs.change is false the fallback list stays empty, and
# check_cond then tries every Model_links endpoint -- confirm that is intended.
@app.post("/load_balancer")
async def llm_router(inputs: Input):
    if not (inputs.model and inputs.prompt):
        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

    primary_url = Model_links.get(inputs.model)
    if primary_url is None:
        raise HTTPException(status_code=444, detail="Model not found.")

    fallback_urls = []
    if inputs.change and inputs.Models:
        for name in inputs.Models:
            fallback_url = Model_links.get(f"{name}_fall")
            if fallback_url is None:
                raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary")
            fallback_urls.append(fallback_url)

    claimed = await check_cond(model_link=primary_url, fall_model=fallback_urls)

    # Nothing claimed (or a malformed claim record) means every candidate is busy.
    if not claimed or not claimed.get("link") or claimed.get("statusno") is None:
        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

    return await router(
        model_url=claimed["link"],
        prompt=inputs.prompt,
        temp=inputs.temperature,
    )

async def router(model_url, prompt, temp):
    """POST the prompt to ``model_url`` and return the backend's answer.

    The endpoint's Redis counter is released with LUA_REMOVAL_STATUS in a
    ``finally`` clause, so it runs whether or not the backend call succeeds.

    Args:
        model_url: Backend endpoint URL; also the Redis key that is released.
        prompt: Sent to the backend as "prompt".
        temp: Sent to the backend as "temperature".

    Returns:
        ``{"response": <text>}`` with the backend's "response" field.

    Raises:
        HTTPException: 502 when the request, the status check, the JSON
            decoding or the emptiness check fails; the detail carries the
            underlying error message.
    """
    body = {"prompt": prompt, "temperature": temp}
    limits = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)

    answer = ""
    failure = None
    try:
        async with httpx.AsyncClient(timeout=limits) as http:
            reply = await http.post(model_url, json=body)
            reply.raise_for_status()
            text = reply.json()["response"]
            if text is None or str(text).strip() == "":
                raise ValueError("Empty string or None returned in response from LLM")
            # A non-string payload raises TypeError here and is reported as a
            # backend error by the handler below.
            answer += text
    except Exception as exc:
        failure = f"LLM backend error: {str(exc)}"
    finally:
        # Always hand the endpoint back, success or failure.
        await r.eval(LUA_REMOVAL_STATUS, 1, model_url)

    if failure is not None:
        raise HTTPException(status_code=502, detail=failure)
    return {"response": answer}