npmaiecosystem committed on
Commit
4e422c5
·
verified ·
1 Parent(s): 85b40e0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Annotated, Any, Optional
4
+ import asyncio
5
+ from redis.asyncio import Redis
6
+ import httpx
7
+ import os
8
+
9
# ASGI application object; the route decorator further down registers on it.
app = FastAPI()

# Redis credential is read from the PASSWORD environment variable
# (None when the variable is not set).
password = os.getenv("PASSWORD")

# Module-wide async Redis client; it stores the per-endpoint "status" counters
# that the load balancer reads and updates.
r = Redis(
    username="default",
    password=password,
    host="redislabs.com",
    port=15562,
    decode_responses=True,
)
19
+
20
# Primary endpoints, keyed by the model name clients send in the request.
_PRIMARY_LINKS = {
    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llm",
    "qwen2.5-coder:7b": "https://sonuramashishnpm-npmai.hf.space/qwen",
    "vicuna:7b": "https://sonuramashish22028704-vicuna7b.hf.space/llm",
    "gemma3:12b": "https://sonuramashish22028704-vicuna7b.hf.space/gemma",
    "internlm2:7b": "https://sonuramashish22028704-internlm27b.hf.space/llm",
    "maxkb/baichuan2:13b-chat": "https://sonuramashish22028704-internlm27b.hf.space/baichuan",
    "falcon:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/llm",
    "codellama:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
    "mistral:7b": "https://sonuramashish22028704-mistral7b.hf.space/llm",
    "phi3:medium": "https://sonuramashish22028704-phi3medium.hf.space/llm",
    "qwen3.5:9b": "https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
    "gemma2:9b": "https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
}

# Fallback endpoints, keyed as "<model name>_fall".
# NOTE(review): there is no "maxkb/baichuan2:13b-chat_fall" entry - confirm
# whether that model is meant to have a fallback.
_FALLBACK_LINKS = {
    "llama3.2_fall": "https://sonuramashishnpm-model1.hf.space/llamafall",
    "qwen2.5-coder:7b_fall": "https://sonuramashishnpm-model1.hf.space/qwenfall",
    "vicuna:7b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
    "gemma3:12b_fall": "https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_gemma312b",
    "internlm2:7b_fall": "https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
    "falcon:7b-instruct_fall": "https://sonuramashishnpm-mistral7b.hf.space/llm_fall_falcon",
    "codellama:7b-instruct_fall": "https://sonuramashishnpm-model3.hf.space/codellamafall",
    "mistral:7b_fall": "https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
    # NOTE(review): this URL looks like an unfinished placeholder - confirm.
    "phi3:medium_fall": "https://sonuramashishnpm-.hf.space/",
    "qwen3.5:9b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359b",
    "gemma2:9b_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b",
}

# Single lookup table used by the rest of the module. Insertion order is
# primaries first, then fallbacks; code that iterates the values sees them in
# that order.
Model_links = {**_PRIMARY_LINKS, **_FALLBACK_LINKS}
45
+
46
# Lua script executed inside Redis to reserve one slot on an endpoint.
# KEYS[1] is the hash that holds the endpoint's 'status' counter (a missing
# field is treated as 0). While the counter is below 2 the script stores
# counter + 1 and returns the value it had before the increment; otherwise it
# leaves the hash untouched and returns -1.
LUA_CHECK_AND_INC = """
local key = KEYS[1]
local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status < 2 then
redis.call('HSET', key, 'status', status + 1)
return status
end
return -1
"""
56
+
57
async def check_cond(model_link: str, fall_model: Optional[list] = None):
    """Reserve a slot on the first endpoint that accepts one.

    The preferred endpoint is tried first. If it is full, the endpoints in
    ``fall_model`` are tried in order; when ``fall_model`` is empty or None,
    every URL in ``Model_links`` is tried instead, in table order.

    Args:
        model_link: URL of the preferred endpoint (also its Redis key).
        fall_model: Optional list of fallback endpoint URLs.

    Returns:
        ``{"link": <url>, "statusno": <counter before increment>}`` for the
        endpoint that was reserved, or ``None`` if every candidate returned -1.
    """

    async def _reserve(link):
        # The Lua script returns -1 when the endpoint has no free slot.
        slot = await r.eval(LUA_CHECK_AND_INC, 1, link)
        if slot == -1:
            return None
        return {"link": link, "statusno": slot}

    reserved = await _reserve(model_link)
    if reserved is not None:
        return reserved

    # Caller-supplied fallbacks win; otherwise fall back to the whole table.
    candidates = fall_model if fall_model else Model_links.values()
    for candidate in candidates:
        reserved = await _reserve(candidate)
        if reserved is not None:
            return reserved

    return None
75
+
76
+
77
# Request body for the /load_balancer endpoint. Documented with comments
# rather than a class docstring so the generated schema stays unchanged.
class Input(BaseModel):
    # Name of the requested model; looked up as a key in Model_links.
    model: str

    # Forwarded to the backend as "temperature".
    temperature: float = 0.5

    # Forwarded to the backend as "prompt".
    prompt: str

    # When true and Models is non-empty, the names in Models are resolved to
    # their "<name>_fall" endpoints and offered as fallbacks.
    change: bool = True

    # Optional list of model names to use as fallbacks.
    Models: Optional[list] = None
83
+
84
@app.post("/load_balancer")
async def llm_router(inputs: Input):
    """Pick an endpoint with free capacity and forward the prompt to it.

    Args:
        inputs: Validated request body (model name, prompt, temperature and
            optional fallback model names).

    Returns:
        Whatever ``router`` returns for the selected endpoint.

    Raises:
        HTTPException: 400 when the model name or prompt is empty, 444 when
            the model name is not in ``Model_links``, 503 when no endpoint
            could be reserved.
    """
    if not inputs.model or not inputs.prompt:
        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

    model_link = Model_links.get(inputs.model)
    if model_link is None:
        raise HTTPException(status_code=444, detail="Model not found.")

    # Resolve requested fallbacks to their "<name>_fall" endpoints. Names with
    # no fallback entry are skipped rather than indexed directly, which used to
    # raise KeyError and surface as an HTTP 500.
    fall_links = []
    if inputs.change and inputs.Models:
        for name in inputs.Models:
            link = Model_links.get(f"{name}_fall")
            if link is not None:
                fall_links.append(link)

    model_cond = await check_cond(model_link=model_link, fall_model=fall_links)
    if not model_cond or not model_cond.get("link"):
        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

    # router() takes (model_url, prompt, temp). The previous call passed
    # model_cond= and statusno= keywords, which raised TypeError after the slot
    # had already been reserved, so the slot was never released.
    return await router(
        model_url=model_cond["link"],
        prompt=inputs.prompt,
        temp=inputs.temperature,
    )
113
+
114
# Lua script that gives back one slot on an endpoint. It runs atomically inside
# Redis, so concurrent releases cannot overwrite each other's decrement the way
# a separate HGET followed by HSET can. The counter never goes below 0.
_LUA_RELEASE_SLOT = """
local status = tonumber(redis.call('HGET', KEYS[1], 'status') or '0')
if status > 0 then
redis.call('HSET', KEYS[1], 'status', status - 1)
end
return status
"""


async def _release_slot(model_url):
    """Decrement the 'status' counter stored under ``model_url``, floor 0."""
    await r.eval(_LUA_RELEASE_SLOT, 1, model_url)


async def router(model_url, prompt, temp):
    """Send the prompt to one backend and release its slot afterwards.

    Args:
        model_url: Endpoint URL to POST to; also the Redis key of its counter.
        prompt: Prompt text, sent as "prompt" in the JSON payload.
        temp: Sampling temperature, sent as "temperature".

    Returns:
        ``{"response": <value of the backend's "response" field>}``.

    Raises:
        HTTPException: 502 when the request fails, the backend answers with an
            error status, or the reply has no usable "response" field.
    """
    payload = {"prompt": prompt, "temperature": temp}
    timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(model_url, json=payload)
            response.raise_for_status()
            process = response.json()["response"]
    except Exception as e:
        # Boundary handler: any backend failure is reported as 502.
        raise HTTPException(status_code=502, detail=f"LLM backend error: {e}") from e
    finally:
        # One release point for success, failure and task cancellation, so a
        # reserved slot is always handed back.
        await _release_slot(model_url)

    return {"response": process}