npmaiecosystem committed on
Commit
bfb02bc
·
verified ·
1 Parent(s): 198f999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -100
app.py CHANGED
@@ -1,16 +1,14 @@
1
  from fastapi import FastAPI, Request, HTTPException
2
  from pydantic import BaseModel
3
- from typing import Annotated, Any
4
  import asyncio
5
  from redis.asyncio import Redis
6
  import httpx
7
  import os
8
 
9
- #FastAPI Initialistaion
10
- app= FastAPI()
11
 
12
  password = os.environ.get("PASSWORD")
13
- #Redis Initialistaion
14
  r = Redis(
15
  host='redislabs.com',
16
  port=15562,
@@ -18,119 +16,133 @@ r = Redis(
18
  username="default",
19
  password=password,
20
  )
21
- print(r)
22
 
23
- Model_links={
24
- "model":"link"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
 
27
- Model_fall_links = {
28
- "model":"link"
29
- }
30
-
31
- #Lua Script
32
  LUA_CHECK_AND_INC = """
33
  local key = KEYS[1]
 
 
34
  local status = tonumber(redis.call('HGET', key, 'status') or '0')
35
  if status < 2 then
36
  redis.call('HSET', key, 'status', status + 1)
37
- return status -- returns the old status (0 or 1) so you know what it was
38
- else
39
- return -1 -- means busy
 
 
 
 
40
  end
 
 
41
  """
42
 
43
- async def check_cond(model_link,fall_model=None):
44
- status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
45
- if status != -1:
46
- return {"link": model_link, "statusno": status}
47
-
48
- if fall_model != None:
49
- for model in fall_model:
50
- status = await r.eval(LUA_CHECK_AND_INC, 1, model)
51
- if status != -1:
52
- return {"link": model, "statusno": status}
53
- else:
54
- key = Model_links.keys()
55
- for model in key:
56
- status = await r.eval(LUA_CHECK_AND_INC, 1, Model_links[model])
57
- if status != -1:
58
- return {"link": Model_links[model], "statusno": status}
59
-
60
- return "Your requested model and other models are busy try again."
61
-
62
- #Input initialistaion
63
- class Input(BaseModel):
64
- model:str
65
- temperature:float = 0.5
66
- prompt:str
67
- change:bool = True
68
- Models:list = None
69
 
 
 
 
 
 
 
 
 
70
 
71
  @app.post("/load_balancer")
72
  async def llm_router(inputs: Input):
73
- if not inputs.model and inputs.prompt:
74
- return HTTPException(status_code=404, detail="Model name and Prompt is required.")
75
 
76
- if inputs.model in Model_links.keys():
77
- model_link = Model_links[inputs.model]
78
- if inputs.change:
79
- if inputs.Models != None:
80
- fall_links = [Model_links[m] for m in inputs.Models if m in Model_links]
81
- model_cond = await check_cond(model_link=model_link,fall_model=fall_links)
82
-
83
- if isinstance(model_cond, dict) and model_cond.get("link") and model_cond.get("statusno") is not None:
84
- return await router(model_cond=model_cond["link"],statusno=model_cond["statusno"], prompt = inputs.prompt, temp = inputs.temperature)
85
- else:
86
- return HTTPException(status=404, detail="Sorry We are not able to serve your requests due to not available rooms try again later.")
87
- else:
88
- model_cond = await check_cond(model_link=model_link)
89
- if isinstance(model_cond, dict) and model_cond.get("link") and model_cond.get("statusno") is not None:
90
- return await router(model_cond=model_cond["link"],statusno=model_cond["statusno"], prompt = inputs.prompt, temp = inputs.temperature)
91
- else:
92
- return HTTPException(status=404, detail="Sorry We are not able to serve your requests due to not available rooms try again later.")
93
- else:
94
- model_cond = await check_cond(model_link=model_link)
95
- if isinstance(model_cond, dict) and model_cond.get("link") and model_cond.get("statusno") is not None:
96
- return await router(model_cond=model_cond["link"],statusno=model_cond["statusno"],prompt = inputs.prompt)
97
- else:
98
- return HTTPException(status=404, detail="Sorry We are not able to serve your requests due to not available rooms try again later.")
99
-
100
- async def router(model_cond, statusno, prompt, temp):
101
- payload ={
102
- "prompt":prompt,
103
- "temperature":temp
104
- }
105
-
106
- timeout = httpx.Timeout(
107
- connect=30.0, # connection ka max time
108
- read=360.0, # response read karne ka max time
109
- write=30.0, # request bhejne ka max time
110
- pool=120.0 # connection pool wait
111
- )
112
-
113
- if statusno == 0:
114
- try:
115
- async with httpx.AsyncClient(timeout=timeout) as client:
116
- response = await client.post(model_cond,json=payload)
117
- process = response.json()["response"]
118
- except Exception as e:
119
- return {"response":e}
120
- pass
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
- elif statusno == 1:
124
  try:
125
- async with httpx.AsyncClient(timeout=timeout) as client:
126
- response = await client.post(model_cond,json=payload)
127
- process = response.json()["response"]
 
128
  except Exception as e:
129
- return {"response":e}
130
- pass
131
-
132
- current_status = int(r.hget(model_cond, "status") or 0)
133
- if current_status > 0:
134
- await r.hset(model_cond, "status", current_status - 1)
135
-
136
- return {"response":process}
 
 
 
 
 
1
  from fastapi import FastAPI, Request, HTTPException
2
  from pydantic import BaseModel
3
+ from typing import Annotated, Any, Optional
4
  import asyncio
5
  from redis.asyncio import Redis
6
  import httpx
7
  import os
8
 
9
+ app = FastAPI()
 
10
 
11
  password = os.environ.get("PASSWORD")
 
12
  r = Redis(
13
  host='redislabs.com',
14
  port=15562,
 
16
  username="default",
17
  password=password,
18
  )
 
19
 
20
# Registry mapping a model name to the HTTP endpoint that serves it.
# Names ending in "_fall" are the fallback copies; llm_router builds them as
# f"{name}_fall" from the caller-supplied fallback names. The endpoint URL is
# also used verbatim as the Redis hash key that tracks the endpoint's load.
Model_links = {
    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llm",
    "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
    "vicuna:7b":"https://sonuramashish22028704-vicuna7b.hf.space/llm",
    "gemma3:12b":"https://sonuramashish22028704-vicuna7b.hf.space/gemma",
    "internlm2:7b":"https://sonuramashish22028704-internlm27b.hf.space/llm",
    "maxkb/baichuan2:13b-chat":"https://sonuramashish22028704-internlm27b.hf.space/baichuan",
    "falcon:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/llm",
    "codellama:7b-instruct":"https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
    "mistral:7b":"https://sonuramashish22028704-mistral7b.hf.space/llm",
    "phi3:medium":"https://sonuramashish22028704-phi3medium.hf.space/llm",
    "qwen3.5:9b":"https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
    "gemma2:9b":"https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
    "llama3.2_fall":"https://sonuramashishnpm-model1.hf.space/llamafall",
    "qwen2.5-coder:7b_fall":"https://sonuramashishnpm-model1.hf.space/qwenfall",
    "vicuna:7b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
    "gemma3:12b_fall":"https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_gemma312b",
    "internlm2:7b_fall":"https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
    "falcon:7b-instruct_fall":"https://sonuramashishnpm-mistral7b.hf.space/llm_fall_falcon",
    "codellama:7b-instruct_fall":"https://sonuramashishnpm-model3.hf.space/codellamafall",
    "mistral:7b_fall":"https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
    # NOTE(review): this host label ends in a hyphen and the path is empty,
    # unlike every other entry - looks like an unfinished placeholder; confirm.
    "phi3:medium_fall":"https://sonuramashishnpm-.hf.space/",
    "qwen3.5:9b_fall":"https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359b",
    "gemma2:9b_fall":"https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b"
}
45
 
46
# Lua script: atomically check an endpoint's load and reserve a slot on it.
#
# KEYS[1] is the primary endpoint key; KEYS[2] is an optional fallback key.
# Each key is a hash whose 'status' field counts in-flight requests; an
# endpoint accepts a new request while that count is below 2.
#
# Returns:
#   0 or 1    - slot reserved on KEYS[1] (value is the count before the bump)
#   10 or 11  - slot reserved on KEYS[2] (previous count + 10)
#   -1        - every supplied key is at capacity
#
# The fallback branch is guarded because KEYS[2] is nil when the script is
# invoked with a single key, and redis.call() raises on a nil argument.
LUA_CHECK_AND_INC = """
local key = KEYS[1]
local fallback_key = KEYS[2]

local status = tonumber(redis.call('HGET', key, 'status') or '0')
if status < 2 then
    redis.call('HSET', key, 'status', status + 1)
    return status
end

if fallback_key then
    local fall_status = tonumber(redis.call('HGET', fallback_key, 'status') or '0')
    if fall_status < 2 then
        redis.call('HSET', fallback_key, 'status', fall_status + 1)
        return fall_status + 10
    end
end

return -1
"""
65
 
66
def _fall_copy_of(model_link: str) -> Optional[str]:
    """Return the ``<name>_fall`` endpoint registered for *model_link*, if any."""
    for name, link in Model_links.items():
        if link == model_link:
            return Model_links.get(f"{name}_fall")
    return None


async def _try_reserve(primary: str, secondary: Optional[str] = None) -> Optional[dict]:
    """Atomically reserve a slot on *primary*, or else on *secondary*.

    Returns ``{"link": <endpoint reserved>, "statusno": <count before bump>}``
    or ``None`` when nothing could be reserved.
    """
    # The script reads KEYS[2]; repeating the primary key turns the call into
    # a plain single-endpoint check without ever passing a missing key.
    second = secondary or primary
    status = await r.eval(LUA_CHECK_AND_INC, 2, primary, second)
    if status == -1:
        return None
    if status >= 10:
        # The script adds 10 when the slot was taken on KEYS[2]; report the
        # endpoint that was actually reserved so the caller posts to it and
        # later releases the same counter.
        return {"link": second, "statusno": status - 10}
    return {"link": primary, "statusno": status}


async def check_cond(model_link: str, fall_model: Optional[list] = None):
    """Reserve a slot on the first endpoint that has spare capacity.

    Order tried: the requested endpoint and its own ``_fall`` copy (one
    atomic script call), then every link in *fall_model*; when *fall_model*
    is ``None`` every endpoint in ``Model_links`` is tried instead.

    Args:
        model_link: Endpoint URL of the requested model.
        fall_model: Endpoint URLs to fall back to, or ``None`` for "any".

    Returns:
        ``{"link": url, "statusno": n}`` for the reserved endpoint, where
        ``n`` is its in-flight count before this reservation, or ``None``
        when every candidate is busy.
    """
    own_fall = _fall_copy_of(model_link)
    slot = await _try_reserve(model_link, own_fall)
    if slot is not None:
        return slot

    candidates = fall_model if fall_model is not None else Model_links.values()
    # Endpoints already found busy above are not asked again.
    tried = {model_link, own_fall}
    for link in candidates:
        if link in tried:
            continue
        tried.add(link)
        slot = await _try_reserve(link)
        if slot is not None:
            return slot

    return None
86
+
87
class Input(BaseModel):
    """Request body accepted by the ``/load_balancer`` endpoint."""

    # Name of the requested model; must be a key of Model_links.
    model: str
    # Sent to the backend as the "temperature" field of the JSON payload.
    temperature: float = 0.5
    # Sent to the backend as the "prompt" field of the JSON payload.
    prompt: str
    # Fallback endpoints are only collected when this is true and Models is non-empty.
    change: bool = True
    # Optional fallback model names; each is looked up as "<name>_fall" in Model_links.
    Models: Optional[list] = None
93
 
94
@app.post("/load_balancer")
async def llm_router(inputs: Input):
    """Pick an endpoint with spare capacity and forward the prompt to it.

    Raises:
        HTTPException 400: ``model`` or ``prompt`` is empty.
        HTTPException 404: ``model`` is not a key of ``Model_links``.
        HTTPException 503: the requested endpoint and all fallbacks are busy.
    """
    if not inputs.model or not inputs.prompt:
        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")

    model_link = Model_links.get(inputs.model)
    if model_link is None:
        # 404 is the standard "unknown resource" status; 444 is not a
        # registered HTTP status code.
        raise HTTPException(status_code=404, detail="Model not found.")

    fall_links = []
    if inputs.change and inputs.Models:
        # Fallback names without a registered "<name>_fall" endpoint are
        # skipped instead of failing the whole request with a KeyError.
        wanted = (f"{name}_fall" for name in inputs.Models)
        fall_links = [Model_links[key] for key in wanted if key in Model_links]

    model_cond = await check_cond(model_link=model_link, fall_model=fall_links)

    if not model_cond or not model_cond.get("link") or model_cond.get("statusno") is None:
        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")

    return await router(
        model_cond=model_cond["link"],
        statusno=model_cond["statusno"],
        prompt=inputs.prompt,
        temp=inputs.temperature,
    )
123
+
124
# Lua script: atomically give back one slot on KEYS[1], never going below 0.
# A separate HGET followed by HSET can interleave with other requests and
# lose updates, which would corrupt the in-flight counter.
_LUA_RELEASE_SLOT = """
local status = tonumber(redis.call('HGET', KEYS[1], 'status') or '0')
if status > 0 then
    return redis.call('HINCRBY', KEYS[1], 'status', -1)
end
return 0
"""


async def router(model_cond: str, statusno: int, prompt: str, temp: float):
    """Forward the prompt to an already-reserved endpoint and release its slot.

    Args:
        model_cond: Endpoint URL to POST to; also the Redis key whose
            'status' counter was incremented when the slot was reserved.
        statusno: Reservation status reported by the caller. Kept for
            interface compatibility; the slot is always released on the
            endpoint that was actually called.
        prompt: Prompt text, sent as the "prompt" payload field.
        temp: Sampling temperature, sent as the "temperature" payload field.

    Returns:
        ``{"response": <value of the backend's "response" field>}``.

    Raises:
        HTTPException 502: the backend call failed or its body was unusable.
    """
    payload = {"prompt": prompt, "temperature": temp}
    timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            response = await client.post(model_cond, json=payload)
            response.raise_for_status()
            return {"response": response.json()["response"]}
    except Exception as e:
        # Gateway boundary: any backend failure is reported upstream as 502.
        raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}") from e
    finally:
        # Runs on success and on failure, so a reserved slot is always freed.
        await r.eval(_LUA_RELEASE_SLOT, 1, model_cond)