npmaiecosystem committed on
Commit
314c40e
·
verified ·
1 Parent(s): ef42d48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -14
app.py CHANGED
@@ -8,6 +8,10 @@ import os
8
 
9
  app = FastAPI()
10
 
 
 
 
 
11
  password = os.environ.get("PASSWORD")
12
  r = Redis(
13
  host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com',
@@ -17,6 +21,8 @@ r = Redis(
17
  password=password,
18
  )
19
 
 
 
20
  Model_links = {
21
  "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
22
  "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
@@ -46,13 +52,23 @@ Model_links = {
46
  LUA_CHECK_AND_INC = """
47
  local key = KEYS[1]
48
  local status = tonumber(redis.call('HGET', key, 'status') or '0')
49
- if status < 2 then
50
  redis.call('HSET', key, 'status', status + 1)
51
  return status
52
  end
53
  return -1
54
  """
55
 
 
 
 
 
 
 
 
 
 
 
56
  async def check_cond(model_link: str, fall_model: Optional[list] = None):
57
  status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
58
  if status != -1:
@@ -95,15 +111,17 @@ async def llm_router(inputs: Input):
95
  if inputs.change and fall_models:
96
  for m in fall_models:
97
  model_name = f"{m}_fall"
98
- link = Model_links[model_name]
99
- fall_links.append(link)
 
 
 
100
 
101
  model_cond = await check_cond(model_link=model_link, fall_model=fall_links)
102
 
103
  if model_cond and model_cond.get("link") and model_cond.get("statusno") is not None:
104
  return await router(
105
- model_cond=model_cond["link"],
106
- statusno=model_cond["statusno"],
107
  prompt=inputs.prompt,
108
  temp=inputs.temperature
109
  )
@@ -111,6 +129,8 @@ async def llm_router(inputs: Input):
111
  raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
112
 
113
  async def router(model_url, prompt, temp):
 
 
114
  payload = {"prompt": prompt, "temperature": temp}
115
  timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
116
 
@@ -118,15 +138,19 @@ async def router(model_url, prompt, temp):
118
  async with httpx.AsyncClient(timeout=timeout) as client:
119
  response = await client.post(model_url, json=payload)
120
  response.raise_for_status()
121
- process = response.json()["response"]
 
 
 
 
 
122
  except Exception as e:
123
- current_status = int(await r.hget(model_url, "status") or 0)
124
- if current_status > 0:
125
- await r.hset(model_url, "status", current_status - 1)
126
- raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}")
127
 
128
- current_status = int(await r.hget(model_url, "status") or 0)
129
- if current_status > 0:
130
- await r.hset(model_url, "status", current_status - 1)
131
 
132
- return {"response": process}
 
 
 
 
8
 
9
  app = FastAPI()
10
 
11
@app.get("/")
@app.post("/")
def health_check():
    """Liveness probe: report that the service process is up.

    The handler is registered on ``/`` for both GET and POST. POST is the
    method the route was originally exposed on and is kept so existing
    callers continue to work; GET is added so that a plain HTTP GET (the
    usual method for a read-only status check) no longer gets 405.

    NOTE(review): assumes no other GET handler for ``/`` is declared
    elsewhere in this module -- confirm before merging.

    Returns:
        The constant string ``"Healthy"``.
    """
    return "Healthy"
14
+
15
  password = os.environ.get("PASSWORD")
16
  r = Redis(
17
  host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com',
 
21
  password=password,
22
  )
23
 
24
+
25
+
26
  Model_links = {
27
  "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
28
  "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
 
52
  LUA_CHECK_AND_INC = """
53
  local key = KEYS[1]
54
  local status = tonumber(redis.call('HGET', key, 'status') or '0')
55
+ if status < 1 then
56
  redis.call('HSET', key, 'status', status + 1)
57
  return status
58
  end
59
  return -1
60
  """
61
 
62
+ LUA_REMOVAL_STATUS = """
63
+ local key = KEYS[1]
64
+ local status = tonumber(redis.call('HGET', key, 'status') or '0')
65
+ if status > 0 then
66
+ redis.call('HSET', key, 'status', status -1)
67
+ return status -1
68
+ end
69
+ return 0
70
+ """
71
+
72
  async def check_cond(model_link: str, fall_model: Optional[list] = None):
73
  status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
74
  if status != -1:
 
111
  if inputs.change and fall_models:
112
  for m in fall_models:
113
  model_name = f"{m}_fall"
114
+ if model_name in Model_links.keys():
115
+ link = Model_links[model_name]
116
+ fall_links.append(link)
117
+ else:
118
+ raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary")
119
 
120
  model_cond = await check_cond(model_link=model_link, fall_model=fall_links)
121
 
122
  if model_cond and model_cond.get("link") and model_cond.get("statusno") is not None:
123
  return await router(
124
+ model_url=model_cond["link"],
 
125
  prompt=inputs.prompt,
126
  temp=inputs.temperature
127
  )
 
129
  raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
130
 
131
  async def router(model_url, prompt, temp):
132
+ error_log = ""
133
+ process= ""
134
  payload = {"prompt": prompt, "temperature": temp}
135
  timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
136
 
 
138
  async with httpx.AsyncClient(timeout=timeout) as client:
139
  response = await client.post(model_url, json=payload)
140
  response.raise_for_status()
141
+ f_response = response.json()["response"]
142
+ if f_response is not None and str(f_response).strip() != "":
143
+ process += f_response
144
+
145
+ else:
146
+ raise ValueError("Empty string or None returned in response from LLM")
147
  except Exception as e:
148
+ error_log += f"LLM backend error: {str(e)}"
 
 
 
149
 
150
+ finally:
151
+ await r.eval(LUA_REMOVAL_STATUS, 1, model_url)
 
152
 
153
+ if error_log:
154
+ raise HTTPException(status_code=502, detail=error_log)
155
+ else:
156
+ return {"response": process}