Spaces:

npmaiecosystem
/

loadbalancerfallback

Running

App Files Community

npmaiecosystem committed 7 days ago

Commit

314c40e

verified ·

1 Parent(s): ef42d48

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -14

app.py CHANGED Viewed

@@ -8,6 +8,10 @@ import os
 app = FastAPI()
 password = os.environ.get("PASSWORD")
 r = Redis(
     host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com',
@@ -17,6 +21,8 @@ r = Redis(
     password=password,
 )
 Model_links = {
     "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
     "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
@@ -46,13 +52,23 @@ Model_links = {
 LUA_CHECK_AND_INC = """
 local key = KEYS[1]
 local status = tonumber(redis.call('HGET', key, 'status') or '0')
-if status < 2 then
     redis.call('HSET', key, 'status', status + 1)
     return status
 end
 return -1
 """
 async def check_cond(model_link: str, fall_model: Optional[list] = None):
     status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
     if status != -1:
@@ -95,15 +111,17 @@ async def llm_router(inputs: Input):
     if inputs.change and fall_models:
         for m in fall_models:
             model_name = f"{m}_fall"
-            link = Model_links[model_name]
-            fall_links.append(link)
     model_cond = await check_cond(model_link=model_link, fall_model=fall_links)
     if model_cond and model_cond.get("link") and model_cond.get("statusno") is not None:
         return await router(
-            model_cond=model_cond["link"],
-            statusno=model_cond["statusno"],
             prompt=inputs.prompt,
             temp=inputs.temperature
         )
@@ -111,6 +129,8 @@ async def llm_router(inputs: Input):
         raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
 async def router(model_url, prompt, temp):
     payload = {"prompt": prompt, "temperature": temp}
     timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
@@ -118,15 +138,19 @@ async def router(model_url, prompt, temp):
         async with httpx.AsyncClient(timeout=timeout) as client:
             response = await client.post(model_url, json=payload)
             response.raise_for_status()
-            process = response.json()["response"]
     except Exception as e:
-        current_status = int(await r.hget(model_url, "status") or 0)
-        if current_status > 0:
-            await r.hset(model_url, "status", current_status - 1)
-        raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}")
-    current_status = int(await r.hget(model_url, "status") or 0)
-    if current_status > 0:
-        await r.hset(model_url, "status", current_status - 1)
-    return {"response": process}

 app = FastAPI()
+@app.post("/")
+def health_check():
+    return "Healthy"
 password = os.environ.get("PASSWORD")
 r = Redis(
     host='redis-15562.c1.us-west-2-2.ec2.cloud.redislabs.com',
     password=password,
 )
 Model_links = {
     "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llama",
     "qwen2.5-coder:7b":"https://sonuramashishnpm-npmai.hf.space/qwen",
 LUA_CHECK_AND_INC = """
 local key = KEYS[1]
 local status = tonumber(redis.call('HGET', key, 'status') or '0')
+if status < 1 then
     redis.call('HSET', key, 'status', status + 1)
     return status
 end
 return -1
 """
+LUA_REMOVAL_STATUS = """
+local key = KEYS[1]
+local status = tonumber(redis.call('HGET', key, 'status') or '0')
+if status > 0 then
+    redis.call('HSET', key, 'status', status -1)
+    return status -1
+end
+return 0
+"""
 async def check_cond(model_link: str, fall_model: Optional[list] = None):
     status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
     if status != -1:
     if inputs.change and fall_models:
         for m in fall_models:
             model_name = f"{m}_fall"
+            if model_name in Model_links.keys():
+                link = Model_links[model_name]
+                fall_links.append(link)
+            else:
+                raise HTTPException(status_code=402, detail="Fallback models are not found in Models Dictionary")
     model_cond = await check_cond(model_link=model_link, fall_model=fall_links)
     if model_cond and model_cond.get("link") and model_cond.get("statusno") is not None:
         return await router(
+            model_url=model_cond["link"],
             prompt=inputs.prompt,
             temp=inputs.temperature
         )
         raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
 async def router(model_url, prompt, temp):
+    error_log = ""
+    process= ""
     payload = {"prompt": prompt, "temperature": temp}
     timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
         async with httpx.AsyncClient(timeout=timeout) as client:
             response = await client.post(model_url, json=payload)
             response.raise_for_status()
+            f_response = response.json()["response"]
+            if f_response is not None and str(f_response).strip() != "":
+                process += f_response
+            else:
+                raise ValueError("Empty string or None returned in response from LLM")
     except Exception as e:
+        error_log += f"LLM backend error: {str(e)}"
+    finally:
+        await r.eval(LUA_REMOVAL_STATUS, 1, model_url)
+    if error_log:
+        raise HTTPException(status_code=502, detail=error_log)
+    else:
+        return {"response": process}