Spaces:

npmaiecosystem
/

loadbalancer

Running

App Files Community

npmaiecosystem committed 15 days ago

Commit

0331354

verified ·

1 Parent(s): bfb02bc

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -26

app.py CHANGED Viewed

@@ -46,7 +46,6 @@ Model_links = {
 # Updated Lua Script
 LUA_CHECK_AND_INC = """
 local key = KEYS[1]
-local fallback_key = KEYS[2]
 local status = tonumber(redis.call('HGET', key, 'status') or '0')
 if status < 2 then
@@ -54,28 +53,20 @@ if status < 2 then
     return status
 end
-local fall_status = tonumber(redis.call('HGET', fallback_key, 'status') or '0')
-if fall_status < 2 then
-    redis.call('HSET', fallback_key, 'status', fall_status + 1)
-    return fall_status + 10
-end
 return -1
 """
 async def check_cond(model_link: str, fall_model: Optional[list] = None):
-    # Try requested model & its fall copy first
-    status = await r.eval(LUA_CHECK_AND_INC, 2, model_link, fall_model )
     if status != -1:
         return {"link": model_link, "statusno": status}
-    # Try custom fallback models array
-    if fall_model is not None:
         for model in fall_model:
             status = await r.eval(LUA_CHECK_AND_INC, 1, model)
             if status != -1:
                 return {"link": model, "statusno": status}
-    # Try all systemic models
     else:
         for model in Model_links.values():
             status = await r.eval(LUA_CHECK_AND_INC, 1, model)
@@ -84,6 +75,7 @@ async def check_cond(model_link: str, fall_model: Optional[list] = None):
     return None
 class Input(BaseModel):
     model: str
     temperature: float = 0.5
@@ -118,31 +110,26 @@ async def llm_router(inputs: Input):
             prompt=inputs.prompt,
             temp=inputs.temperature
         )
-    raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
-async def router(model_cond: str, statusno: int, prompt: str, temp: float):
     payload = {"prompt": prompt, "temperature": temp}
     timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
-    # Determine if we hit primary or fallback copy
-    target_key = model_cond if statusno < 10 else model_cond + "fall"
     try:
         async with httpx.AsyncClient(timeout=timeout) as client:
-            response = await client.post(model_cond, json=payload)
             response.raise_for_status()
             process = response.json()["response"]
     except Exception as e:
-        # Decrement counter even if HTTP request crashes
-        current_status = int(await r.hget(target_key, "status") or 0)
         if current_status > 0:
-            await r.hset(target_key, "status", current_status - 1)
         raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}")
-    # Standard completion decrement
-    current_status = int(await r.hget(target_key, "status") or 0)
     if current_status > 0:
-        await r.hset(target_key, "status", current_status - 1)
-    return {"response": process}

 # Updated Lua Script
 LUA_CHECK_AND_INC = """
 local key = KEYS[1]
 local status = tonumber(redis.call('HGET', key, 'status') or '0')
 if status < 2 then
     return status
 end
 return -1
 """
 async def check_cond(model_link: str, fall_model: Optional[list] = None):
+    status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
     if status != -1:
         return {"link": model_link, "statusno": status}
+    if fall_model:
         for model in fall_model:
             status = await r.eval(LUA_CHECK_AND_INC, 1, model)
             if status != -1:
                 return {"link": model, "statusno": status}
     else:
         for model in Model_links.values():
             status = await r.eval(LUA_CHECK_AND_INC, 1, model)
     return None
 class Input(BaseModel):
     model: str
     temperature: float = 0.5
             prompt=inputs.prompt,
             temp=inputs.temperature
         )
+    else:
+        raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
+async def router(model_url, prompt, temp):
     payload = {"prompt": prompt, "temperature": temp}
     timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
     try:
         async with httpx.AsyncClient(timeout=timeout) as client:
+            response = await client.post(model_url, json=payload)
             response.raise_for_status()
             process = response.json()["response"]
     except Exception as e:
+        current_status = int(await r.hget(model_url, "status") or 0)
         if current_status > 0:
+            await r.hset(model_url, "status", current_status - 1)
         raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}")
+    current_status = int(await r.hget(model_url, "status") or 0)
     if current_status > 0:
+        await r.hset(model_url, "status", current_status - 1)
+    return {"response": process}