Spaces:

npmaiecosystem
/

loadbalancer

Running

App Files Files Community

npmaiecosystem committed 16 days ago

Commit

bfb02bc

verified ·

1 Parent(s): 198f999

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -100

app.py CHANGED Viewed

@@ -1,16 +1,14 @@
 from fastapi import FastAPI, Request, HTTPException
 from pydantic import BaseModel
-from typing import Annotated, Any
 import asyncio
 from redis.asyncio import Redis
 import httpx
 import os
-#FastAPI Initialistaion
-app= FastAPI()
 password = os.environ.get("PASSWORD")
-#Redis Initialistaion
 r = Redis(
     host='redislabs.com',
     port=15562,
@@ -18,119 +16,133 @@ r = Redis(
     username="default",
     password=password,
 )
-print(r)
-Model_links={
-    "model":"link"
 }
-Model_fall_links = {
-    "model":"link"
-}
-#Lua Script
 LUA_CHECK_AND_INC = """
 local key = KEYS[1]
 local status = tonumber(redis.call('HGET', key, 'status') or '0')
 if status < 2 then
     redis.call('HSET', key, 'status', status + 1)
-    return status -- returns the old status (0 or 1) so you know what it was
-else
-    return -1 -- means busy
 end
 """
-async def check_cond(model_link,fall_model=None):
-  status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
-  if status != -1:
-    return {"link": model_link, "statusno": status}
-  if fall_model != None:
-    for model in fall_model:
-      status = await r.eval(LUA_CHECK_AND_INC, 1, model)
-      if status != -1:
-        return {"link": model, "statusno": status}
-  else:
-    key = Model_links.keys()
-    for model in key:
-      status = await r.eval(LUA_CHECK_AND_INC, 1, Model_links[model])
-      if status != -1:
-        return {"link": Model_links[model], "statusno": status}
-  return "Your requested model and other models are busy try again."
-#Input initialistaion
-class Input(BaseModel):
-  model:str
-  temperature:float = 0.5
-  prompt:str
-  change:bool = True
-  Models:list = None
 @app.post("/load_balancer")
 async def llm_router(inputs: Input):
-  if not inputs.model and inputs.prompt:
-    return HTTPException(status_code=404, detail="Model name and Prompt is required.")
-  if inputs.model in Model_links.keys():
-    model_link = Model_links[inputs.model]
-    if inputs.change:
-      if inputs.Models != None:
-        fall_links = [Model_links[m] for m in inputs.Models if m in Model_links]
-        model_cond = await check_cond(model_link=model_link,fall_model=fall_links)
-        if isinstance(model_cond, dict) and model_cond.get("link") and model_cond.get("statusno") is not None:
-          return await router(model_cond=model_cond["link"],statusno=model_cond["statusno"], prompt = inputs.prompt, temp = inputs.temperature)
-        else:
-          return HTTPException(status=404, detail="Sorry We are not able to serve your requests due to not available rooms try again later.")
-      else:
-        model_cond = await check_cond(model_link=model_link)
-        if isinstance(model_cond, dict) and model_cond.get("link") and model_cond.get("statusno") is not None:
-          return await router(model_cond=model_cond["link"],statusno=model_cond["statusno"], prompt = inputs.prompt, temp = inputs.temperature)
-        else:
-          return HTTPException(status=404, detail="Sorry We are not able to serve your requests due to not available rooms try again later.")
-    else:
-      model_cond = await check_cond(model_link=model_link)
-      if isinstance(model_cond, dict) and model_cond.get("link") and model_cond.get("statusno") is not None:
-        return await router(model_cond=model_cond["link"],statusno=model_cond["statusno"],prompt = inputs.prompt)
-      else:
-        return HTTPException(status=404, detail="Sorry We are not able to serve your requests due to not available rooms try again later.")
-async def router(model_cond, statusno, prompt, temp):
-  payload ={
-      "prompt":prompt,
-      "temperature":temp
-  }
-  timeout = httpx.Timeout(
-    connect=30.0,  # connection ka max time
-    read=360.0,    # response read karne ka max time
-    write=30.0,    # request bhejne ka max time
-    pool=120.0      # connection pool wait
-    )
-  if statusno == 0:
-    try:
-      async with httpx.AsyncClient(timeout=timeout) as client:
-        response = await client.post(model_cond,json=payload)
-        process = response.json()["response"]
-    except Exception as e:
-      return {"response":e}
-    pass
-  elif statusno == 1:
     try:
-      async with httpx.AsyncClient(timeout=timeout) as client:
-        response = await client.post(model_cond,json=payload)
-        process = response.json()["response"]
     except Exception as e:
-      return {"response":e}
-    pass
-  current_status = int(r.hget(model_cond, "status") or 0)
-  if current_status > 0:
-      await r.hset(model_cond, "status", current_status - 1)
-  return {"response":process}

 from fastapi import FastAPI, Request, HTTPException
 from pydantic import BaseModel
+from typing import Annotated, Any, Optional
 import asyncio
 from redis.asyncio import Redis
 import httpx
 import os
+# ASGI application object; the route below is registered on it via @app.post.
+app = FastAPI()
+# Redis password is read from the PASSWORD environment variable; os.environ.get
+# yields None when the variable is not set.
 password = os.environ.get("PASSWORD")
+# Shared async Redis client. Endpoint URLs are used as hash keys whose 'status'
+# field is read and written by the code further down in this file.
 r = Redis(
     host='redislabs.com',
     port=15562,
     username="default",
     password=password,
 )
+# Registry mapping a model name to the HTTP endpoint that serves it.
+# Names ending in "_fall" are the fallback copies of the model with the same
+# base name. Insertion order is significant: check_cond iterates the values.
+Model_links = {
+    # Primary endpoints
+    "llama3.2": "https://sonuramashishnpm-npmai.hf.space/llm",
+    "qwen2.5-coder:7b": "https://sonuramashishnpm-npmai.hf.space/qwen",
+    "vicuna:7b": "https://sonuramashish22028704-vicuna7b.hf.space/llm",
+    "gemma3:12b": "https://sonuramashish22028704-vicuna7b.hf.space/gemma",
+    "internlm2:7b": "https://sonuramashish22028704-internlm27b.hf.space/llm",
+    "maxkb/baichuan2:13b-chat": "https://sonuramashish22028704-internlm27b.hf.space/baichuan",
+    "falcon:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/llm",
+    "codellama:7b-instruct": "https://sonuramashish22028704-falcon7binstruct.hf.space/codellama",
+    "mistral:7b": "https://sonuramashish22028704-mistral7b.hf.space/llm",
+    "phi3:medium": "https://sonuramashish22028704-phi3medium.hf.space/llm",
+    "qwen3.5:9b": "https://sonuramashish22028704-vicuna7b.hf.space/qwen359gb",
+    "gemma2:9b": "https://sonuramashish22028704-internlm27b.hf.space/gemma29b",
+    # Fallback copies
+    "llama3.2_fall": "https://sonuramashishnpm-model1.hf.space/llamafall",
+    "qwen2.5-coder:7b_fall": "https://sonuramashishnpm-model1.hf.space/qwenfall",
+    "vicuna:7b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_vicuna",
+    "gemma3:12b_fall": "https://sonuramashishnpm-npm-journalist.hf.space/llm_fall_gemma312b",
+    "internlm2:7b_fall": "https://sonuramashishnpm-model2.hf.space/llm_fall_interlm",
+    "falcon:7b-instruct_fall": "https://sonuramashishnpm-mistral7b.hf.space/llm_fall_falcon",
+    "codellama:7b-instruct_fall": "https://sonuramashishnpm-model3.hf.space/codellamafall",
+    "mistral:7b_fall": "https://sonuramashishnpm-model2.hf.space/fall_llm_mistral",
+    # NOTE(review): this host ends in "-" and has no path, unlike every other
+    # entry; it looks truncated. Confirm the intended Space URL.
+    "phi3:medium_fall": "https://sonuramashishnpm-.hf.space/",
+    "qwen3.5:9b_fall": "https://sonuramashishnpm-model4.hf.space/llm_fall_qwen359b",
+    "gemma2:9b_fall": "https://sonuramashishnpm-model3.hf.space/llm_fall_gemma29b",
 }
+# Lua script: atomically reserve one slot on a single endpoint.
+# KEYS[1] is the endpoint URL; the hash field 'status' counts reserved slots.
+# Returns the previous count (0 or 1) when a slot was reserved, or -1 when the
+# endpoint already has two reservations.
 LUA_CHECK_AND_INC = """
 local key = KEYS[1]
 local status = tonumber(redis.call('HGET', key, 'status') or '0')
 if status < 2 then
     redis.call('HSET', key, 'status', status + 1)
+    return status
 end
+return -1
 """
+def _fall_copy_of(model_link: str) -> Optional[str]:
+    """Return the URL registered as "<name>_fall" for the model served at *model_link*, if any."""
+    for name, link in Model_links.items():
+        if link == model_link:
+            return Model_links.get(f"{name}_fall")
+    return None
+async def check_cond(model_link: str, fall_model: Optional[list] = None):
+    """Reserve a slot on the first endpoint that still has capacity.
+
+    Endpoints are tried in this order: the requested endpoint, its "_fall"
+    copy from Model_links (when one is registered), then either the URLs in
+    *fall_model* or, when *fall_model* is None, every URL in Model_links.
+
+    Args:
+        model_link: URL of the requested endpoint (also its Redis key).
+        fall_model: Optional list of alternative endpoint URLs. An empty list
+            means "no further alternatives"; None means "try the whole registry".
+
+    Returns:
+        {"link": <reserved URL>, "statusno": <slot count before reserving>}
+        or None when every candidate is at capacity. "link" is always the URL
+        whose counter was incremented, so the caller releases the right key.
+    """
+    candidates = [model_link]
+    fall_copy = _fall_copy_of(model_link)
+    if fall_copy is not None:
+        candidates.append(fall_copy)
+    if fall_model is not None:
+        candidates.extend(fall_model)
+    else:
+        candidates.extend(Model_links.values())
+    # dict.fromkeys removes repeated URLs while keeping the preference order,
+    # so a busy endpoint is not probed twice.
+    for link in dict.fromkeys(candidates):
+        # One key per call: the script only ever reads KEYS[1].
+        status = await r.eval(LUA_CHECK_AND_INC, 1, link)
+        if status != -1:
+            return {"link": link, "statusno": status}
+    return None
+class Input(BaseModel):
+    """Request body accepted by POST /load_balancer."""
+    # Name of the requested model; llm_router looks it up in Model_links.
+    model: str
+    # Temperature value forwarded to the backend in the JSON payload.
+    temperature: float = 0.5
+    # Prompt text forwarded to the backend in the JSON payload.
+    prompt: str
+    # When true (and Models is non-empty), alternative endpoints are collected.
+    change: bool = True
+    # Optional model names; llm_router resolves each one's "<name>_fall" entry.
+    Models: Optional[list] = None
 @app.post("/load_balancer")
 async def llm_router(inputs: Input):
+    """Validate the request, reserve an endpoint and forward the prompt to it.
+
+    Raises:
+        HTTPException(400): model name or prompt is empty.
+        HTTPException(404): the requested model is not in Model_links.
+        HTTPException(503): check_cond found no endpoint with a free slot.
+    """
+    if not inputs.model or not inputs.prompt:
+        raise HTTPException(status_code=400, detail="Model name and Prompt are required.")
+    model_link = Model_links.get(inputs.model)
+    if model_link is None:
+        # 404 instead of the non-standard 444, which is not a registered HTTP status.
+        raise HTTPException(status_code=404, detail="Model not found.")
+    if not inputs.change:
+        # Model switching disabled: an empty list gives check_cond no extra candidates.
+        fall_links = []
+    elif inputs.Models:
+        # Unknown names are skipped rather than raising KeyError (an unhandled 500).
+        fall_links = [
+            Model_links[f"{m}_fall"]
+            for m in inputs.Models
+            if f"{m}_fall" in Model_links
+        ]
+    else:
+        # No custom list: None makes check_cond's "try every registered endpoint"
+        # branch reachable, which an always-present list never did.
+        fall_links = None
+    model_cond = await check_cond(model_link=model_link, fall_model=fall_links)
+    if model_cond and model_cond.get("link") and model_cond.get("statusno") is not None:
+        return await router(
+            model_cond=model_cond["link"],
+            statusno=model_cond["statusno"],
+            prompt=inputs.prompt,
+            temp=inputs.temperature,
+        )
+    raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
+# Lua script: atomically give back one slot on KEYS[1]; the counter never goes
+# below zero. Returns the count seen before the decrement.
+LUA_RELEASE = """
+local status = tonumber(redis.call('HGET', KEYS[1], 'status') or '0')
+if status > 0 then
+    redis.call('HSET', KEYS[1], 'status', status - 1)
+end
+return status
+"""
+async def router(model_cond: str, statusno: int, prompt: str, temp: float):
+    """POST the prompt to *model_cond* and release its reserved slot afterwards.
+
+    Args:
+        model_cond: URL of the backend endpoint to call.
+        statusno: Value returned by the reservation step; values >= 10 select
+            the fallback counter key instead of model_cond's own key.
+        prompt: Prompt text sent as "prompt" in the JSON payload.
+        temp: Value sent as "temperature" in the JSON payload.
+
+    Returns:
+        {"response": <the "response" field of the backend's JSON reply>}
+
+    Raises:
+        HTTPException(502): the request failed, the backend answered with an
+            error status, or the reply had no "response" field.
+    """
+    payload = {"prompt": prompt, "temperature": temp}
+    timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
+    # NOTE(review): nothing visible in this file increments a key named
+    # model_cond + "fall"; confirm the fallback key naming with the reservation code.
+    target_key = model_cond if statusno < 10 else model_cond + "fall"
     try:
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            response = await client.post(model_cond, json=payload)
+            response.raise_for_status()
+            process = response.json()["response"]
     except Exception as e:
+        raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}") from e
+    finally:
+        # Single release point for success, failure and task cancellation.
+        # The read-modify-write runs inside Redis, so concurrent requests
+        # cannot overwrite each other's decrement.
+        await r.eval(LUA_RELEASE, 1, target_key)
+    return {"response": process}