npmaiecosystem committed on
Commit
0331354
·
verified ·
1 Parent(s): bfb02bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -26
app.py CHANGED
@@ -46,7 +46,6 @@ Model_links = {
46
  # Updated Lua Script
47
  LUA_CHECK_AND_INC = """
48
  local key = KEYS[1]
49
- local fallback_key = KEYS[2]
50
 
51
  local status = tonumber(redis.call('HGET', key, 'status') or '0')
52
  if status < 2 then
@@ -54,28 +53,20 @@ if status < 2 then
54
  return status
55
  end
56
 
57
- local fall_status = tonumber(redis.call('HGET', fallback_key, 'status') or '0')
58
- if fall_status < 2 then
59
- redis.call('HSET', fallback_key, 'status', fall_status + 1)
60
- return fall_status + 10
61
- end
62
-
63
  return -1
64
  """
65
 
66
  async def check_cond(model_link: str, fall_model: Optional[list] = None):
67
- # Try requested model & its fall copy first
68
- status = await r.eval(LUA_CHECK_AND_INC, 2, model_link, fall_model )
69
  if status != -1:
70
  return {"link": model_link, "statusno": status}
71
 
72
- # Try custom fallback models array
73
- if fall_model is not None:
74
  for model in fall_model:
75
  status = await r.eval(LUA_CHECK_AND_INC, 1, model)
76
  if status != -1:
77
  return {"link": model, "statusno": status}
78
- # Try all systemic models
79
  else:
80
  for model in Model_links.values():
81
  status = await r.eval(LUA_CHECK_AND_INC, 1, model)
@@ -84,6 +75,7 @@ async def check_cond(model_link: str, fall_model: Optional[list] = None):
84
 
85
  return None
86
 
 
87
  class Input(BaseModel):
88
  model: str
89
  temperature: float = 0.5
@@ -118,31 +110,26 @@ async def llm_router(inputs: Input):
118
  prompt=inputs.prompt,
119
  temp=inputs.temperature
120
  )
121
-
122
- raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
123
 
124
- async def router(model_cond: str, statusno: int, prompt: str, temp: float):
125
  payload = {"prompt": prompt, "temperature": temp}
126
  timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
127
-
128
- # Determine if we hit primary or fallback copy
129
- target_key = model_cond if statusno < 10 else model_cond + "fall"
130
 
131
  try:
132
  async with httpx.AsyncClient(timeout=timeout) as client:
133
- response = await client.post(model_cond, json=payload)
134
  response.raise_for_status()
135
  process = response.json()["response"]
136
  except Exception as e:
137
- # Decrement counter even if HTTP request crashes
138
- current_status = int(await r.hget(target_key, "status") or 0)
139
  if current_status > 0:
140
- await r.hset(target_key, "status", current_status - 1)
141
  raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}")
142
 
143
- # Standard completion decrement
144
- current_status = int(await r.hget(target_key, "status") or 0)
145
  if current_status > 0:
146
- await r.hset(target_key, "status", current_status - 1)
147
 
148
- return {"response": process}
 
46
  # Updated Lua Script
47
  LUA_CHECK_AND_INC = """
48
  local key = KEYS[1]
 
49
 
50
  local status = tonumber(redis.call('HGET', key, 'status') or '0')
51
  if status < 2 then
 
53
  return status
54
  end
55
 
 
 
 
 
 
 
56
  return -1
57
  """
58
 
59
  async def check_cond(model_link: str, fall_model: Optional[list] = None):
60
+ status = await r.eval(LUA_CHECK_AND_INC, 1, model_link)
 
61
  if status != -1:
62
  return {"link": model_link, "statusno": status}
63
 
64
+ if fall_model:
 
65
  for model in fall_model:
66
  status = await r.eval(LUA_CHECK_AND_INC, 1, model)
67
  if status != -1:
68
  return {"link": model, "statusno": status}
69
+
70
  else:
71
  for model in Model_links.values():
72
  status = await r.eval(LUA_CHECK_AND_INC, 1, model)
 
75
 
76
  return None
77
 
78
+
79
  class Input(BaseModel):
80
  model: str
81
  temperature: float = 0.5
 
110
  prompt=inputs.prompt,
111
  temp=inputs.temperature
112
  )
113
+ else:
114
+ raise HTTPException(status_code=503, detail="All model endpoints and fallbacks are busy.")
115
 
116
+ async def router(model_url, prompt, temp):
117
  payload = {"prompt": prompt, "temperature": temp}
118
  timeout = httpx.Timeout(connect=30.0, read=360.0, write=30.0, pool=120.0)
 
 
 
119
 
120
  try:
121
  async with httpx.AsyncClient(timeout=timeout) as client:
122
+ response = await client.post(model_url, json=payload)
123
  response.raise_for_status()
124
  process = response.json()["response"]
125
  except Exception as e:
126
+ current_status = int(await r.hget(model_url, "status") or 0)
 
127
  if current_status > 0:
128
+ await r.hset(model_url, "status", current_status - 1)
129
  raise HTTPException(status_code=502, detail=f"LLM backend error: {str(e)}")
130
 
131
+ current_status = int(await r.hget(model_url, "status") or 0)
 
132
  if current_status > 0:
133
+ await r.hset(model_url, "status", current_status - 1)
134
 
135
+ return {"response": process}