theakshayrane committed on
Commit
75dd41c
·
verified ·
1 Parent(s): e3eb464

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +18 -12
model.py CHANGED
@@ -21,21 +21,27 @@ class LocalTransformersModel:
21
 
22
  class WrapperLiteLLMModel(LiteLLMModel):
23
  def __call__(self, messages, **kwargs):
24
- max_retry = 5
25
  for attempt in range(max_retry):
26
  try:
27
  return super().__call__(messages, **kwargs)
28
- except RateLimitError as e:
29
- print(f"RateLimitError (attempt {attempt+1}/{max_retry})")
30
-
31
- # Try to extract retry time from the exception string
32
- match = re.search(r'"retryDelay": ?"(\d+)s"', str(e))
33
- retry_seconds = int(match.group(1)) if match else 50
34
-
35
- print(f"Sleeping for {retry_seconds} seconds before retrying...")
36
- time.sleep(retry_seconds)
37
-
38
- raise RateLimitError(f"Rate limit exceeded after {max_retry} retries.")
 
 
 
 
 
 
39
 
40
  @lru_cache(maxsize=1)
41
  def get_lite_llm_model(model_id: str, **kwargs) -> WrapperLiteLLMModel:
 
21
 
22
  class WrapperLiteLLMModel(LiteLLMModel):
23
  def __call__(self, messages, **kwargs):
24
+ max_retry = 7 # Increased retries so it's more patient
25
  for attempt in range(max_retry):
26
  try:
27
  return super().__call__(messages, **kwargs)
28
+ except Exception as e:
29
+ error_str = str(e)
30
+ # Catch both Rate Limits (429) and Server Overloads (503)
31
+ if "503" in error_str or "UNAVAILABLE" in error_str or "429" in error_str:
32
+ print(f"Google API busy (attempt {attempt+1}/{max_retry})")
33
+
34
+ # Try to extract retry time from the exception string, default to 20 seconds
35
+ match = re.search(r'"retryDelay": ?"(\d+)s"', error_str)
36
+ retry_seconds = int(match.group(1)) if match else 20
37
+
38
+ print(f"Sleeping for {retry_seconds} seconds before retrying...")
39
+ time.sleep(retry_seconds)
40
+ else:
41
+ # If it's a different error, crash normally
42
+ raise e
43
+
44
+ raise Exception(f"API request failed after {max_retry} retries due to server traffic.")
45
 
46
  @lru_cache(maxsize=1)
47
  def get_lite_llm_model(model_id: str, **kwargs) -> WrapperLiteLLMModel: