Ghisalbertifederico committed on
Commit
e8675f6
·
verified ·
1 Parent(s): 41a2285

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -108,9 +108,9 @@ def web_search(query: str) -> str:
108
  # Model fallback chain (primary → backup → last-resort)
109
  # ---------------------------------------------------------------------------
110
  MODEL_CONFIGS = [
111
- {"model_id": "llama-3.3-70b-versatile"},
112
- {"model_id": "meta-llama/llama-4-scout-17b-16e-instruct"}, # 500K TPD, 30K TPM, fast
113
- {"model_id": "qwen/qwen3-32b"}, # 500K TPD, 6K TPM, strong backup
114
  {"model_id": "llama-3.1-8b-instant"}, # 500K TPD, 6K TPM, last resort
115
  ]
116
 
@@ -361,7 +361,7 @@ class WebSearchAgent:
361
  print("Agent error:", e)
362
  msg = str(e)
363
  # Re-raise rate-limit errors so _answer_question can fall back to the next model
364
- if "rate_limit_exceeded" in msg or "429" in msg:
365
  raise
366
  return f"AGENT ERROR: {e}"
367
 
@@ -431,12 +431,17 @@ def _answer_question(item: dict) -> str:
431
  return result
432
  except Exception as e:
433
  msg = str(e)
434
- if "rate_limit_exceeded" in msg or "429" in msg:
435
  # Check if it's a daily (TPD) limit — skip model for all remaining questions
436
  if "on tokens per day" in msg or "TPD" in msg:
437
  _exhausted_models.add(model_id)
438
  print(f"[{model_id}] Daily token limit hit β€” skipping for remaining questions")
439
  break # move to next model immediately
 
 
 
 
 
440
  wait = _parse_retry_after(msg) + 5
441
  print(f"[{model_id}] Rate limit hit, waiting {wait:.0f}s (attempt {attempt + 1}/2)...")
442
  time.sleep(wait)
 
108
  # Model fallback chain (primary → backup → last-resort)
109
  # ---------------------------------------------------------------------------
110
  MODEL_CONFIGS = [
111
+ {"model_id": "llama-3.3-70b-versatile"}, # 100K TPD, 12K TPM
112
+ {"model_id": "meta-llama/llama-4-scout-17b-16e-instruct"}, # 500K TPD, 30K TPM
113
+ {"model_id": "gemma2-9b-it"}, # 500K TPD, 15K TPM
114
  {"model_id": "llama-3.1-8b-instant"}, # 500K TPD, 6K TPM, last resort
115
  ]
116
 
 
361
  print("Agent error:", e)
362
  msg = str(e)
363
  # Re-raise rate-limit errors so _answer_question can fall back to the next model
364
+ if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
365
  raise
366
  return f"AGENT ERROR: {e}"
367
 
 
431
  return result
432
  except Exception as e:
433
  msg = str(e)
434
+ if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
435
  # Check if it's a daily (TPD) limit — skip model for all remaining questions
436
  if "on tokens per day" in msg or "TPD" in msg:
437
  _exhausted_models.add(model_id)
438
  print(f"[{model_id}] Daily token limit hit β€” skipping for remaining questions")
439
  break # move to next model immediately
440
+ # Check if request itself exceeds the model's TPM limit
441
+ # (413 or "Request too large" — retrying won't help, skip immediately)
442
+ if "413" in msg or "Request too large" in msg or "please reduce your message size" in msg:
443
+ print(f"[{model_id}] Request too large for model's TPM limit β€” skipping to next model")
444
+ break # move to next model immediately
445
  wait = _parse_retry_after(msg) + 5
446
  print(f"[{model_id}] Rate limit hit, waiting {wait:.0f}s (attempt {attempt + 1}/2)...")
447
  time.sleep(wait)