Update app.py
Browse files
app.py
CHANGED
|
@@ -108,9 +108,9 @@ def web_search(query: str) -> str:
|
|
| 108 |
# Model fallback chain (primary → backup → last-resort)
|
| 109 |
# ---------------------------------------------------------------------------
|
| 110 |
MODEL_CONFIGS = [
|
| 111 |
-
{"model_id": "llama-3.3-70b-versatile"},
|
| 112 |
-
{"model_id": "meta-llama/llama-4-scout-17b-16e-instruct"}, # 500K TPD, 30K TPM
|
| 113 |
-
{"model_id": "
|
| 114 |
{"model_id": "llama-3.1-8b-instant"}, # 500K TPD, 6K TPM, last resort
|
| 115 |
]
|
| 116 |
|
|
@@ -361,7 +361,7 @@ class WebSearchAgent:
|
|
| 361 |
print("Agent error:", e)
|
| 362 |
msg = str(e)
|
| 363 |
# Re-raise rate-limit errors so _answer_question can fall back to the next model
|
| 364 |
-
if "rate_limit_exceeded" in msg or "429" in msg:
|
| 365 |
raise
|
| 366 |
return f"AGENT ERROR: {e}"
|
| 367 |
|
|
@@ -431,12 +431,17 @@ def _answer_question(item: dict) -> str:
|
|
| 431 |
return result
|
| 432 |
except Exception as e:
|
| 433 |
msg = str(e)
|
| 434 |
-
if "rate_limit_exceeded" in msg or "429" in msg:
|
| 435 |
# Check if it's a daily (TPD) limit → skip model for all remaining questions
|
| 436 |
if "on tokens per day" in msg or "TPD" in msg:
|
| 437 |
_exhausted_models.add(model_id)
|
| 438 |
print(f"[{model_id}] Daily token limit hit → skipping for remaining questions")
|
| 439 |
break # move to next model immediately
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
wait = _parse_retry_after(msg) + 5
|
| 441 |
print(f"[{model_id}] Rate limit hit, waiting {wait:.0f}s (attempt {attempt + 1}/2)...")
|
| 442 |
time.sleep(wait)
|
|
|
|
| 108 |
# Model fallback chain (primary → backup → last-resort)
|
| 109 |
# ---------------------------------------------------------------------------
|
| 110 |
MODEL_CONFIGS = [
|
| 111 |
+
{"model_id": "llama-3.3-70b-versatile"}, # 100K TPD, 12K TPM
|
| 112 |
+
{"model_id": "meta-llama/llama-4-scout-17b-16e-instruct"}, # 500K TPD, 30K TPM
|
| 113 |
+
{"model_id": "gemma2-9b-it"}, # 500K TPD, 15K TPM
|
| 114 |
{"model_id": "llama-3.1-8b-instant"}, # 500K TPD, 6K TPM, last resort
|
| 115 |
]
|
| 116 |
|
|
|
|
| 361 |
print("Agent error:", e)
|
| 362 |
msg = str(e)
|
| 363 |
# Re-raise rate-limit errors so _answer_question can fall back to the next model
|
| 364 |
+
if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
|
| 365 |
raise
|
| 366 |
return f"AGENT ERROR: {e}"
|
| 367 |
|
|
|
|
| 431 |
return result
|
| 432 |
except Exception as e:
|
| 433 |
msg = str(e)
|
| 434 |
+
if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
|
| 435 |
# Check if it's a daily (TPD) limit → skip model for all remaining questions
|
| 436 |
if "on tokens per day" in msg or "TPD" in msg:
|
| 437 |
_exhausted_models.add(model_id)
|
| 438 |
print(f"[{model_id}] Daily token limit hit → skipping for remaining questions")
|
| 439 |
break # move to next model immediately
|
| 440 |
+
# Check if request itself exceeds the model's TPM limit
|
| 441 |
+
# (413 or "Request too large" → retrying won't help, skip immediately)
|
| 442 |
+
if "413" in msg or "Request too large" in msg or "please reduce your message size" in msg:
|
| 443 |
+
print(f"[{model_id}] Request too large for model's TPM limit → skipping to next model")
|
| 444 |
+
break # move to next model immediately
|
| 445 |
wait = _parse_retry_after(msg) + 5
|
| 446 |
print(f"[{model_id}] Rate limit hit, waiting {wait:.0f}s (attempt {attempt + 1}/2)...")
|
| 447 |
time.sleep(wait)
|