| """Tests for LLM error classification helpers in agent.core.agent_loop. |
| |
| Covers two regressions on 2026-04-25: |
| |
| 1. Non-Anthropic context overflow (Kimi 365k > 262k) was not classified as |
| ``_is_context_overflow_error``, so the recovery path didn't fire and |
| session 62ccfdcb died with 68 wasted compaction events. |
| |
| 2. Bedrock TPM rate limit (`Too many tokens, please wait before trying |
| again.`) needs the longer rate-limit retry schedule. The old schedule |
| ([5, 15, 30] = 50s) burned through 6 sessions costing >$2,400 combined |
| on the same day. |
| """ |
|
|
from agent.core.agent_loop import (
    _LLM_RATE_LIMIT_RETRY_DELAYS,
    _LLM_RETRY_DELAYS,
    _MAX_LLM_RETRIES,
    _is_context_overflow_error,
    _is_rate_limit_error,
    _is_transient_error,
    _retry_delay_for,
)
|
|
|
|
# --- _is_context_overflow_error ---
|
|
|
|
def test_kimi_prompt_too_long_is_context_overflow():
    # Regression: session 62ccfdcb -- a 365,407-token Kimi prompt against a
    # 262,143-token window was not treated as a context overflow.
    err = Exception(
        "litellm.BadRequestError: OpenAIException - The prompt is too long: "
        "365407, model maximum context length: 262143"
    )
    assert _is_context_overflow_error(err)


def test_openai_context_length_exceeded_is_context_overflow():
    err = Exception("Error: This model's maximum context length is 8192 tokens.")
    assert _is_context_overflow_error(err)


def test_random_error_is_not_context_overflow():
    err = Exception("connection reset by peer")
    assert not _is_context_overflow_error(err)
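

# A minimal sketch of what the three tests above pin down: classification is
# assumed to be substring matching on str(err). Illustration only -- the real
# _is_context_overflow_error lives in agent.core.agent_loop and may match
# more patterns; _sketch_is_context_overflow is a hypothetical name that
# exists only in this file.
def _sketch_is_context_overflow(err):
    text = str(err).lower()
    return "prompt is too long" in text or "maximum context length" in text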
|
|
|
|
# --- _is_rate_limit_error / _is_transient_error ---
|
|
|
|
def test_bedrock_too_many_tokens_is_rate_limit():
    # Regression: Bedrock TPM throttling landed on the short retry schedule
    # and burned through 6 sessions in a single day.
    err = Exception(
        'litellm.RateLimitError: BedrockException - {"message":"Too many '
        'tokens, please wait before trying again."}'
    )
    assert _is_rate_limit_error(err)
    # Rate-limit errors must also count as transient, or the retry loop
    # would not fire at all.
    assert _is_transient_error(err)


def test_429_is_rate_limit():
    err = Exception("HTTP 429 Too Many Requests")
    assert _is_rate_limit_error(err)


def test_timeout_is_transient_but_not_rate_limit():
    err = Exception("Request timed out after 600s")
    assert _is_transient_error(err)
    assert not _is_rate_limit_error(err)
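

# Likewise, a hypothetical sketch of the two predicates exercised above
# (illustration only; the real ones live in agent.core.agent_loop and may
# match more patterns). The key assumed relationship: every rate-limit error
# is also transient, but not the reverse.
def _sketch_is_rate_limit(err):
    text = str(err).lower()
    return "429" in text or "too many" in text


def _sketch_is_transient(err):
    text = str(err).lower()
    return _sketch_is_rate_limit(err) or "timed out" in text or "503" in text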
|
|
|
|
# --- _retry_delay_for ---
|
|
|
|
def test_rate_limit_uses_longer_schedule():
    err = Exception("Too many tokens, please wait before trying again.")
    delays = [
        _retry_delay_for(err, i) for i in range(len(_LLM_RATE_LIMIT_RETRY_DELAYS))
    ]
    assert delays == _LLM_RATE_LIMIT_RETRY_DELAYS
    # Once the schedule is exhausted, the caller should stop retrying.
    assert _retry_delay_for(err, len(_LLM_RATE_LIMIT_RETRY_DELAYS)) is None


def test_other_transient_uses_short_schedule():
    err = Exception("503 service unavailable")
    delays = [_retry_delay_for(err, i) for i in range(len(_LLM_RETRY_DELAYS))]
    assert delays == _LLM_RETRY_DELAYS
    assert _retry_delay_for(err, len(_LLM_RETRY_DELAYS)) is None


def test_non_transient_returns_none():
    err = Exception("invalid request: bad parameter")
    assert _retry_delay_for(err, 0) is None


def test_rate_limit_total_budget_covers_bedrock_bucket_recovery():
    """The whole point of the rate-limit schedule: the total wait time must
    exceed the ~60s Bedrock TPM bucket recovery window. The old schedule
    summed to 5 + 15 + 30 = 50s and fell short."""
    assert len(_LLM_RATE_LIMIT_RETRY_DELAYS) == _MAX_LLM_RETRIES - 1
    assert sum(_LLM_RATE_LIMIT_RETRY_DELAYS) > 60
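

# Finally, a minimal sketch of the dispatch the schedule tests above pin down
# (illustration only; the real _retry_delay_for lives in
# agent.core.agent_loop, and _sketch_retry_delay_for is a hypothetical name):
def _sketch_retry_delay_for(err, attempt):
    if _is_rate_limit_error(err):
        # Longer schedule: has to outlast the ~60s Bedrock TPM bucket.
        schedule = _LLM_RATE_LIMIT_RETRY_DELAYS
    elif _is_transient_error(err):
        # Short schedule for ordinary transient blips.
        schedule = _LLM_RETRY_DELAYS
    else:
        return None  # non-transient errors are never retried
    # Past the end of the schedule, stop retrying.
    return schedule[attempt] if attempt < len(schedule) else None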
|
|