| """Tests for payload/context-length → compression retry logic in AIAgent. |
| |
| Verifies that: |
| - HTTP 413 errors trigger history compression and retry |
| - HTTP 400 context-length errors trigger compression (not generic 4xx abort) |
| - Preflight compression proactively compresses oversized sessions before API calls |
| """ |
|
|
| import pytest |
| pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments") |
|
|
|
|
|
|
| import uuid |
| from types import SimpleNamespace |
| from unittest.mock import MagicMock, patch |
|
|
| import pytest |
|
|
| from agent.context_compressor import SUMMARY_PREFIX |
| from run_agent import AIAgent |
|
|
|
|
| |
| |
| |
|
|
def _make_tool_defs(*names: str) -> list:
    """Build one minimal function-tool definition per name, in argument order.

    Each entry has the shape ``{"type": "function", "function": {...}}`` where
    the inner dict carries the tool name, a ``"<name> tool"`` description and
    an empty object parameter schema. Calling with no names returns ``[]``.
    """
    definitions = []
    for tool_name in names:
        spec = {
            "name": tool_name,
            "description": f"{tool_name} tool",
            # A fresh schema dict per tool so entries never share state.
            "parameters": {"type": "object", "properties": {}},
        }
        definitions.append({"type": "function", "function": spec})
    return definitions
|
|
|
|
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None, usage=None):
    """Return a lightweight stand-in for a chat-completion response.

    The result exposes ``choices[0].message`` (with ``content``,
    ``tool_calls`` and two reasoning fields fixed to ``None``),
    ``choices[0].finish_reason``, a constant ``model`` of ``"test/model"``
    and ``usage``.

    Args:
        content: Text placed on the message.
        finish_reason: Finish reason placed on the single choice.
        tool_calls: Value stored verbatim as ``message.tool_calls``.
        usage: Optional mapping expanded into attributes of ``resp.usage``;
            a falsy value (``None`` or ``{}``) leaves ``usage`` as ``None``.
    """
    if usage:
        usage_ns = SimpleNamespace(**usage)
    else:
        usage_ns = None

    message = SimpleNamespace(
        content=content,
        tool_calls=tool_calls,
        reasoning_content=None,
        reasoning=None,
    )
    return SimpleNamespace(
        choices=[SimpleNamespace(message=message, finish_reason=finish_reason)],
        model="test/model",
        usage=usage_ns,
    )
|
|
|
|
def _make_413_error(*, use_status_code=True, message="Request entity too large"):
    """Create an exception that mimics a 413 HTTP error.

    Args:
        use_status_code: When true, attach ``status_code = 413`` to the
            exception; when false the 413 is only discoverable through
            whatever ``message`` says.
        message: Text of the exception.

    Returns:
        A plain ``Exception`` instance.
    """
    error = Exception(message)
    if not use_status_code:
        return error
    error.status_code = 413
    return error
|
|
|
|
@pytest.fixture()
def agent():
    """Provide an ``AIAgent`` wired for offline compression tests.

    While the agent is constructed, ``run_agent.get_tool_definitions`` is
    patched to return a single ``web_search`` tool definition,
    ``run_agent.check_toolset_requirements`` to return ``{}``, and
    ``run_agent.OpenAI`` is replaced by a mock. Afterwards the client is
    swapped for a ``MagicMock`` and a handful of attributes are pinned so each
    test starts from the same state (compression disabled, no trajectory
    saving, no tool delay, prompt caching off).
    """
    with (
        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        instance = AIAgent(
            api_key="test-key-1234567890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        # Applied in this order after construction; tests override
        # individual values (e.g. compression_enabled) as needed.
        overrides = {
            "client": MagicMock(),
            "_cached_system_prompt": "You are helpful.",
            "_use_prompt_caching": False,
            "tool_delay": 0,
            "compression_enabled": False,
            "save_trajectories": False,
        }
        for attr_name, attr_value in overrides.items():
            setattr(instance, attr_name, attr_value)
        return instance
|
|
|
|
| |
| |
| |
|
|
class TestHTTP413Compression:
    """413 errors should trigger compression, not abort as generic 4xx.

    Every test scripts ``agent.client.chat.completions.create`` so that the
    first call raises a payload/context-length style error, then drives one
    ``run_conversation("hello", ...)`` turn with ``_compress_context`` mocked.
    The shared setup lives in the two private helpers below so each test body
    only states what is specific to its scenario.
    """

    @staticmethod
    def _prior_turns():
        """Return a fresh two-message history to pass as ``conversation_history``."""
        return [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

    @staticmethod
    def _run_hello(agent, compressed, *, history=None):
        """Run one ``"hello"`` turn with compression and persistence mocked.

        Args:
            agent: The agent under test (from the ``agent`` fixture).
            compressed: The ``(messages, system_prompt)`` tuple the mocked
                ``_compress_context`` returns.
            history: Optional prior messages. When ``None`` the
                ``conversation_history`` keyword is omitted entirely, so the
                agent's own default applies.

        Returns:
            ``(result, mock_compress)``: the dict returned by
            ``run_conversation`` and the mock that replaced
            ``_compress_context``, for call-count assertions.
        """
        run_kwargs = {} if history is None else {"conversation_history": history}
        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = compressed
            result = agent.run_conversation("hello", **run_kwargs)
        return result, mock_compress

    def test_413_triggers_compression(self, agent):
        """A 413 error should call _compress_context and retry, not abort."""
        # First call fails with 413, second call succeeds.
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(),
            _mock_response(content="Success after compression", finish_reason="stop"),
        ]

        # The mocked compressor returns fewer messages than the prefilled
        # history plus the new turn.
        result, mock_compress = self._run_hello(
            agent,
            ([{"role": "user", "content": "hello"}], "compressed prompt"),
            history=self._prior_turns(),
        )

        mock_compress.assert_called_once()
        assert result["completed"] is True
        assert result["final_response"] == "Success after compression"

    def test_413_not_treated_as_generic_4xx(self, agent):
        """413 must NOT hit the generic 4xx abort path; it should attempt compression."""
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(),
            _mock_response(content="Recovered", finish_reason="stop"),
        ]

        result, _ = self._run_hello(
            agent,
            ([{"role": "user", "content": "hello"}], "compressed"),
            history=self._prior_turns(),
        )

        # A generic-4xx abort would surface as a failed / incomplete result.
        assert result.get("failed") is not True
        assert result["completed"] is True

    def test_413_error_message_detection(self, agent):
        """413 detected via error message string (no status_code attr)."""
        agent.client.chat.completions.create.side_effect = [
            _make_413_error(use_status_code=False, message="error code: 413"),
            _mock_response(content="OK", finish_reason="stop"),
        ]

        result, mock_compress = self._run_hello(
            agent,
            ([{"role": "user", "content": "hello"}], "compressed"),
            history=self._prior_turns(),
        )

        mock_compress.assert_called_once()
        assert result["completed"] is True

    def test_400_context_length_triggers_compression(self, agent):
        """A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.

        OpenRouter returns HTTP 400 (not 413) for context-length errors. Before
        the fix, this was caught by the generic 4xx handler which aborted
        immediately — now it correctly triggers compression+retry.
        """
        err_400 = Exception(
            "Error code: 400 - {'error': {'message': "
            "\"This endpoint's maximum context length is 204800 tokens. "
            "However, you requested about 270460 tokens.\", 'code': 400}}"
        )
        err_400.status_code = 400
        agent.client.chat.completions.create.side_effect = [
            err_400,
            _mock_response(content="Recovered after compression", finish_reason="stop"),
        ]

        result, mock_compress = self._run_hello(
            agent,
            ([{"role": "user", "content": "hello"}], "compressed prompt"),
            history=self._prior_turns(),
        )

        mock_compress.assert_called_once()
        # Must not have been reported as a generic 4xx failure.
        assert result.get("failed") is not True
        assert result["completed"] is True
        assert result["final_response"] == "Recovered after compression"

    def test_400_reduce_length_triggers_compression(self, agent):
        """A 400 with 'reduce the length' should trigger compression."""
        err_400 = Exception(
            "Error code: 400 - Please reduce the length of the messages"
        )
        err_400.status_code = 400
        agent.client.chat.completions.create.side_effect = [
            err_400,
            _mock_response(content="OK", finish_reason="stop"),
        ]

        result, mock_compress = self._run_hello(
            agent,
            ([{"role": "user", "content": "hello"}], "compressed"),
            history=self._prior_turns(),
        )

        mock_compress.assert_called_once()
        assert result["completed"] is True

    def test_context_length_retry_rebuilds_request_after_compression(self, agent):
        """Retry must send the compressed transcript, not the stale oversized payload."""
        err_400 = Exception(
            "Error code: 400 - {'error': {'message': "
            "\"This endpoint's maximum context length is 128000 tokens. "
            "Please reduce the length of the messages.\"}}"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered after real compression", finish_reason="stop")

        # Keyword arguments of every API call, in call order.
        request_payloads = []

        def _side_effect(**kwargs):
            """Record the request; fail the first call, succeed afterwards."""
            request_payloads.append(kwargs)
            if len(request_payloads) == 1:
                raise err_400
            return ok_resp

        agent.client.chat.completions.create.side_effect = _side_effect

        result, _ = self._run_hello(
            agent,
            ([{"role": "user", "content": "compressed summary"}], "compressed prompt"),
            history=self._prior_turns(),
        )

        assert result["completed"] is True
        assert len(request_payloads) == 2
        # The retry payload must be built from the compressor's output:
        # shorter than the first request, led by the new system prompt.
        assert len(request_payloads[1]["messages"]) < len(request_payloads[0]["messages"])
        assert request_payloads[1]["messages"][0] == {
            "role": "system",
            "content": "compressed prompt",
        }
        assert request_payloads[1]["messages"][1] == {
            "role": "user",
            "content": "compressed summary",
        }

    def test_413_cannot_compress_further(self, agent):
        """When compression can't reduce messages, return partial result."""
        # Only the failing call is scripted; no successful retry is expected.
        agent.client.chat.completions.create.side_effect = [_make_413_error()]

        # No prior history is passed, and the mocked compressor hands back a
        # single "hello" message, so there is nothing left to shrink.
        result, _ = self._run_hello(
            agent,
            ([{"role": "user", "content": "hello"}], "same prompt"),
        )

        assert result["completed"] is False
        assert result.get("partial") is True
        assert "413" in result["error"]
|
|
|
|
class TestPreflightCompression:
    """Preflight compression should compress history before the first API call."""

    def test_preflight_compresses_oversized_history(self, agent):
        """When loaded history exceeds the model's context threshold, compress before API call."""
        agent.compression_enabled = True
        # Tiny context window / threshold so the history below is oversized.
        agent.context_compressor.context_length = 100
        agent.context_compressor.threshold_tokens = 85

        # 20 user/assistant exchanges (40 messages), interleaved in order.
        oversized_history = [
            turn
            for idx in range(20)
            for turn in (
                {"role": "user", "content": f"Message number {idx} with some extra text padding"},
                {"role": "assistant", "content": f"Response number {idx} with extra padding here"},
            )
        ]

        # Only one API response is scripted for this run.
        agent.client.chat.completions.create.side_effect = [
            _mock_response(content="After preflight", finish_reason="stop"),
        ]

        # The mocked compressor returns a summary message plus the new turn.
        compressed = (
            [
                {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
                {"role": "user", "content": "hello"},
            ],
            "new system prompt",
        )
        with (
            patch.object(agent, "_compress_context", return_value=compressed) as compress_mock,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            outcome = agent.run_conversation("hello", conversation_history=oversized_history)

        compress_mock.assert_called_once()
        assert outcome["completed"] is True
        assert outcome["final_response"] == "After preflight"

    def test_no_preflight_when_under_threshold(self, agent):
        """When history fits within context, no preflight compression needed."""
        agent.compression_enabled = True
        # Very large window / threshold: two short messages cannot exceed it.
        agent.context_compressor.context_length = 1000000
        agent.context_compressor.threshold_tokens = 850000

        short_history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]

        agent.client.chat.completions.create.side_effect = [
            _mock_response(content="No compression needed", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress_mock,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            outcome = agent.run_conversation("hello", conversation_history=short_history)

        compress_mock.assert_not_called()
        assert outcome["completed"] is True

    def test_no_preflight_when_compression_disabled(self, agent):
        """Preflight should not run when compression is disabled."""
        agent.compression_enabled = False
        agent.context_compressor.context_length = 100
        agent.context_compressor.threshold_tokens = 85

        # One 1000-character exchange repeated ten times (20 messages); the
        # repetition reuses the same two dict objects.
        one_exchange = [
            {"role": "user", "content": "x" * 1000},
            {"role": "assistant", "content": "y" * 1000},
        ]
        oversized_history = one_exchange * 10

        agent.client.chat.completions.create.side_effect = [
            _mock_response(content="OK", finish_reason="stop"),
        ]

        with (
            patch.object(agent, "_compress_context") as compress_mock,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Only the compressor call count is checked for this scenario.
            agent.run_conversation("hello", conversation_history=oversized_history)

        compress_mock.assert_not_called()
|
|
|
|
class TestToolResultPreflightCompression:
    """Compression should trigger when tool results push context past the threshold."""

    def test_large_tool_results_trigger_compression(self, agent):
        """When tool results push estimated tokens past threshold, compress before next call."""
        agent.compression_enabled = True
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 140_000
        # The last recorded usage sits below the 140k threshold on its own.
        agent.context_compressor.last_prompt_tokens = 130_000
        agent.context_compressor.last_completion_tokens = 5_000

        search_call = SimpleNamespace(
            id="tc1",
            type="function",
            function=SimpleNamespace(name="web_search", arguments='{"query":"test"}'),
        )
        # First response requests a tool call; second one ends the turn.
        agent.client.chat.completions.create.side_effect = [
            _mock_response(
                content=None,
                finish_reason="stop",
                tool_calls=[search_call],
                usage={"prompt_tokens": 130_000, "completion_tokens": 5_000, "total_tokens": 135_000},
            ),
            _mock_response(
                content="Done after compression",
                finish_reason="stop",
                usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
            ),
        ]
        # 100k characters of tool output; presumably enough to lift the
        # agent's token estimate over the threshold (the estimation logic
        # lives in the agent, not in this test).
        huge_tool_output = "x" * 100_000

        compressed = ([{"role": "user", "content": "hello"}], "compressed prompt")
        with (
            patch("run_agent.handle_function_call", return_value=huge_tool_output),
            patch.object(agent, "_compress_context", return_value=compressed) as compress_mock,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            outcome = agent.run_conversation("hello")

        compress_mock.assert_called_once()
        assert outcome["completed"] is True

    def test_anthropic_prompt_too_long_safety_net(self, agent):
        """Anthropic 'prompt is too long' error triggers compression as safety net."""
        too_long_error = Exception(
            "Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', "
            "'message': 'prompt is too long: 233153 tokens > 200000 maximum'}}"
        )
        too_long_error.status_code = 400
        # First call raises the 400, the retry succeeds.
        agent.client.chat.completions.create.side_effect = [
            too_long_error,
            _mock_response(content="Recovered", finish_reason="stop"),
        ]
        earlier_turns = [
            {"role": "user", "content": "previous"},
            {"role": "assistant", "content": "answer"},
        ]

        compressed = ([{"role": "user", "content": "hello"}], "compressed")
        with (
            patch.object(agent, "_compress_context", return_value=compressed) as compress_mock,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            outcome = agent.run_conversation("hello", conversation_history=earlier_turns)

        compress_mock.assert_called_once()
        assert outcome["completed"] is True
|
|