Spaces:
Running
Running
| """Regression tests for OpenAI cache-mode stability in proxy mode.""" | |
| from __future__ import annotations | |
| from types import SimpleNamespace | |
| import httpx | |
| import pytest | |
| pytest.importorskip("fastapi") | |
| from fastapi.testclient import TestClient | |
| from headroom.proxy.server import ProxyConfig, create_app | |
| class _FakePrefixTracker: | |
| def __init__(self, frozen_count: int): | |
| self._frozen_count = frozen_count | |
| def get_frozen_message_count(self) -> int: | |
| return self._frozen_count | |
| def update_from_response(self, **kwargs): # noqa: ANN003 | |
| return None | |
def _make_proxy_client() -> TestClient:
    """Return a ``TestClient`` wrapping a proxy app built with every flag off.

    Each option passed to ``ProxyConfig`` below is set to ``False``. The tests
    in this module re-enable ``optimize`` and select ``mode`` on the live proxy
    object after the client has been created.
    """
    switched_off = (
        "optimize",
        "cache_enabled",
        "rate_limit_enabled",
        "cost_tracking_enabled",
        "log_requests",
        "ccr_inject_tool",
        "ccr_handle_responses",
        "ccr_context_tracking",
        "image_optimize",
    )
    # Equivalent to passing ``name=False`` for every entry above.
    settings = ProxyConfig(**dict.fromkeys(switched_off, False))
    return TestClient(create_app(settings))
def test_openai_cache_mode_freezes_previous_turns() -> None:
    """Check the ``frozen_message_count`` handed to the OpenAI pipeline in cache mode.

    The tracker stub reports zero frozen messages and the request carries two
    earlier turns followed by the current user turn. The stubbed pipeline
    records the ``frozen_message_count`` keyword it receives, which is expected
    to be ``2``.
    """
    seen: dict = {}

    # Canned upstream completion returned by the stubbed retry helper.
    upstream_payload = {
        "id": "chatcmpl_1",
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": "ok"},
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 60, "completion_tokens": 3, "total_tokens": 63},
    }
    conversation = [
        {"role": "user", "content": "turn1"},
        {"role": "assistant", "content": "turn1-assistant"},
        {"role": "user", "content": "current turn"},
    ]

    def _record_and_passthrough(**kwargs):
        # Capture the value under test, then hand the messages back untouched.
        seen["frozen_message_count"] = kwargs.get("frozen_message_count")
        return SimpleNamespace(
            messages=kwargs["messages"],
            transforms_applied=[],
            timing={},
            tokens_before=60,
            tokens_after=60,
            waste_signals=None,
        )

    async def _canned_upstream(method, url, headers, body, stream=False):  # noqa: ANN001
        return httpx.Response(200, json=upstream_payload)

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy
        proxy.config.optimize = True
        proxy.config.mode = "cache"

        # Pin the session id and always resolve it to the same tracker stub.
        tracker_stub = _FakePrefixTracker(frozen_count=0)
        store = proxy.session_tracker_store
        store.compute_session_id = lambda request, model, messages: "stable-session"
        store.get_or_create = lambda session_id, provider: tracker_stub

        proxy.openai_pipeline.apply = _record_and_passthrough
        proxy._retry_request = _canned_upstream

        reply = client.post(
            "/v1/chat/completions",
            headers={"authorization": "Bearer test-key"},
            json={"model": "gpt-4o-mini", "messages": conversation},
        )

        assert reply.status_code == 200
        assert seen["frozen_message_count"] == 2
def test_openai_cache_mode_restores_mutated_frozen_prefix() -> None:
    """Check that a pipeline rewrite of earlier turns is not forwarded upstream.

    The stubbed pipeline replaces the content of the first message with
    ``"MUTATED_PREFIX"``. The test then inspects the body handed to the stubbed
    upstream call and asserts that its first two messages equal the ones the
    client originally sent.

    The test also asserts that the stubbed pipeline actually ran. Without that
    check, a proxy that skipped the pipeline entirely would forward the
    original messages and satisfy the prefix assertions without exercising any
    restoration.
    """
    captured: dict = {}
    original_messages = [
        {"role": "user", "content": "turn1"},
        {"role": "assistant", "content": "turn1-assistant"},
        {"role": "user", "content": "current turn"},
    ]

    def _fake_apply(**kwargs):
        # Record the call so the test cannot pass without the mutation happening.
        captured["pipeline_ran"] = True
        # Copy the list and replace the first entry; the caller's dicts are not
        # modified in place.
        mutated = list(kwargs["messages"])
        mutated[0] = {**mutated[0], "content": "MUTATED_PREFIX"}
        return SimpleNamespace(
            messages=mutated,
            transforms_applied=["fake:mutated"],
            timing={},
            tokens_before=70,
            tokens_after=65,
            waste_signals=None,
        )

    async def _fake_retry(method, url, headers, body, stream=False):  # noqa: ANN001
        # Keep the outgoing body so the forwarded messages can be inspected.
        captured["body"] = body
        return httpx.Response(
            200,
            json={
                "id": "chatcmpl_2",
                "choices": [
                    {
                        "index": 0,
                        "message": {"role": "assistant", "content": "ok"},
                        "finish_reason": "stop",
                    }
                ],
                "usage": {"prompt_tokens": 65, "completion_tokens": 3, "total_tokens": 68},
            },
        )

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy
        proxy.config.optimize = True
        proxy.config.mode = "cache"

        # Pin the session id and always resolve it to the same tracker stub.
        fake_tracker = _FakePrefixTracker(frozen_count=0)
        proxy.session_tracker_store.compute_session_id = lambda request, model, messages: (
            "stable-session"
        )
        proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker

        proxy.openai_pipeline.apply = _fake_apply
        proxy._retry_request = _fake_retry

        response = client.post(
            "/v1/chat/completions",
            headers={"authorization": "Bearer test-key"},
            json={
                "model": "gpt-4o-mini",
                "messages": original_messages,
            },
        )

        assert response.status_code == 200
        assert captured.get("pipeline_ran"), "stubbed pipeline was never invoked"
        assert "body" in captured, "no upstream request was captured"
        sent_messages = captured["body"]["messages"]
        assert sent_messages[0] == original_messages[0]
        assert sent_messages[1] == original_messages[1]