"""Regression tests for OpenAI cache-mode stability in proxy mode.""" from __future__ import annotations from types import SimpleNamespace import httpx import pytest pytest.importorskip("fastapi") from fastapi.testclient import TestClient from headroom.proxy.server import ProxyConfig, create_app class _FakePrefixTracker: def __init__(self, frozen_count: int): self._frozen_count = frozen_count def get_frozen_message_count(self) -> int: return self._frozen_count def update_from_response(self, **kwargs): # noqa: ANN003 return None def _make_proxy_client() -> TestClient: config = ProxyConfig( optimize=False, cache_enabled=False, rate_limit_enabled=False, cost_tracking_enabled=False, log_requests=False, ccr_inject_tool=False, ccr_handle_responses=False, ccr_context_tracking=False, image_optimize=False, ) app = create_app(config) return TestClient(app) def test_openai_cache_mode_freezes_previous_turns() -> None: captured = {} with _make_proxy_client() as client: proxy = client.app.state.proxy proxy.config.optimize = True proxy.config.mode = "cache" fake_tracker = _FakePrefixTracker(frozen_count=0) proxy.session_tracker_store.compute_session_id = lambda request, model, messages: ( "stable-session" ) proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker def _fake_apply(**kwargs): captured["frozen_message_count"] = kwargs.get("frozen_message_count") return SimpleNamespace( messages=kwargs["messages"], transforms_applied=[], timing={}, tokens_before=60, tokens_after=60, waste_signals=None, ) proxy.openai_pipeline.apply = _fake_apply async def _fake_retry(method, url, headers, body, stream=False): # noqa: ANN001 return httpx.Response( 200, json={ "id": "chatcmpl_1", "choices": [ { "index": 0, "message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop", } ], "usage": {"prompt_tokens": 60, "completion_tokens": 3, "total_tokens": 63}, }, ) proxy._retry_request = _fake_retry response = client.post( "/v1/chat/completions", headers={"authorization": "Bearer test-key"}, json={ "model": "gpt-4o-mini", "messages": [ {"role": "user", "content": "turn1"}, {"role": "assistant", "content": "turn1-assistant"}, {"role": "user", "content": "current turn"}, ], }, ) assert response.status_code == 200 assert captured["frozen_message_count"] == 2 def test_openai_cache_mode_restores_mutated_frozen_prefix() -> None: captured = {} with _make_proxy_client() as client: proxy = client.app.state.proxy proxy.config.optimize = True proxy.config.mode = "cache" fake_tracker = _FakePrefixTracker(frozen_count=0) proxy.session_tracker_store.compute_session_id = lambda request, model, messages: ( "stable-session" ) proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker original_messages = [ {"role": "user", "content": "turn1"}, {"role": "assistant", "content": "turn1-assistant"}, {"role": "user", "content": "current turn"}, ] def _fake_apply(**kwargs): mutated = list(kwargs["messages"]) mutated[0] = {**mutated[0], "content": "MUTATED_PREFIX"} return SimpleNamespace( messages=mutated, transforms_applied=["fake:mutated"], timing={}, tokens_before=70, tokens_after=65, waste_signals=None, ) proxy.openai_pipeline.apply = _fake_apply async def _fake_retry(method, url, headers, body, stream=False): # noqa: ANN001 captured["body"] = body return httpx.Response( 200, json={ "id": "chatcmpl_2", "choices": [ { "index": 0, "message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop", } ], "usage": {"prompt_tokens": 65, "completion_tokens": 3, "total_tokens": 68}, }, ) proxy._retry_request = _fake_retry response = client.post( "/v1/chat/completions", headers={"authorization": "Bearer test-key"}, json={ "model": "gpt-4o-mini", "messages": original_messages, }, ) assert response.status_code == 200 sent_messages = captured["body"]["messages"] assert sent_messages[0] == original_messages[0] assert sent_messages[1] == original_messages[1]