headroom / tests /test_proxy /test_proxy_openai_cache_stability.py
tudragon154203
fix: route count_tokens to api.anthropic.com, not proxy base_url
0adb431
"""Regression tests for OpenAI cache-mode stability in proxy mode."""
from __future__ import annotations
from types import SimpleNamespace
import httpx
import pytest
pytest.importorskip("fastapi")
from fastapi.testclient import TestClient
from headroom.proxy.server import ProxyConfig, create_app
class _FakePrefixTracker:
def __init__(self, frozen_count: int):
self._frozen_count = frozen_count
def get_frozen_message_count(self) -> int:
return self._frozen_count
def update_from_response(self, **kwargs): # noqa: ANN003
return None
def _make_proxy_client() -> TestClient:
    """Return a ``TestClient`` for a proxy app built with every listed flag off.

    Individual tests switch back on whatever they need by mutating
    ``client.app.state.proxy.config`` after the client is created.
    """
    # Each of these ProxyConfig keyword arguments is passed as False.
    disabled_flags = (
        "optimize",
        "cache_enabled",
        "rate_limit_enabled",
        "cost_tracking_enabled",
        "log_requests",
        "ccr_inject_tool",
        "ccr_handle_responses",
        "ccr_context_tracking",
        "image_optimize",
    )
    config = ProxyConfig(**dict.fromkeys(disabled_flags, False))
    return TestClient(create_app(config))
def test_openai_cache_mode_freezes_previous_turns() -> None:
    """Cache mode must pass the pipeline a frozen count covering earlier turns.

    The fake tracker reports zero frozen messages, while the request carries
    two earlier messages ahead of the current user turn. The pipeline stub
    records the ``frozen_message_count`` it receives, which is expected to be 2.
    """
    seen = {}

    def _record_apply(**kwargs):
        # Capture the value under test and hand the messages back unchanged.
        seen["frozen_message_count"] = kwargs.get("frozen_message_count")
        return SimpleNamespace(
            messages=kwargs["messages"],
            transforms_applied=[],
            timing={},
            tokens_before=60,
            tokens_after=60,
            waste_signals=None,
        )

    async def _upstream_ok(method, url, headers, body, stream=False):  # noqa: ANN001
        # Canned upstream reply so no network call is made.
        completion = {
            "id": "chatcmpl_1",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": "ok"},
                    "finish_reason": "stop",
                }
            ],
            "usage": {"prompt_tokens": 60, "completion_tokens": 3, "total_tokens": 63},
        }
        return httpx.Response(200, json=completion)

    conversation = [
        {"role": "user", "content": "turn1"},
        {"role": "assistant", "content": "turn1-assistant"},
        {"role": "user", "content": "current turn"},
    ]
    tracker = _FakePrefixTracker(frozen_count=0)

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy
        proxy.config.optimize = True
        proxy.config.mode = "cache"

        # Pin the session id and tracker so the request always hits the fake.
        store = proxy.session_tracker_store
        store.compute_session_id = lambda request, model, messages: "stable-session"
        store.get_or_create = lambda session_id, provider: tracker

        proxy.openai_pipeline.apply = _record_apply
        proxy._retry_request = _upstream_ok

        response = client.post(
            "/v1/chat/completions",
            headers={"authorization": "Bearer test-key"},
            json={"model": "gpt-4o-mini", "messages": conversation},
        )

        assert response.status_code == 200
        assert seen["frozen_message_count"] == 2
def test_openai_cache_mode_restores_mutated_frozen_prefix() -> None:
    """Cache mode must forward the original prefix even if the pipeline edits it.

    The pipeline stub rewrites the first message's content. The body captured
    from the upstream call is then expected to carry the first two messages
    exactly as the client sent them.

    The test also asserts that the pipeline stub actually ran. Without that
    check the prefix comparison would pass trivially whenever the proxy skipped
    the pipeline, because nothing would have been mutated in the first place.
    """
    captured = {}
    original_messages = [
        {"role": "user", "content": "turn1"},
        {"role": "assistant", "content": "turn1-assistant"},
        {"role": "user", "content": "current turn"},
    ]

    def _fake_apply(**kwargs):
        # Record the call so the test can prove the mutation path was exercised.
        captured["apply_called"] = True
        mutated = list(kwargs["messages"])
        # Replace (rather than edit in place) so the caller's dict is untouched.
        mutated[0] = {**mutated[0], "content": "MUTATED_PREFIX"}
        return SimpleNamespace(
            messages=mutated,
            transforms_applied=["fake:mutated"],
            timing={},
            tokens_before=70,
            tokens_after=65,
            waste_signals=None,
        )

    async def _fake_retry(method, url, headers, body, stream=False):  # noqa: ANN001
        # Keep the outgoing body for inspection and return a canned completion.
        captured["body"] = body
        return httpx.Response(
            200,
            json={
                "id": "chatcmpl_2",
                "choices": [
                    {
                        "index": 0,
                        "message": {"role": "assistant", "content": "ok"},
                        "finish_reason": "stop",
                    }
                ],
                "usage": {"prompt_tokens": 65, "completion_tokens": 3, "total_tokens": 68},
            },
        )

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy
        proxy.config.optimize = True
        proxy.config.mode = "cache"

        # Pin the session id and tracker so the request always hits the fake.
        fake_tracker = _FakePrefixTracker(frozen_count=0)
        proxy.session_tracker_store.compute_session_id = lambda request, model, messages: (
            "stable-session"
        )
        proxy.session_tracker_store.get_or_create = lambda session_id, provider: fake_tracker

        proxy.openai_pipeline.apply = _fake_apply
        proxy._retry_request = _fake_retry

        response = client.post(
            "/v1/chat/completions",
            headers={"authorization": "Bearer test-key"},
            json={
                "model": "gpt-4o-mini",
                "messages": original_messages,
            },
        )

        assert response.status_code == 200
        assert captured.get("apply_called"), (
            "pipeline stub was never invoked, so the prefix check would be vacuous"
        )
        assert "body" in captured, "no request was forwarded upstream"

        sent_messages = captured["body"]["messages"]
        assert sent_messages[0] == original_messages[0]
        assert sent_messages[1] == original_messages[1]