"""Regression tests for OpenAI cache-mode stability in proxy mode."""
from __future__ import annotations
from types import SimpleNamespace
import httpx
import pytest
pytest.importorskip("fastapi")
from fastapi.testclient import TestClient
from headroom.proxy.server import ProxyConfig, create_app
class _FakePrefixTracker:
def __init__(self, frozen_count: int):
self._frozen_count = frozen_count
def get_frozen_message_count(self) -> int:
return self._frozen_count
def update_from_response(self, **kwargs): # noqa: ANN003
return None
def _make_proxy_client() -> TestClient:
    """Return a ``TestClient`` wrapping a proxy app built with every listed flag off.

    Each ``ProxyConfig`` option named below is passed as ``False``; tests that
    need a feature switch it back on through ``client.app.state.proxy.config``.
    """
    switched_off = (
        "optimize",
        "cache_enabled",
        "rate_limit_enabled",
        "cost_tracking_enabled",
        "log_requests",
        "ccr_inject_tool",
        "ccr_handle_responses",
        "ccr_context_tracking",
        "image_optimize",
    )
    # Equivalent to spelling out ``name=False`` for every option above.
    settings = ProxyConfig(**dict.fromkeys(switched_off, False))
    return TestClient(create_app(settings))
def test_openai_cache_mode_freezes_previous_turns() -> None:
    """Cache mode hands the pipeline ``frozen_message_count == 2`` for a 3-message chat.

    The fake tracker itself reports zero frozen messages, so the expected value
    of 2 has to come from the proxy's own handling of the two earlier turns.
    """
    seen = {}
    tracker = _FakePrefixTracker(frozen_count=0)

    def _recording_apply(**kwargs):
        # Capture what the proxy passes, then echo the messages back untouched.
        seen["frozen_message_count"] = kwargs.get("frozen_message_count")
        return SimpleNamespace(
            messages=kwargs["messages"],
            transforms_applied=[],
            timing={},
            tokens_before=60,
            tokens_after=60,
            waste_signals=None,
        )

    async def _upstream_ok(method, url, headers, body, stream=False):  # noqa: ANN001
        # Canned upstream reply so no network request is made.
        completion = {
            "id": "chatcmpl_1",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": "ok"},
                    "finish_reason": "stop",
                }
            ],
            "usage": {"prompt_tokens": 60, "completion_tokens": 3, "total_tokens": 63},
        }
        return httpx.Response(200, json=completion)

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": "turn1"},
            {"role": "assistant", "content": "turn1-assistant"},
            {"role": "user", "content": "current turn"},
        ],
    }

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy
        proxy.config.optimize = True
        proxy.config.mode = "cache"

        # Pin the session id and tracker so the request always hits our fake.
        store = proxy.session_tracker_store
        store.compute_session_id = lambda request, model, messages: "stable-session"
        store.get_or_create = lambda session_id, provider: tracker

        proxy.openai_pipeline.apply = _recording_apply
        proxy._retry_request = _upstream_ok

        response = client.post(
            "/v1/chat/completions",
            headers={"authorization": "Bearer test-key"},
            json=payload,
        )

        assert response.status_code == 200
        assert seen["frozen_message_count"] == 2
def test_openai_cache_mode_restores_mutated_frozen_prefix() -> None:
    """The first two messages sent upstream match the request even if the pipeline edits them.

    The fake pipeline rewrites the content of message 0; the test then checks
    that the body given to ``_retry_request`` still carries the original
    messages 0 and 1.
    """
    seen = {}
    tracker = _FakePrefixTracker(frozen_count=0)
    original_messages = [
        {"role": "user", "content": "turn1"},
        {"role": "assistant", "content": "turn1-assistant"},
        {"role": "user", "content": "current turn"},
    ]

    def _tampering_apply(**kwargs):
        # Work on a shallow copy and swap in an altered first message.
        tampered = [*kwargs["messages"]]
        tampered[0] = dict(tampered[0], content="MUTATED_PREFIX")
        return SimpleNamespace(
            messages=tampered,
            transforms_applied=["fake:mutated"],
            timing={},
            tokens_before=70,
            tokens_after=65,
            waste_signals=None,
        )

    async def _capturing_upstream(method, url, headers, body, stream=False):  # noqa: ANN001
        # Keep the outgoing body for inspection and answer with a canned reply.
        seen["body"] = body
        completion = {
            "id": "chatcmpl_2",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": "ok"},
                    "finish_reason": "stop",
                }
            ],
            "usage": {"prompt_tokens": 65, "completion_tokens": 3, "total_tokens": 68},
        }
        return httpx.Response(200, json=completion)

    with _make_proxy_client() as client:
        proxy = client.app.state.proxy
        proxy.config.optimize = True
        proxy.config.mode = "cache"

        # Pin the session id and tracker so the request always hits our fake.
        store = proxy.session_tracker_store
        store.compute_session_id = lambda request, model, messages: "stable-session"
        store.get_or_create = lambda session_id, provider: tracker

        proxy.openai_pipeline.apply = _tampering_apply
        proxy._retry_request = _capturing_upstream

        response = client.post(
            "/v1/chat/completions",
            headers={"authorization": "Bearer test-key"},
            json={
                "model": "gpt-4o-mini",
                "messages": original_messages,
            },
        )

        assert response.status_code == 200
        sent_messages = seen["body"]["messages"]
        # Only the two earlier turns are checked.
        for position in (0, 1):
            assert sent_messages[position] == original_messages[position]
|