from __future__ import annotations import json import socket import threading import time import unittest from unittest.mock import patch from chatmock.app import create_app from chatmock.session import reset_session_state from websockets.sync.client import connect as ws_connect class FakeUpstream: def __init__( self, events: list[dict[str, object]] | None = None, *, status_code: int = 200, headers: dict[str, str] | None = None, content: bytes | None = None, text: str = "", ) -> None: self._events = events self.status_code = status_code self.headers = headers or {} self.content = content or b"" self.text = text def iter_lines(self, decode_unicode: bool = False): for event in self._events or []: payload = f"data: {json.dumps(event)}" yield payload if decode_unicode else payload.encode("utf-8") def iter_content(self, chunk_size=None): if self.content: yield self.content return for event in self._events or []: payload = f"data: {json.dumps(event)}\n\n".encode("utf-8") yield payload def json(self): return json.loads(self.content.decode("utf-8")) def close(self) -> None: return None class RouteTests(unittest.TestCase): def setUp(self) -> None: reset_session_state() self.app = create_app() self.client = self.app.test_client() def test_openai_models_list(self) -> None: response = self.client.get("/v1/models") body = response.get_json() self.assertEqual(response.status_code, 200) model_ids = [item["id"] for item in body["data"]] self.assertIn("gpt-5.4", model_ids) self.assertIn("gpt-5.4-mini", model_ids) self.assertIn("gpt-5.3-codex-spark", model_ids) def test_ollama_tags_list(self) -> None: response = self.client.get("/api/tags") body = response.get_json() self.assertEqual(response.status_code, 200) model_names = [item["name"] for item in body["models"]] self.assertIn("gpt-5.4", model_names) self.assertIn("gpt-5.4-mini", model_names) @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ {"type": "response.output_text.delta", "delta": "hello"}, {"type": "response.completed", "response": {"id": "resp-openai"}}, ] ), None, ) response = self.client.post( "/v1/chat/completions", json={"model": "gpt5.4-mini", "messages": [{"role": "user", "content": "hi"}]}, ) body = response.get_json() self.assertEqual(response.status_code, 200) self.assertEqual(body["choices"][0]["message"]["content"], "hello") self.assertEqual(body["model"], "gpt5.4-mini") @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") client = app.test_client() mock_start.return_value = ( FakeUpstream( [ {"type": "response.output_text.delta", "delta": "hello"}, {"type": "response.completed", "response": {"id": "resp-openai"}}, ] ), None, ) response = client.post( "/v1/chat/completions", json={"model": "gpt-5.3-codex", "messages": [{"role": "user", "content": "hi"}]}, ) self.assertEqual(response.status_code, 200) self.assertEqual(mock_start.call_args.args[0], "gpt-5.4") @patch("chatmock.routes_ollama.start_upstream_request") def test_ollama_chat(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ {"type": "response.output_text.delta", "delta": "hello"}, {"type": "response.completed"}, ] ), None, ) response = self.client.post( "/api/chat", json={"model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], "stream": False}, ) body = response.get_json() self.assertEqual(response.status_code, 200) self.assertEqual(body["message"]["content"], "hello") self.assertEqual(body["model"], "gpt-5.4") @patch("chatmock.routes_ollama.start_upstream_request") def test_ollama_chat_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") client = app.test_client() mock_start.return_value = ( FakeUpstream( [ {"type": "response.output_text.delta", "delta": "hello"}, {"type": "response.completed"}, ] ), None, ) response = client.post( "/api/chat", json={"model": "gpt-5.3-codex", "messages": [{"role": "user", "content": "hi"}], "stream": False}, ) body = response.get_json() self.assertEqual(response.status_code, 200) self.assertEqual(mock_start.call_args.args[0], "gpt-5.4") self.assertEqual(body["model"], "gpt-5.4") @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ {"type": "response.output_text.delta", "delta": "hello"}, {"type": "response.completed", "response": {"id": "resp-openai"}}, ] ), None, ) response = self.client.post( "/v1/chat/completions", json={ "model": "gpt-5.4", "fast_mode": True, "messages": [{"role": "user", "content": "hi"}], }, ) self.assertEqual(response.status_code, 200) self.assertEqual(mock_start.call_args.kwargs["service_tier"], "priority") @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_fast_mode_false_overrides_server_default(self, mock_start) -> None: app = create_app(fast_mode=True) client = app.test_client() mock_start.return_value = ( FakeUpstream( [ {"type": "response.output_text.delta", "delta": "hello"}, {"type": "response.completed", "response": {"id": "resp-openai"}}, ] ), None, ) response = client.post( "/v1/chat/completions", json={ "model": "gpt-5.4", "fast_mode": False, "messages": [{"role": "user", "content": "hi"}], }, ) self.assertEqual(response.status_code, 200) self.assertIsNone(mock_start.call_args.kwargs["service_tier"]) @patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_start) -> None: response = self.client.post( "/v1/chat/completions", json={ "model": "gpt-5.3-codex", "fast_mode": True, "messages": [{"role": "user", "content": "hi"}], }, ) body = response.get_json() self.assertEqual(response.status_code, 400) self.assertIn("Fast mode is not supported", body["error"]["message"]) mock_start.assert_not_called() @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_returns_completed_response_object(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_123", "object": "response", "status": "in_progress"}, }, { "type": "response.completed", "response": { "id": "resp_123", "object": "response", "status": "completed", "output": [], }, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ) response = self.client.post( "/v1/responses", json={"model": "gpt5.4-mini", "input": "hello"}, ) body = response.get_json() self.assertEqual(response.status_code, 200) self.assertEqual(body["id"], "resp_123") outbound_payload = mock_start.call_args.args[0] self.assertEqual(outbound_payload["model"], "gpt-5.4-mini") self.assertEqual(outbound_payload["store"], False) self.assertEqual( outbound_payload["input"], [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}], ) self.assertEqual(outbound_payload["reasoning"]["effort"], "medium") self.assertIsInstance(outbound_payload["prompt_cache_key"], str) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_honors_debug_model_override(self, mock_start) -> None: app = create_app(debug_model="gpt-5.4") client = app.test_client() mock_start.return_value = ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_debug", "object": "response", "status": "in_progress"}, }, { "type": "response.completed", "response": { "id": "resp_debug", "object": "response", "status": "completed", "output": [], }, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ) response = client.post( "/v1/responses", json={"model": "gpt-5.3-codex", "input": "hello"}, ) self.assertEqual(response.status_code, 200) outbound_payload = mock_start.call_args.args[0] self.assertEqual(outbound_payload["model"], "gpt-5.4") @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_strips_unsupported_max_output_tokens(self, mock_start) -> None: mock_start.return_value = ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_limit", "object": "response", "status": "in_progress"}, }, { "type": "response.completed", "response": { "id": "resp_limit", "object": "response", "status": "completed", "output": [], }, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ) response = self.client.post( "/v1/responses", json={"model": "gpt-5.4", "input": "hello", "max_output_tokens": 20}, ) self.assertEqual(response.status_code, 200) outbound_payload = mock_start.call_args.args[0] self.assertNotIn("max_output_tokens", outbound_payload) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None: mock_start.side_effect = [ ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_1", "object": "response", "status": "in_progress"}, }, { "type": "response.output_item.done", "item": { "type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}], }, }, { "type": "response.completed", "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []}, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_2", "object": "response", "status": "in_progress"}, }, { "type": "response.completed", "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []}, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ] first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}) second = self.client.post( "/v1/responses", json={ "model": "gpt-5.4", "input": [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, ], }, ) self.assertEqual(first.status_code, 200) self.assertEqual(second.status_code, 200) outbound_payload = mock_start.call_args_list[1].args[0] self.assertNotIn("previous_response_id", outbound_payload) self.assertEqual( outbound_payload["input"], [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, ], ) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_falls_back_to_full_create_when_non_input_fields_change(self, mock_start) -> None: mock_start.side_effect = [ ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_1", "object": "response", "status": "in_progress"}, }, { "type": "response.completed", "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []}, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ( FakeUpstream( [ { "type": "response.created", "response": {"id": "resp_2", "object": "response", "status": "in_progress"}, }, { "type": "response.completed", "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []}, }, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ] headers = {"X-Session-Id": "session-fixed"} first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers) second = self.client.post( "/v1/responses", json={ "model": "gpt-5.4", "instructions": "changed", "input": [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, ], }, headers=headers, ) self.assertEqual(first.status_code, 200) self.assertEqual(second.status_code, 200) outbound_payload = mock_start.call_args_list[1].args[0] self.assertNotIn("previous_response_id", outbound_payload) self.assertEqual( outbound_payload["input"], [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, ], ) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> None: mock_start.side_effect = [ ( FakeUpstream( [ {"type": "response.created", "response": {"id": "resp_1"}}, {"type": "response.completed", "response": {"id": "resp_1", "output": []}}, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ( FakeUpstream( [ {"type": "response.failed", "response": {"error": {"message": "boom"}}}, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ( FakeUpstream( [ {"type": "response.created", "response": {"id": "resp_3"}}, {"type": "response.completed", "response": {"id": "resp_3", "output": []}}, ], headers={"Content-Type": "text/event-stream"}, ), None, ), ] headers = {"X-Session-Id": "session-fixed"} first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers) second = self.client.post( "/v1/responses", json={ "model": "gpt-5.4", "input": [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, ], }, headers=headers, ) third = self.client.post( "/v1/responses", json={ "model": "gpt-5.4", "input": [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]}, ], }, headers=headers, ) self.assertEqual(first.status_code, 200) self.assertEqual(second.status_code, 502) self.assertEqual(third.status_code, 200) outbound_payload = mock_start.call_args_list[2].args[0] self.assertNotIn("previous_response_id", outbound_payload) self.assertEqual( outbound_payload["input"], [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]}, ], ) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_stream_passthrough(self, mock_start) -> None: chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n' mock_start.return_value = ( FakeUpstream( headers={"Content-Type": "text/event-stream"}, content=chunk, ), None, ) response = self.client.post( "/v1/responses", json={"model": "gpt-5.4", "input": "hello", "stream": True}, ) self.assertEqual(response.status_code, 200) self.assertIn("response.output_text.delta", response.get_data(as_text=True)) @patch("chatmock.routes_openai.start_upstream_raw_request") def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None: response = self.client.post( "/v1/responses", json={"model": "gpt-5.3-codex", "input": "hello", "service_tier": "priority"}, ) body = response.get_json() self.assertEqual(response.status_code, 400) self.assertIn("Fast mode is not supported", body["error"]["message"]) mock_start.assert_not_called() @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) @patch("chatmock.websocket_routes.connect_upstream_websocket") def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None: class FakeUpstreamWebsocket: def __init__(self) -> None: self.sent: list[str] = [] self._messages = [ json.dumps({"type": "response.created", "response": {"id": "resp_ws_1"}}), json.dumps({ "type": "response.output_item.done", "item": { "type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}], }, }), json.dumps({"type": "response.completed", "response": {"id": "resp_ws_1"}}), json.dumps({"type": "response.created", "response": {"id": "resp_ws_2"}}), json.dumps({"type": "response.completed", "response": {"id": "resp_ws_2"}}), ] def send(self, message: str) -> None: self.sent.append(message) def recv(self) -> str: return self._messages.pop(0) def close(self) -> None: return None fake_upstream = FakeUpstreamWebsocket() mock_connect.return_value = fake_upstream app = create_app() sock = socket.socket() sock.bind(("127.0.0.1", 0)) host, port = sock.getsockname() sock.close() server_thread = threading.Thread( target=app.run, kwargs={ "host": host, "port": port, "use_reloader": False, "threaded": True, }, daemon=True, ) server_thread.start() time.sleep(0.5) with ws_connect(f"ws://{host}:{port}/v1/responses") as client: client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True})) first = json.loads(client.recv()) assistant = json.loads(client.recv()) second = json.loads(client.recv()) client.send( json.dumps( { "type": "response.create", "model": "gpt-5.4", "fast_mode": True, "input": [ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, ], } ) ) third = json.loads(client.recv()) fourth = json.loads(client.recv()) self.assertEqual(first["type"], "response.created") self.assertEqual(assistant["type"], "response.output_item.done") self.assertEqual(second["type"], "response.completed") self.assertEqual(third["type"], "response.created") self.assertEqual(fourth["type"], "response.completed") outbound = json.loads(fake_upstream.sent[0]) self.assertEqual(outbound["model"], "gpt-5.4") self.assertEqual(outbound["service_tier"], "priority") self.assertEqual(outbound["type"], "response.create") self.assertEqual( outbound["input"], [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}], ) self.assertIn("prompt_cache_key", outbound) follow_up = json.loads(fake_upstream.sent[1]) self.assertEqual(follow_up["previous_response_id"], "resp_ws_1") self.assertEqual( follow_up["input"], [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}], ) if __name__ == "__main__": unittest.main()