File size: 15,131 Bytes
8756398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
"""Integration tests for Gemini native API endpoint with real API calls.

These tests require a valid GEMINI_API_KEY environment variable.
They test the /v1beta/models/{model}:generateContent and :countTokens endpoints with compression.

Run with:
    GEMINI_API_KEY=your-key pytest tests/test_proxy_gemini_native_integration.py -v
"""

import json
import os

import pytest

# Skip entire module if no API key.
# The condition is true when GEMINI_API_KEY is unset or set to an empty string;
# the mark is applied to every test in this module via `pytestmark`.
pytestmark = pytest.mark.skipif(
    not os.environ.get("GEMINI_API_KEY"), reason="GEMINI_API_KEY not set"
)

# Skip the module when these packages cannot be imported. These calls run
# before the fastapi / headroom imports that follow them in this file.
pytest.importorskip("fastapi")
pytest.importorskip("httpx")

from fastapi.testclient import TestClient  # noqa: E402

from headroom.proxy.server import ProxyConfig, create_app  # noqa: E402


@pytest.fixture
def gemini_native_client():
    """Yield a TestClient bound to a proxy app that has optimization turned on.

    Caching, rate limiting and cost tracking are switched off in the
    ProxyConfig handed to create_app. The client is entered as a context
    manager and yielded to the requesting test from inside that context.
    """
    proxy_settings = {
        "optimize": True,  # compression is the feature under test
        "cache_enabled": False,
        "rate_limit_enabled": False,
        "cost_tracking_enabled": False,
    }
    proxy_app = create_app(ProxyConfig(**proxy_settings))
    with TestClient(proxy_app) as test_client:
        yield test_client


@pytest.fixture
def api_key():
    """Return the value of GEMINI_API_KEY from the environment (None if unset)."""
    return os.getenv("GEMINI_API_KEY")


class TestGeminiNativeGenerateContent:
    """Exercise the /v1beta/models/{model}:generateContent endpoint."""

    def test_basic_generation(self, gemini_native_client, api_key):
        """A plain text prompt yields a well-formed native response."""
        prompt = {"text": "What is 2+2? Reply with just the number."}
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json={"contents": [{"parts": [prompt]}]},
        )
        assert response.status_code == 200
        body = response.json()

        # Native response shape: candidates -> content -> parts -> text
        assert "candidates" in body
        candidates = body["candidates"]
        assert len(candidates) > 0
        first = candidates[0]
        assert "content" in first
        assert "parts" in first["content"]
        answer = first["content"]["parts"][0]["text"]
        assert "4" in answer

        # Token usage must be reported alongside the answer
        assert "usageMetadata" in body
        assert "promptTokenCount" in body["usageMetadata"]

    def test_with_system_instruction(self, gemini_native_client, api_key):
        """A systemInstruction in the payload shapes the reply."""
        payload = {
            "contents": [{"parts": [{"text": "Hello"}]}],
            "systemInstruction": {"parts": [{"text": "Always respond with exactly one word."}]},
        }
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json=payload,
        )
        assert response.status_code == 200
        body = response.json()
        reply = body["candidates"][0]["content"]["parts"][0]["text"]
        # One word was requested; up to three are tolerated
        word_count = len(reply.split())
        assert word_count <= 3

    def test_multi_turn_conversation(self, gemini_native_client, api_key):
        """Information from an earlier turn is available in a later one."""
        turns = [
            {"role": "user", "parts": [{"text": "My name is TestUser456."}]},
            {"role": "model", "parts": [{"text": "Nice to meet you, TestUser456!"}]},
            {"role": "user", "parts": [{"text": "What is my name?"}]},
        ]
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json={"contents": turns},
        )
        assert response.status_code == 200
        body = response.json()
        reply = body["candidates"][0]["content"]["parts"][0]["text"]
        # Case-insensitive match on the name given in the first turn
        assert "testuser456" in reply.lower()

    def test_function_calling(self, gemini_native_client, api_key):
        """A declared tool is invoked with the expected arguments."""
        weather_tool = {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string", "description": "City name"}},
                "required": ["location"],
            },
        }
        payload = {
            "contents": [{"parts": [{"text": "What is the weather in Tokyo?"}]}],
            "tools": [{"functionDeclarations": [weather_tool]}],
        }
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json=payload,
        )
        assert response.status_code == 200
        body = response.json()

        # Take the first part that carries a functionCall, if any
        parts = body["candidates"][0]["content"]["parts"]
        function_call = next(
            (part["functionCall"] for part in parts if "functionCall" in part),
            None,
        )

        assert function_call is not None
        assert function_call["name"] == "get_weather"
        assert "tokyo" in function_call["args"]["location"].lower()

    def test_generation_config(self, gemini_native_client, api_key):
        """generationConfig limits are honoured by the response."""
        generation_config = {"maxOutputTokens": 50, "temperature": 0.1}
        payload = {
            "contents": [{"parts": [{"text": "Write a very short poem about AI."}]}],
            "generationConfig": generation_config,
        }
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json=payload,
        )
        assert response.status_code == 200
        usage = response.json()["usageMetadata"]
        # maxOutputTokens is 50; the bound of 60 leaves some buffer
        assert usage["candidatesTokenCount"] <= 60


class TestGeminiNativeCompression:
    """Send large payloads through the optimizing proxy to the Gemini native API."""

    def test_compression_on_model_message(self, gemini_native_client, api_key):
        """A large JSON blob in a model turn still yields a correct answer."""
        # 100 small records, serialized to simulate tool output
        records = []
        for i in range(100):
            records.append({"id": i, "name": f"Item {i}", "desc": f"Description for item {i}"})
        tool_output = json.dumps(records)

        # The blob rides in the model turn, as if a tool had returned it
        conversation = [
            {"role": "user", "parts": [{"text": "Get items from database"}]},
            {"role": "model", "parts": [{"text": f"Here are the results:\n{tool_output}"}]},
            {"role": "user", "parts": [{"text": "How many items are there?"}]},
        ]
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json={"contents": conversation},
        )
        assert response.status_code == 200
        body = response.json()
        answer = body["candidates"][0]["content"]["parts"][0]["text"]
        # The model is expected to report the item count
        assert "100" in answer

        # Only non-negativity of the saved-token counter is checked, because
        # the payload may or may not be compressed depending on its size
        token_stats = gemini_native_client.get("/stats").json()["tokens"]
        assert token_stats["saved"] >= 0

    def test_user_messages_protected(self, gemini_native_client, api_key):
        """A large JSON blob in a user turn is accepted by the endpoint."""
        # 50 records embedded directly in the user's own message
        user_data = json.dumps([{"id": i} for i in range(50)])
        user_turn = {"role": "user", "parts": [{"text": f"Analyze this data: {user_data}"}]}

        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json={"contents": [user_turn]},
        )
        # Success is the only check; user messages are not compressed (by design)
        assert response.status_code == 200


class TestGeminiNativeStats:
    """Test that proxy stats track Gemini native requests correctly."""

    def test_stats_track_gemini_provider(self, gemini_native_client, api_key):
        """Stats show requests under 'gemini' provider."""
        # Make a request
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json={"contents": [{"parts": [{"text": "Hi"}]}]},
        )
        # Surface an upstream failure here, with its status code, instead of
        # letting it show up later as a missing or unexpected stats entry.
        assert response.status_code == 200

        stats = gemini_native_client.get("/stats").json()
        by_provider = stats["requests"]["by_provider"]
        assert "gemini" in by_provider
        assert by_provider["gemini"] >= 1

    def test_stats_track_model(self, gemini_native_client, api_key):
        """Stats track the specific model used."""
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json={"contents": [{"parts": [{"text": "Hi"}]}]},
        )
        # Same reasoning as above: the stats check is only meaningful once
        # the request itself is known to have succeeded.
        assert response.status_code == 200

        stats = gemini_native_client.get("/stats").json()
        assert "gemini-2.0-flash" in stats["requests"]["by_model"]


class TestGeminiNativeErrorHandling:
    """Check the status codes returned for bad Gemini native requests."""

    def test_invalid_api_key(self, gemini_native_client):
        """A bogus key produces a status code of 400 or above."""
        payload = {"contents": [{"parts": [{"text": "Hi"}]}]}
        response = gemini_native_client.post(
            "/v1beta/models/gemini-2.0-flash:generateContent?key=invalid-key-123",
            json=payload,
        )
        assert 400 <= response.status_code

    def test_invalid_model(self, gemini_native_client, api_key):
        """An unknown model name produces a status code of 400 or above."""
        payload = {"contents": [{"parts": [{"text": "Hi"}]}]}
        response = gemini_native_client.post(
            f"/v1beta/models/nonexistent-model-xyz:generateContent?key={api_key}",
            json=payload,
        )
        assert 400 <= response.status_code

    def test_empty_contents(self, gemini_native_client, api_key):
        """An empty contents list gives either success or a 400."""
        empty_payload = {"contents": []}
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:generateContent?key={api_key}",
            json=empty_payload,
        )
        # Both outcomes are accepted: handled gracefully (200) or rejected (400)
        assert response.status_code in (200, 400)


class TestGeminiNativeHeaderAuth:
    """Authenticate with the x-goog-api-key header instead of the query string."""

    def test_header_auth(self, gemini_native_client, api_key):
        """generateContent succeeds when the key is supplied as a header."""
        auth_headers = {"x-goog-api-key": api_key}
        payload = {"contents": [{"parts": [{"text": "Hi"}]}]}
        response = gemini_native_client.post(
            "/v1beta/models/gemini-2.0-flash:generateContent",
            headers=auth_headers,
            json=payload,
        )
        assert response.status_code == 200


class TestGeminiNativeCountTokens:
    """Exercise the /v1beta/models/{model}:countTokens endpoint through the proxy."""

    def test_count_tokens_basic(self, gemini_native_client, api_key):
        """A simple prompt returns a positive integer totalTokens."""
        payload = {"contents": [{"parts": [{"text": "Hello, world!"}]}]}
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:countTokens?key={api_key}",
            json=payload,
        )
        assert response.status_code == 200
        body = response.json()

        # Response must carry a positive integer count
        assert "totalTokens" in body
        total = body["totalTokens"]
        assert isinstance(total, int)
        assert total > 0

    def test_count_tokens_with_system_instruction(self, gemini_native_client, api_key):
        """A payload with systemInstruction is either counted or rejected with 400."""
        payload = {
            "contents": [{"parts": [{"text": "Hello"}]}],
            "systemInstruction": {"parts": [{"text": "You are a helpful assistant."}]},
        }
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:countTokens?key={api_key}",
            json=payload,
        )
        # systemInstruction may not be supported by countTokens in all versions
        assert response.status_code in (200, 400)
        if response.status_code != 200:
            return
        body = response.json()
        assert "totalTokens" in body
        assert body["totalTokens"] > 0

    def test_count_tokens_reflects_compression(self, gemini_native_client, api_key):
        """countTokens succeeds on a large model turn and the request shows up in stats."""
        # Large, repetitive JSON data placed in the model turn
        records = []
        for i in range(100):
            records.append(
                {
                    "id": i,
                    "name": f"Item {i}",
                    "description": f"This is the description for item number {i}",
                }
            )
        tool_output = json.dumps(records)

        conversation = [
            {"role": "user", "parts": [{"text": "Get items from database"}]},
            {"role": "model", "parts": [{"text": f"Here are the results:\n{tool_output}"}]},
            {"role": "user", "parts": [{"text": "Summarize these items"}]},
        ]
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:countTokens?key={api_key}",
            json={"contents": conversation},
        )
        assert response.status_code == 200
        body = response.json()

        # A positive token count must come back
        assert "totalTokens" in body
        assert body["totalTokens"] > 0

        # The request should have been tracked under the gemini provider
        by_provider = gemini_native_client.get("/stats").json()["requests"]["by_provider"]
        assert by_provider.get("gemini", 0) >= 1

    def test_count_tokens_multi_turn(self, gemini_native_client, api_key):
        """A three-turn conversation returns a positive totalTokens."""
        turns = [
            {"role": "user", "parts": [{"text": "My name is Alice."}]},
            {"role": "model", "parts": [{"text": "Nice to meet you, Alice!"}]},
            {"role": "user", "parts": [{"text": "What is my name?"}]},
        ]
        response = gemini_native_client.post(
            f"/v1beta/models/gemini-2.0-flash:countTokens?key={api_key}",
            json={"contents": turns},
        )
        assert response.status_code == 200
        body = response.json()
        assert "totalTokens" in body
        assert body["totalTokens"] > 0

    def test_count_tokens_header_auth(self, gemini_native_client, api_key):
        """countTokens succeeds when the key is supplied via x-goog-api-key."""
        auth_headers = {"x-goog-api-key": api_key}
        payload = {"contents": [{"parts": [{"text": "Hello"}]}]}
        response = gemini_native_client.post(
            "/v1beta/models/gemini-2.0-flash:countTokens",
            headers=auth_headers,
            json=payload,
        )
        assert response.status_code == 200
        body = response.json()
        assert "totalTokens" in body