# Spaces: minhtudragon/headroom (Running) -- File size: 8,960 Bytes

"""Tests for compression header handling in the proxy server.

These tests verify that the proxy correctly removes Content-Encoding headers
from responses after httpx automatically decompresses them, preventing
double-decompression errors (ZlibError) in clients.
"""

import gzip
import json

import pytest


@pytest.fixture
def mock_anthropic_response_with_compression_headers():
    """Provide a response double whose headers disagree with its body.

    The body is plain (already decoded) JSON, mirroring what httpx hands back
    after it transparently decompresses a response, while the headers still
    advertise gzip and a compressed-size content-length.  That mismatch is the
    condition behind the ZlibError bug these tests target.
    """

    class MockResponse:
        """Stand-in for an httpx response carrying stale compression headers."""

        def __init__(self):
            payload = {
                "id": "msg_test123",
                "type": "message",
                "role": "assistant",
                "content": [{"type": "text", "text": "Hello!"}],
                "model": "claude-3-5-sonnet-20241022",
                "stop_reason": "end_turn",
                "usage": {"input_tokens": 10, "output_tokens": 5},
            }
            # The body is stored uncompressed, as if httpx had already decoded it.
            body = json.dumps(payload).encode("utf-8")
            # Deliberately wrong: the size the body would have *if* it were gzipped.
            stale_length = str(len(gzip.compress(body)))

            self.response_data = payload
            self.content = body
            self.status_code = 200

            # Headers keep the compression metadata -- this is the bug being modelled.
            self.headers = {
                "content-type": "application/json",
                "content-encoding": "gzip",  # stale; the fix removes it
                "content-length": stale_length,  # stale; the fix removes it
                "x-request-id": "test-request-id",
            }

    return MockResponse()


class TestCompressionHeaderRemoval:
    """Tests for Content-Encoding header removal logic.

    Every test receives the ``mock_anthropic_response_with_compression_headers``
    fixture: a response whose body is plain JSON but whose headers still claim
    gzip encoding and a compressed-size content-length.
    """

    def test_compression_headers_are_removed_from_dict(
        self, mock_anthropic_response_with_compression_headers
    ):
        """Popping the compression headers leaves all other headers intact."""
        mock_response = mock_anthropic_response_with_compression_headers

        # Simulate what the fixed code does: copy, then drop the stale headers.
        response_headers = dict(mock_response.headers)
        response_headers.pop("content-encoding", None)
        response_headers.pop("content-length", None)

        # Verify compression headers are removed
        assert "content-encoding" not in response_headers
        assert "content-length" not in response_headers

        # Verify other headers are preserved
        assert response_headers["content-type"] == "application/json"
        assert response_headers["x-request-id"] == "test-request-id"

    def test_response_body_is_decompressed_not_compressed(
        self, mock_anthropic_response_with_compression_headers
    ):
        """Verify the response content is already decompressed (httpx behavior)."""
        mock_response = mock_anthropic_response_with_compression_headers

        # The content should be valid JSON (decompressed)
        response_data = json.loads(mock_response.content)
        assert response_data["id"] == "msg_test123"

        # Decompressing plain JSON must fail with the gzip-specific error.
        # Only BadGzipFile is accepted here: a catch-all such as Exception
        # would let unrelated failures (e.g. a TypeError from a broken mock)
        # masquerade as a passing test.
        with pytest.raises(gzip.BadGzipFile):
            gzip.decompress(mock_response.content)

    def test_headers_with_wrong_content_length_cause_issues(
        self, mock_anthropic_response_with_compression_headers
    ):
        """Demonstrate that keeping compression headers causes length mismatch."""
        mock_response = mock_anthropic_response_with_compression_headers

        # The content-length header reports the compressed size...
        claimed_length = int(mock_response.headers["content-length"])

        # ...but the body that is actually present is the decompressed one.
        actual_length = len(mock_response.content)

        # They don't match! This can cause client issues
        assert claimed_length != actual_length
        assert claimed_length < actual_length  # Compressed is smaller

    def test_removing_headers_fixes_length_mismatch(
        self, mock_anthropic_response_with_compression_headers
    ):
        """Show that removing compression headers allows proper content-length."""
        mock_response = mock_anthropic_response_with_compression_headers

        # Apply the fix
        response_headers = dict(mock_response.headers)
        response_headers.pop("content-encoding", None)
        response_headers.pop("content-length", None)

        # With the stale value gone, a correct content-length can be set.
        response_headers["content-length"] = str(len(mock_response.content))

        # Verify it matches actual content
        assert int(response_headers["content-length"]) == len(mock_response.content)


class TestAcceptEncodingStripping:
    """Checks that accept-encoding is dropped from headers before forwarding.

    Background: an edge proxy such as a Cloudflare Worker may add
    accept-encoding values (e.g. br, zstd) that the upstream provider honors.
    When httpx lacks the matching decompression library (e.g. brotli) it
    cannot decode the body, which surfaces as a UnicodeDecodeError and a 502
    to the client.

    Stripping accept-encoding before forwarding lets httpx negotiate an
    encoding on its own.
    """

    def test_accept_encoding_is_stripped_from_forwarded_headers(self):
        """The forwarded header set must not contain accept-encoding."""
        # Headers shaped like those arriving from a Cloudflare Worker client.
        incoming = {
            "authorization": "Bearer sk-test",
            "content-type": "application/json",
            "accept-encoding": "gzip, br, zstd",
            "host": "headroom.example.com",
            "content-length": "123",
        }

        # Mirror the handler: copy, then pop each header that is not forwarded.
        forwarded = dict(incoming)
        for dropped in ("host", "content-length", "accept-encoding"):
            forwarded.pop(dropped, None)

        assert "accept-encoding" not in forwarded

    def test_other_headers_preserved_after_stripping(self):
        """Auth and custom headers survive; host/length/encoding do not."""
        incoming = {
            "authorization": "Bearer sk-test",
            "content-type": "application/json",
            "accept-encoding": "gzip, br",
            "x-custom": "value",
            "host": "headroom.example.com",
            "content-length": "42",
        }

        forwarded = dict(incoming)
        for dropped in ("host", "content-length", "accept-encoding"):
            forwarded.pop(dropped, None)

        # Headers that must pass through untouched.
        assert forwarded["authorization"] == "Bearer sk-test"
        assert forwarded["content-type"] == "application/json"
        assert forwarded["x-custom"] == "value"
        # Headers that must be gone.
        assert "host" not in forwarded
        assert "content-length" not in forwarded
        assert "accept-encoding" not in forwarded

    def test_strip_is_safe_when_accept_encoding_absent(self):
        """Stripping is a no-op (no KeyError) when the header was never sent."""
        expected = {
            "authorization": "Bearer sk-test",
            "content-type": "application/json",
        }

        forwarded = dict(expected)
        # The default argument to pop() is what prevents a KeyError here.
        forwarded.pop("accept-encoding", None)

        assert forwarded == expected

    def test_brotli_encoding_value_is_stripped(self):
        """Removal works for every encoding value, including bare 'br'."""
        candidates = ("br", "gzip, br", "gzip, br, zstd", "zstd")
        for value in candidates:
            forwarded = {
                "accept-encoding": value,
                "content-type": "application/json",
            }
            forwarded.pop("accept-encoding", None)
            assert "accept-encoding" not in forwarded


class TestNoRegressionForUncompressedResponses:
    """Guards against the fix breaking responses that carry no compression."""

    def test_pop_on_missing_keys_is_safe(self):
        """Popping absent compression headers neither raises nor mutates."""
        # Only a content-type header; no compression metadata at all.
        plain_headers = {"content-type": "application/json"}

        # With a default supplied, pop() must not raise KeyError.
        for absent in ("content-encoding", "content-length"):
            plain_headers.pop(absent, None)

        # Nothing should have been removed.
        assert plain_headers == {"content-type": "application/json"}

    def test_dict_conversion_preserves_headers(self):
        """dict() yields an equal but distinct copy of the header mapping."""
        source = {
            "content-type": "application/json",
            "x-custom-header": "value",
            "authorization": "Bearer token",
        }

        # The fix converts headers with dict() before editing them.
        clone = dict(source)

        # Same contents, different object.
        assert clone == source
        assert clone is not source