# SummarizerApp/tests/test_v2_api.py
# NOTE(review): the lines above were GitHub UI residue (author "ming",
# commit 29ed661 "Migrate to Ruff for linting/formatting and add
# comprehensive import tests") and made the file invalid Python; kept here
# as a comment for provenance.
"""
Tests for V2 API endpoints.
"""
import json
from unittest.mock import patch
import pytest
from fastapi.testclient import TestClient
class TestV2SummarizeStream:
    """Test V2 streaming summarization endpoint.

    Every test patches the HuggingFace streaming service, so no model is
    required; the tests exercise routing, validation, SSE framing, and the
    endpoint's parameter-mapping / adaptive-token logic.
    """

    # Endpoint under test.
    _URL = "/api/v2/summarize/stream"
    # Dotted path of the async streaming service the endpoint delegates to.
    _SERVICE_PATH = "app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream"
    # Prefix of SSE data lines per the EventSource wire format.
    _SSE_PREFIX = "data: "

    @staticmethod
    def _stream_of(*chunks):
        """Return an async generator yielding *chunks* in order.

        Suitable as the mocked ``return_value`` of the streaming service,
        replacing the per-test inline ``mock_generator`` boilerplate.
        """
        async def _gen():
            for chunk in chunks:
                yield chunk

        return _gen()

    @classmethod
    def _sse_payloads(cls, body: str) -> list:
        """JSON-decode every ``data: ...`` line from an SSE response body."""
        return [
            json.loads(line[len(cls._SSE_PREFIX):])
            for line in body.strip().split("\n")
            if line.startswith(cls._SSE_PREFIX)
        ]

    @pytest.mark.integration
    def test_v2_stream_endpoint_exists(self, client: TestClient):
        """Test that V2 stream endpoint exists and returns proper response."""
        response = client.post(
            self._URL,
            json={"text": "This is a test text to summarize.", "max_tokens": 50},
        )
        # Should return 200 with SSE content type and streaming headers.
        assert response.status_code == 200
        assert response.headers["content-type"] == "text/event-stream; charset=utf-8"
        assert "Cache-Control" in response.headers
        assert "Connection" in response.headers

    @pytest.mark.integration
    def test_v2_stream_endpoint_validation_error(self, client: TestClient):
        """Test V2 stream endpoint with validation error."""
        response = client.post(
            self._URL,
            json={"text": "", "max_tokens": 50},  # Empty text should fail validation
        )
        # Request is rejected by schema validation before any service call.
        assert response.status_code == 422

    @pytest.mark.integration
    def test_v2_stream_endpoint_sse_format(self, client: TestClient):
        """Test that V2 stream endpoint returns proper SSE format."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of(
                {"content": "This is a", "done": False, "tokens_used": 1},
                {"content": " test summary.", "done": False, "tokens_used": 2},
                {"content": "", "done": True, "tokens_used": 2, "latency_ms": 100.0},
            )
            response = client.post(
                self._URL,
                json={"text": "This is a test text to summarize.", "max_tokens": 50},
            )
            assert response.status_code == 200
            payloads = self._sse_payloads(response.text)
            # One data line per mocked chunk, at minimum.
            assert len(payloads) >= 3
            first = payloads[0]
            assert "content" in first
            assert "done" in first
            assert first["content"] == "This is a"
            assert first["done"] is False

    @pytest.mark.integration
    def test_v2_stream_endpoint_error_handling(self, client: TestClient):
        """Test V2 stream endpoint error handling."""
        with patch(self._SERVICE_PATH) as mock_stream:
            # The service reports failure through its terminal chunk.
            mock_stream.return_value = self._stream_of(
                {"content": "", "done": True, "error": "Model not available"}
            )
            response = client.post(
                self._URL,
                json={"text": "This is a test text to summarize.", "max_tokens": 50},
            )
            # Errors surface inside the SSE stream, not as an HTTP error status.
            assert response.status_code == 200
            error_data = self._sse_payloads(response.text)[0]
            assert "error" in error_data
            assert error_data["done"] is True
            assert "Model not available" in error_data["error"]

    @pytest.mark.integration
    def test_v2_stream_endpoint_uses_v1_schema(self, client: TestClient):
        """Test that V2 endpoint uses the same schema as V1 for compatibility."""
        # A V1-style request body must be accepted unchanged.
        response = client.post(
            self._URL,
            json={
                "text": "This is a test text to summarize.",
                "max_tokens": 50,
                "prompt": "Summarize this text:",
            },
        )
        assert response.status_code == 200

    @pytest.mark.integration
    def test_v2_stream_endpoint_parameter_mapping(self, client: TestClient):
        """Test that V2 correctly maps V1 parameters to V2 service."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of({"content": "", "done": True})
            response = client.post(
                self._URL,
                json={
                    "text": "Test text",
                    "max_tokens": 100,  # Should map to max_new_tokens
                    "prompt": "Custom prompt",
                },
            )
            assert response.status_code == 200
            # Verify service was called with correctly mapped keyword arguments.
            mock_stream.assert_called_once()
            kwargs = mock_stream.call_args.kwargs
            assert kwargs["max_new_tokens"] == 100
            assert kwargs["prompt"] == "Custom prompt"
            assert kwargs["text"] == "Test text"

    @pytest.mark.integration
    def test_v2_adaptive_token_logic_short_text(self, client: TestClient):
        """Test adaptive token logic for short texts (<1500 chars)."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of({"content": "", "done": True})
            # 22 chars * 20 = 440 chars, well under the 1500-char threshold.
            short_text = "This is a short text. " * 20
            response = client.post(
                self._URL,
                json={
                    "text": short_text,
                    # max_tokens deliberately omitted to exercise adaptive logic
                },
            )
            assert response.status_code == 200
            mock_stream.assert_called_once()
            # Short inputs should get a budget in the 60-100 token band.
            assert 60 <= mock_stream.call_args.kwargs["max_new_tokens"] <= 100

    @pytest.mark.integration
    def test_v2_adaptive_token_logic_long_text(self, client: TestClient):
        """Test adaptive token logic for long texts (>1500 chars)."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of({"content": "", "done": True})
            # ~2560 chars, comfortably over the 1500-char threshold.
            long_text = (
                "This is a longer text that should trigger adaptive token logic. " * 40
            )
            response = client.post(
                self._URL,
                json={
                    "text": long_text,
                    # max_tokens deliberately omitted to exercise adaptive logic
                },
            )
            assert response.status_code == 200
            mock_stream.assert_called_once()
            # Long inputs scale proportionally but are capped at 400 tokens.
            assert 100 <= mock_stream.call_args.kwargs["max_new_tokens"] <= 400

    @pytest.mark.integration
    def test_v2_temperature_and_top_p_parameters(self, client: TestClient):
        """Test that temperature and top_p parameters are passed correctly."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of({"content": "", "done": True})
            response = client.post(
                self._URL,
                json={"text": "Test text", "temperature": 0.5, "top_p": 0.8},
            )
            assert response.status_code == 200
            mock_stream.assert_called_once()
            kwargs = mock_stream.call_args.kwargs
            assert kwargs["temperature"] == 0.5
            assert kwargs["top_p"] == 0.8

    @pytest.mark.integration
    def test_v2_default_temperature_and_top_p(self, client: TestClient):
        """Test that default temperature and top_p values are used when not specified."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of({"content": "", "done": True})
            response = client.post(
                self._URL,
                json={
                    "text": "Test text"
                    # temperature and top_p deliberately omitted
                },
            )
            assert response.status_code == 200
            mock_stream.assert_called_once()
            kwargs = mock_stream.call_args.kwargs
            assert kwargs["temperature"] == 0.3  # Default temperature
            assert kwargs["top_p"] == 0.9  # Default top_p

    @pytest.mark.integration
    def test_v2_recursive_summarization_trigger(self, client: TestClient):
        """Test that recursive summarization is triggered for long texts."""
        with patch(self._SERVICE_PATH) as mock_stream:
            mock_stream.return_value = self._stream_of({"content": "", "done": True})
            # ~2600+ chars: well over the threshold that should trigger the
            # service's internal recursive summarization path.
            very_long_text = (
                "This is a very long text that should definitely trigger recursive summarization logic. "
                * 30
            )
            response = client.post(self._URL, json={"text": very_long_text})
            assert response.status_code == 200
            # The endpoint must still route through the (mocked) service once;
            # recursion happens inside the service, so only the call is checked.
            mock_stream.assert_called_once()
class TestV2APICompatibility:
    """Test V2 API compatibility with V1."""

    @pytest.mark.integration
    def test_v2_uses_same_schemas_as_v1(self):
        """Test that V2 imports and uses the same schemas as V1."""
        from app.api.v1.schemas import SummarizeRequest as V1SummarizeRequest
        from app.api.v1.schemas import SummarizeResponse as V1SummarizeResponse
        from app.api.v2.schemas import SummarizeRequest, SummarizeResponse

        # Identity, not mere equality: V2 must re-export the V1 classes.
        assert SummarizeRequest is V1SummarizeRequest
        assert SummarizeResponse is V1SummarizeResponse

    @pytest.mark.integration
    def test_v2_endpoint_structure_matches_v1(self, client: TestClient):
        """Test that V2 endpoint structure matches V1."""
        payload = {"text": "Test", "max_tokens": 50}
        v1_response = client.post("/api/v1/summarize/stream", json=payload)
        v2_response = client.post("/api/v2/summarize/stream", json=payload)

        # Either backend may be unavailable in the test environment, so a 502
        # is tolerated for both; what matters is that the endpoints mirror
        # each other structurally.
        for response in (v1_response, v2_response):
            assert response.status_code in [200, 502]

        # The two versions must advertise the same content type.
        v1_ct = v1_response.headers.get("content-type")
        v2_ct = v2_response.headers.get("content-type")
        assert v1_ct == v2_ct