# trinity-arena/test_integration.py
# End-to-end integration tests (commit d9feb8c, Julien Simon).
import time

import pytest
from unittest.mock import AsyncMock, patch

from engine import run_comparison
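
# Shape of the result dict run_comparison is expected to return, as exercised
# by the assertions in this file (derived from the tests themselves, not from
# the engine source):
#   {
#       "error": str | None,
#       "responses": {model: {"content", "prompt_tokens", "completion_tokens", "error"}},
#       "scores": {model: int, "reasoning": str},
#       "best_value": str,    # model key judged the best score-for-cost
#       "costs": {model: float},
#   }
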
@pytest.mark.asyncio
async def test_full_pipeline_mocked():
"""End-to-end: prompt → 3 models → judge → best value."""
mock_model_results = {
"nano": {"content": "Simple answer", "prompt_tokens": 50, "completion_tokens": 100, "error": None},
"mini": {"content": "Detailed answer with examples", "prompt_tokens": 50, "completion_tokens": 200, "error": None},
"large": {"content": "Comprehensive answer with examples and edge cases", "prompt_tokens": 50, "completion_tokens": 300, "error": None},
}
mock_scores = {"nano": 7, "mini": 9, "large": 9, "reasoning": "Nano adequate, Mini and Large excellent"}
with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=mock_model_results), \
patch("engine.judge_responses", new_callable=AsyncMock, return_value=mock_scores), \
patch("engine.rate_limiter") as mock_limiter:
mock_limiter.check.return_value = True
result = await run_comparison("Explain async/await in Python", ip="127.0.0.1")
assert result["error"] is None
assert all(result["responses"][k]["content"] for k in ["nano", "mini", "large"])
assert result["scores"]["nano"] == 7
assert result["scores"]["mini"] == 9
assert result["best_value"] == "large"
assert result["costs"]["large"] == 0.0
assert result["costs"]["nano"] > 0
@pytest.mark.asyncio
async def test_pipeline_with_one_model_failure():
    """One model failing should not sink the run; judging covers the survivors."""
mock_model_results = {
"nano": {"content": None, "prompt_tokens": 0, "completion_tokens": 0, "error": "timeout"},
"mini": {"content": "Mini works", "prompt_tokens": 50, "completion_tokens": 100, "error": None},
"large": {"content": "Large works", "prompt_tokens": 50, "completion_tokens": 200, "error": None},
}
mock_scores = {"mini": 8, "large": 9, "reasoning": "Both good"}
with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=mock_model_results), \
patch("engine.judge_responses", new_callable=AsyncMock, return_value=mock_scores), \
patch("engine.rate_limiter") as mock_limiter:
mock_limiter.check.return_value = True
result = await run_comparison("Test prompt", ip="127.0.0.1")
assert result["error"] is None
assert result["responses"]["nano"]["error"] == "timeout"
assert result["best_value"] == "large"
@pytest.mark.asyncio
async def test_pipeline_rate_limited():
    """Over-limit requests are rejected before any model call is made."""
with patch("engine.rate_limiter") as mock_limiter:
mock_limiter.check.return_value = False
        mock_limiter.reset_time.return_value = time.time() + 7200  # resets in two hours
result = await run_comparison("Test", ip="1.2.3.4")
assert result["error"] is not None
assert "Rate limit" in result["error"]