# trinity-arena/test_integration.py
# End-to-end integration tests (commit d9feb8c, Julien Simon).
import time

import pytest
from unittest.mock import AsyncMock, patch

from engine import run_comparison
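
# Shape of the result dict run_comparison is expected to return, as exercised
# by the assertions in this file (derived from the tests themselves, not from
# the engine source):
#   {
#       "error": str | None,
#       "responses": {model: {"content", "prompt_tokens", "completion_tokens", "error"}},
#       "scores": {model: int, "reasoning": str},
#       "best_value": str,    # model key judged the best score-for-cost
#       "costs": {model: float},
#   }
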
@pytest.mark.asyncio
async def test_full_pipeline_mocked():
"""End-to-end: prompt → 3 models → judge → best value."""
mock_model_results = {
"nano": {"content": "Simple answer", "prompt_tokens": 50, "completion_tokens": 100, "error": None},
"mini": {"content": "Detailed answer with examples", "prompt_tokens": 50, "completion_tokens": 200, "error": None},
"large": {"content": "Comprehensive answer with examples and edge cases", "prompt_tokens": 50, "completion_tokens": 300, "error": None},
}
mock_scores = {"nano": 7, "mini": 9, "large": 9, "reasoning": "Nano adequate, Mini and Large excellent"}
with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=mock_model_results), \
patch("engine.judge_responses", new_callable=AsyncMock, return_value=mock_scores), \
patch("engine.rate_limiter") as mock_limiter:
mock_limiter.check.return_value = True
result = await run_comparison("Explain async/await in Python", ip="127.0.0.1")
assert result["error"] is None
assert all(result["responses"][k]["content"] for k in ["nano", "mini", "large"])
assert result["scores"]["nano"] == 7
assert result["scores"]["mini"] == 9
assert result["best_value"] == "large"
assert result["costs"]["large"] == 0.0
assert result["costs"]["nano"] > 0
@pytest.mark.asyncio
async def test_pipeline_with_one_model_failure():
    """One model failing should not sink the run; judging covers the survivors."""
mock_model_results = {
"nano": {"content": None, "prompt_tokens": 0, "completion_tokens": 0, "error": "timeout"},
"mini": {"content": "Mini works", "prompt_tokens": 50, "completion_tokens": 100, "error": None},
"large": {"content": "Large works", "prompt_tokens": 50, "completion_tokens": 200, "error": None},
}
mock_scores = {"mini": 8, "large": 9, "reasoning": "Both good"}
with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=mock_model_results), \
patch("engine.judge_responses", new_callable=AsyncMock, return_value=mock_scores), \
patch("engine.rate_limiter") as mock_limiter:
mock_limiter.check.return_value = True
result = await run_comparison("Test prompt", ip="127.0.0.1")
assert result["error"] is None
assert result["responses"]["nano"]["error"] == "timeout"
assert result["best_value"] == "large"
@pytest.mark.asyncio
async def test_pipeline_rate_limited():
    """Over-limit requests are rejected before any model call is made."""
with patch("engine.rate_limiter") as mock_limiter:
mock_limiter.check.return_value = False
        mock_limiter.reset_time.return_value = time.time() + 7200  # resets in two hours
result = await run_comparison("Test", ip="1.2.3.4")
assert result["error"] is not None
assert "Rate limit" in result["error"]