# trinity-arena / test_engine.py
# Author: Julien Simon
# feat: add comparison engine with best-value selection (commit 3be2d14)
import pytest
from unittest.mock import AsyncMock, patch
from engine import run_comparison, pick_best_value
from config import MODELS
def test_pick_best_value_cheapest_above_threshold():
    """The free model wins when it clears the threshold, even against equal scorers."""
    judge_scores = {"nano": 8, "mini": 9, "large": 9, "reasoning": "all good"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    winner = pick_best_value(judge_scores, model_costs, threshold=7)
    assert winner == "large"
def test_pick_best_value_free_model_scores_below_threshold():
    """A paid model is picked when the cheaper options miss the quality bar."""
    judge_scores = {"nano": 5, "mini": 8, "large": 6, "reasoning": "nano and large weak"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    winner = pick_best_value(judge_scores, model_costs, threshold=7)
    assert winner == "mini"
def test_pick_best_value_none_above_threshold():
    """No winner is returned when every model scores under the threshold."""
    judge_scores = {"nano": 3, "mini": 5, "large": 6, "reasoning": "all weak"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    assert pick_best_value(judge_scores, model_costs, threshold=7) is None
def test_pick_best_value_tie_goes_to_cheapest():
    """With identical scores across the board, the lowest-cost model is chosen."""
    judge_scores = {"nano": 8, "mini": 8, "large": 8, "reasoning": "all equal"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    winner = pick_best_value(judge_scores, model_costs, threshold=7)
    assert winner == "large"
@pytest.mark.asyncio
async def test_run_comparison_prompt_too_long():
    """Prompts over the 2,000-character limit are rejected with an error message."""
    outcome = await run_comparison("x" * 2001, ip="1.2.3.4")
    assert outcome["error"] is not None
    assert "2,000" in outcome["error"]
@pytest.mark.asyncio
async def test_run_comparison_success():
    """Happy path: all models respond, the judge scores them, and a best value is picked."""
    # Small factory to avoid repeating the per-model result shape three times.
    def model_ok(text):
        return {"content": text, "prompt_tokens": 10, "completion_tokens": 20, "error": None}

    fake_responses = {
        "nano": model_ok("Nano response"),
        "mini": model_ok("Mini response"),
        "large": model_ok("Large response"),
    }
    fake_scores = {"nano": 8, "mini": 9, "large": 9, "reasoning": "all good"}

    with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=fake_responses), \
         patch("engine.judge_responses", new_callable=AsyncMock, return_value=fake_scores), \
         patch("engine.rate_limiter") as limiter:
        limiter.check.return_value = True
        outcome = await run_comparison("Write hello", ip="1.2.3.4")

    assert outcome["error"] is None
    assert outcome["responses"]["nano"]["content"] == "Nano response"
    assert outcome["scores"]["nano"] == 8
    assert outcome["best_value"] is not None
@pytest.mark.asyncio
async def test_run_comparison_all_models_fail():
    """When every model errors out, the engine reports an 'All models failed' error."""
    failed_result = {"content": None, "prompt_tokens": 0, "completion_tokens": 0, "error": "timeout"}
    fake_responses = {name: dict(failed_result) for name in ("nano", "mini", "large")}

    with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=fake_responses), \
         patch("engine.rate_limiter") as limiter:
        limiter.check.return_value = True
        outcome = await run_comparison("Test", ip="1.2.3.4")

    assert outcome["error"] is not None
    assert "All models failed" in outcome["error"]