# trinity-arena / test_engine.py
# Author: Julien Simon
# feat: add comparison engine with best-value selection (commit 3be2d14)
import pytest
from unittest.mock import AsyncMock, patch
from engine import run_comparison, pick_best_value
from config import MODELS
def test_pick_best_value_cheapest_above_threshold():
    """The free model wins when it clears the threshold, even against equal scorers."""
    judge_scores = {"nano": 8, "mini": 9, "large": 9, "reasoning": "all good"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    winner = pick_best_value(judge_scores, model_costs, threshold=7)
    assert winner == "large"
def test_pick_best_value_free_model_scores_below_threshold():
    """A paid model is picked when the cheaper options miss the quality bar."""
    judge_scores = {"nano": 5, "mini": 8, "large": 6, "reasoning": "nano and large weak"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    winner = pick_best_value(judge_scores, model_costs, threshold=7)
    assert winner == "mini"
def test_pick_best_value_none_above_threshold():
    """No winner is returned when every model scores under the threshold."""
    judge_scores = {"nano": 3, "mini": 5, "large": 6, "reasoning": "all weak"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    assert pick_best_value(judge_scores, model_costs, threshold=7) is None
def test_pick_best_value_tie_goes_to_cheapest():
    """With identical scores across the board, the lowest-cost model is chosen."""
    judge_scores = {"nano": 8, "mini": 8, "large": 8, "reasoning": "all equal"}
    model_costs = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    winner = pick_best_value(judge_scores, model_costs, threshold=7)
    assert winner == "large"
@pytest.mark.asyncio
async def test_run_comparison_prompt_too_long():
    """Prompts over the 2,000-character limit are rejected with an error message."""
    outcome = await run_comparison("x" * 2001, ip="1.2.3.4")
    assert outcome["error"] is not None
    assert "2,000" in outcome["error"]
@pytest.mark.asyncio
async def test_run_comparison_success():
    """Happy path: all models respond, the judge scores them, and a best value is picked."""
    # Small factory to avoid repeating the per-model result shape three times.
    def model_ok(text):
        return {"content": text, "prompt_tokens": 10, "completion_tokens": 20, "error": None}

    fake_responses = {
        "nano": model_ok("Nano response"),
        "mini": model_ok("Mini response"),
        "large": model_ok("Large response"),
    }
    fake_scores = {"nano": 8, "mini": 9, "large": 9, "reasoning": "all good"}

    with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=fake_responses), \
         patch("engine.judge_responses", new_callable=AsyncMock, return_value=fake_scores), \
         patch("engine.rate_limiter") as limiter:
        limiter.check.return_value = True
        outcome = await run_comparison("Write hello", ip="1.2.3.4")

    assert outcome["error"] is None
    assert outcome["responses"]["nano"]["content"] == "Nano response"
    assert outcome["scores"]["nano"] == 8
    assert outcome["best_value"] is not None
@pytest.mark.asyncio
async def test_run_comparison_all_models_fail():
    """When every model errors out, the engine reports an 'All models failed' error."""
    failed_result = {"content": None, "prompt_tokens": 0, "completion_tokens": 0, "error": "timeout"}
    fake_responses = {name: dict(failed_result) for name in ("nano", "mini", "large")}

    with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=fake_responses), \
         patch("engine.rate_limiter") as limiter:
        limiter.check.return_value = True
        outcome = await run_comparison("Test", ip="1.2.3.4")

    assert outcome["error"] is not None
    assert "All models failed" in outcome["error"]