import pytest
from unittest.mock import AsyncMock, patch
from engine import run_comparison, pick_best_value
from config import MODELS


def test_pick_best_value_cheapest_above_threshold():
    """When every model clears the threshold, the zero-cost model is picked."""
    judge_output = {"nano": 8, "mini": 9, "large": 9, "reasoning": "all good"}
    per_model_cost = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    assert pick_best_value(judge_output, per_model_cost, threshold=7) == "large"


def test_pick_best_value_free_model_scores_below_threshold():
    """A cheap model that misses the threshold loses to a pricier passing one."""
    judge_output = {"nano": 5, "mini": 8, "large": 6, "reasoning": "nano and large weak"}
    per_model_cost = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    assert pick_best_value(judge_output, per_model_cost, threshold=7) == "mini"


def test_pick_best_value_none_above_threshold():
    """If no model reaches the threshold, no best-value pick is returned."""
    judge_output = {"nano": 3, "mini": 5, "large": 6, "reasoning": "all weak"}
    per_model_cost = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    assert pick_best_value(judge_output, per_model_cost, threshold=7) is None


def test_pick_best_value_tie_goes_to_cheapest():
    """With identical passing scores, the cheapest (free) model wins the tie."""
    judge_output = {"nano": 8, "mini": 8, "large": 8, "reasoning": "all equal"}
    per_model_cost = {"nano": 0.0001, "mini": 0.0003, "large": 0.0}
    assert pick_best_value(judge_output, per_model_cost, threshold=7) == "large"


@pytest.mark.asyncio
async def test_run_comparison_prompt_too_long():
    """An over-limit prompt is rejected with an error mentioning the cap."""
    oversized_prompt = "x" * 2001
    result = await run_comparison(oversized_prompt, ip="1.2.3.4")
    error = result["error"]
    assert error is not None
    assert "2,000" in error


@pytest.mark.asyncio
async def test_run_comparison_success():
    """Happy path: all models answer, the judge scores them, and the
    result carries responses, scores, and a best-value pick."""

    def ok_response(text):
        # One successful per-model result in the shape engine returns.
        return {"content": text, "prompt_tokens": 10, "completion_tokens": 20, "error": None}

    fake_responses = {
        "nano": ok_response("Nano response"),
        "mini": ok_response("Mini response"),
        "large": ok_response("Large response"),
    }
    fake_scores = {"nano": 8, "mini": 9, "large": 9, "reasoning": "all good"}

    with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=fake_responses):
        with patch("engine.judge_responses", new_callable=AsyncMock, return_value=fake_scores):
            with patch("engine.rate_limiter") as limiter:
                limiter.check.return_value = True
                result = await run_comparison("Write hello", ip="1.2.3.4")

    assert result["error"] is None
    assert result["responses"]["nano"]["content"] == "Nano response"
    assert result["scores"]["nano"] == 8
    assert result["best_value"] is not None


@pytest.mark.asyncio
async def test_run_comparison_all_models_fail():
    """When every model errors out, the overall result reports the failure."""

    def failed_response(reason):
        # One failed per-model result: no content, zero token usage.
        return {"content": None, "prompt_tokens": 0, "completion_tokens": 0, "error": reason}

    fake_responses = {name: failed_response("timeout") for name in ("nano", "mini", "large")}

    with patch("engine.call_models_parallel", new_callable=AsyncMock, return_value=fake_responses):
        with patch("engine.rate_limiter") as limiter:
            limiter.check.return_value = True
            result = await run_comparison("Test", ip="1.2.3.4")

    assert result["error"] is not None
    assert "All models failed" in result["error"]