File size: 1,167 Bytes
16ae9d0
afbaef9
 
 
 
 
 
16ae9d0
afbaef9
 
16ae9d0
 
afbaef9
16ae9d0
 
afbaef9
16ae9d0
 
 
afbaef9
16ae9d0
 
 
 
afbaef9
 
16ae9d0
 
 
 
 
afbaef9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from datetime import datetime, timezone
from pathlib import Path
import sys

# Prepend parents[1] of this file's resolved path (the directory one level
# above the folder containing this file) to sys.path, ahead of the `engine`
# imports that follow.
# NOTE(review): presumably this is the project root holding `engine` -- confirm.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from engine.analytics import AnalyticsEngine
from engine.backtest import build_benchmark_suite, run_backtest


def test_backtest_uses_holdout_suite_separate_from_tuning():
    """Verify the benchmark suite keeps tuning and holdout scenarios apart.

    Builds the suite with the current UTC time as its reference time, then
    checks that both scenario groups are non-empty and that no scenario name
    appears in both groups.
    """
    benchmark = build_benchmark_suite(reference_time=datetime.now(timezone.utc))

    # Gather the scenario names of each group: tuning first, then holdout.
    names_in_tuning = set()
    for entry in benchmark.tuning:
        names_in_tuning.add(entry.name)

    names_in_holdout = set()
    for entry in benchmark.holdout:
        names_in_holdout.add(entry.name)

    # Each group must contain at least one scenario ...
    assert names_in_tuning
    assert names_in_holdout
    # ... and the two groups must not have any scenario name in common.
    assert names_in_tuning.isdisjoint(names_in_holdout)


def test_sentiment_holdout_backtest_has_reasonable_accuracy_without_overcalling():
    """Run the backtest on the holdout scenarios and check metric thresholds.

    The suite is built with the current UTC time as its reference time and
    only its holdout scenarios are passed to ``run_backtest`` together with a
    fresh ``AnalyticsEngine``. The ``"metrics"`` entry of the result must:

    * report a ``scenario_count`` equal to the number of holdout scenarios;
    * reach at least 0.78 ``overall_accuracy``, 0.75 ``decisive_precision``
      and 0.66 ``mixed_accuracy``;
    * keep ``overcall_rate`` at or below 0.22;
    * reach at least 0.3 ``coverage``.
    """
    benchmark = build_benchmark_suite(reference_time=datetime.now(timezone.utc))
    holdout_scenarios = benchmark.holdout
    report = run_backtest(holdout_scenarios, engine=AnalyticsEngine())
    scores = report["metrics"]

    # Every holdout scenario has to be accounted for in the metrics.
    assert scores["scenario_count"] == len(benchmark.holdout)

    # Lower bounds, checked in this order so the first failing metric is the
    # one reported.
    minimums = (
        ("overall_accuracy", 0.78),
        ("decisive_precision", 0.75),
        ("mixed_accuracy", 0.66),
    )
    for metric_name, floor in minimums:
        assert scores[metric_name] >= floor

    # Upper bound on the overcall rate, followed by the coverage lower bound.
    assert scores["overcall_rate"] <= 0.22
    assert scores["coverage"] >= 0.3