"""Tests for the backtest benchmark suite and holdout sentiment metrics."""

from datetime import datetime, timezone
from pathlib import Path
import sys

# Make the parent of this file's directory importable before pulling in
# ``engine`` (presumably the project root that holds the package -- confirm
# against the repository layout).
_PROJECT_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(_PROJECT_ROOT))

from engine.analytics import AnalyticsEngine  # noqa: E402
from engine.backtest import build_benchmark_suite, run_backtest  # noqa: E402


def test_backtest_uses_holdout_suite_separate_from_tuning():
    """Tuning and holdout scenario names are both non-empty and disjoint."""
    benchmark = build_benchmark_suite(
        reference_time=datetime.now(tz=timezone.utc),
    )

    # Collect both name sets up front, then check them in a fixed order.
    names_for_tuning = {item.name for item in benchmark.tuning}
    names_for_holdout = {item.name for item in benchmark.holdout}

    assert names_for_tuning
    assert names_for_holdout
    assert names_for_tuning.isdisjoint(names_for_holdout)


def test_sentiment_holdout_backtest_has_reasonable_accuracy_without_overcalling():
    """Holdout backtest metrics stay within the expected thresholds.

    Runs ``run_backtest`` over the holdout scenarios with a fresh
    ``AnalyticsEngine`` and checks the reported ``"metrics"`` mapping:
    scenario count matches the holdout size, accuracy/precision/coverage
    meet their lower bounds, and the overcall rate stays under its upper
    bound.
    """
    benchmark = build_benchmark_suite(
        reference_time=datetime.now(tz=timezone.utc),
    )
    holdout_scenarios = benchmark.holdout

    report = run_backtest(holdout_scenarios, engine=AnalyticsEngine())
    scores = report["metrics"]

    # Every holdout scenario must have been evaluated.
    assert scores["scenario_count"] == len(benchmark.holdout)

    # Lower bounds on quality metrics.
    assert scores["overall_accuracy"] >= 0.78
    assert scores["decisive_precision"] >= 0.75
    assert scores["mixed_accuracy"] >= 0.66

    # Upper bound on overcalling, lower bound on coverage.
    assert scores["overcall_rate"] <= 0.22
    assert scores["coverage"] >= 0.3