"""Tests for the validation helpers and for Monte Carlo reproducibility.

Exercises ``diebold_mariano_test``, ``christoffersen_test`` and
``print_validation_report`` from ``validation``, plus ``monte_carlo`` from
``backtest``.  Every random input is drawn from a locally seeded generator so
a failing run can be reproduced exactly.
"""
import os
import sys

import numpy as np
import pytest

# Put this directory and its parent at the front of sys.path *before* the
# project-local import below.
_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _this_dir)
sys.path.insert(0, os.path.dirname(_this_dir))

from validation import (  # noqa: E402  (must follow the sys.path setup)
    christoffersen_test,
    diebold_mariano_test,
    print_validation_report,
)


def _noisy_forecasts(seed, scale1, scale2, n=50):
    """Return ``(actual, pred1, pred2)`` built from a seeded generator.

    ``actual`` is standard-normal noise of length ``n``; each prediction is
    ``actual`` plus independent standard-normal noise scaled by ``scale1`` /
    ``scale2`` respectively.
    """
    rng = np.random.default_rng(seed)
    actual = rng.standard_normal(n)
    pred1 = actual + rng.standard_normal(n) * scale1
    pred2 = actual + rng.standard_normal(n) * scale2
    return actual, pred1, pred2


def test_diebold_mariano_mse():
    """With MSE loss, the low-error forecast is reported as the significant winner."""
    actual, pred1, pred2 = _noisy_forecasts(seed=0, scale1=0.1, scale2=1.0)

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MSE')

    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
    assert result['stat'] < 0  # MSE1 < MSE2 so mean_d < 0
    assert 0 <= result['p_value'] <= 1.0


def test_diebold_mariano_mae():
    """With MAE loss, the low-error forecast is reported as the significant winner."""
    actual, pred1, pred2 = _noisy_forecasts(seed=1, scale1=0.1, scale2=1.0)

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MAE')

    assert result['significant'] is True
    assert result['winner'] == 'Model 1'


def test_diebold_mariano_inconclusive():
    """A non-significant comparison must be labelled 'Inconclusive'."""
    actual, pred1, pred2 = _noisy_forecasts(seed=2, scale1=0.5, scale2=0.5)

    result = diebold_mariano_test(actual, pred1, pred2, h=1)

    # Equal noise levels do not guarantee a non-significant outcome, so the
    # label is only checked when the test actually reports non-significance.
    if not result['significant']:
        assert result['winner'] == 'Inconclusive'


def test_diebold_mariano_small_sample():
    """Three identical observations yield p-value 1.0 and no winner."""
    actual = np.array([1, 2, 3])
    pred1 = np.array([1, 2, 3])
    pred2 = np.array([1, 2, 3])

    result = diebold_mariano_test(actual, pred1, pred2)

    assert result['p_value'] == 1.0
    assert result['winner'] == 'Inconclusive'


def test_diebold_mariano_invalid_loss():
    """An unknown ``loss_type`` raises ``ValueError``."""
    with pytest.raises(ValueError, match="loss_type must be"):
        diebold_mariano_test([1]*10, [1]*10, [1]*10, loss_type='INVALID')


def test_christoffersen_test_basic():
    """The result exposes the expected keys and p-values inside [0, 1]."""
    rng = np.random.default_rng(3)
    returns = rng.standard_normal(100) * 0.01
    var_forecasts = np.full(100, -0.015)
    # Force a handful of returns well below the VaR level, two of them adjacent.
    returns[[10, 20, 30, 31, 50]] = -0.03

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    for key in ('unconditional_coverage', 'independence', 'overall_pass', 'hit_rate_actual'):
        assert key in result
    assert isinstance(result['hit_rate_actual'], float)
    assert 0 <= result['unconditional_coverage']['p_value'] <= 1.0
    assert 0 <= result['independence']['p_value'] <= 1.0


def test_christoffersen_flags_too_few_var_breaches():
    """A zero-hit VaR sample should fail as over-conservative, not pass silently."""
    returns = np.full(252, 0.001)
    var_forecasts = np.full(252, 0.02)

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    assert result["overall_pass"] is False
    assert result["unconditional_coverage"]["pass"] is False
    assert result["hit_rate_actual"] == 0.0
    assert "too low" in result["diagnostic"]


def test_validation_report_keeps_renamed_model_winner(capsys):
    """The console report should still PASS when main.py has renamed Model 1."""
    dm_results = {
        "stat": 2.5,
        "p_value": 0.01,
        "winner": "Machine",
        "significant": True,
    }

    print_validation_report(dm_results=dm_results, model_name="Machine Portfolio")

    out = capsys.readouterr().out
    assert "PASS" in out
    assert "significantly outperformed baseline" in out


def test_validation_report_explains_conservative_var_failure(capsys):
    """The printed VaR failure should distinguish too-few hits from clustering."""
    var_results = {
        "hit_rate_target": 0.05,
        "hit_rate_actual": 0.01,
        "unconditional_coverage": {"p_value": 0.01, "pass": False},
        "independence": {"p_value": 0.80, "pass": True},
        "overall_pass": False,
        "diagnostic": "VaR breach rate is too low; risk limits look conservative or overestimated.",
    }

    print_validation_report(var_results=var_results)

    out = capsys.readouterr().out
    assert "too low" in out
    assert "conservative" in out


def test_christoffersen_fails_independence_on_clusters():
    """A VaR model with clustered breaches should fail the independence test."""
    returns = np.full(252, 0.001)
    var_forecasts = np.full(252, 0.02)
    # 13 breaches (~5% of 252), but they all happen in a row.
    returns[100:113] = -0.05

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    assert result["unconditional_coverage"]["pass"] is True  # ~5% total hit rate
    assert result["independence"]["pass"] is False  # But highly clustered
    assert result["overall_pass"] is False
    assert "clustered" in result["diagnostic"].lower()


@pytest.mark.parametrize("seed", [42, 1337, 9999, 123456, 7])
def test_monte_carlo_is_deterministic(seed):
    """Same seed reproduces the simulated paths; a different seed does not."""
    # Imported here rather than at module level, as in the original layout.
    import pandas as pd
    from backtest import monte_carlo
    from config import DEFAULT_CONFIG

    tickers = ["AAPL", "MSFT"]
    weights = pd.Series([0.6, 0.4], index=tickers)
    exp_rets = pd.Series([0.08, 0.10], index=tickers)
    cov_mat = pd.DataFrame(
        [[0.04, 0.02], [0.02, 0.05]], index=tickers, columns=tickers
    )
    capital = 100000.0

    # Work on a copy so the shared default configuration is not modified.
    cfg = DEFAULT_CONFIG.copy()
    cfg["monte_carlo_sims"] = 100
    cfg["monte_carlo_years"] = 0.5

    def simulate(run_seed):
        """Run one simulation and return only the paths."""
        paths, _stats = monte_carlo(
            weights, exp_rets, cov_mat, capital, cfg,
            seed=run_seed, return_paths=True,
        )
        return paths

    # Run twice with the same seed.
    paths1 = simulate(seed)
    paths2 = simulate(seed)

    # The two runs must agree to within a tight relative tolerance.
    np.testing.assert_allclose(paths1, paths2, rtol=1e-8)

    # Another seed must yield different results.
    paths3 = simulate(seed + 1)
    assert not np.allclose(paths1, paths3)