Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| _this_dir = os.path.dirname(os.path.abspath(__file__)) | |
| sys.path.insert(0, _this_dir) | |
| sys.path.insert(0, os.path.dirname(_this_dir)) | |
| import numpy as np | |
| from validation import christoffersen_test, print_validation_report, diebold_mariano_test | |
| import pytest | |
def test_diebold_mariano_mse():
    """A clearly more accurate model should win the DM test under MSE loss."""
    # A locally seeded generator keeps the draw reproducible without touching
    # NumPy's global random state, so the test cannot fail intermittently.
    rng = np.random.default_rng(12345)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MSE')

    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
    assert result['stat'] < 0  # MSE1 < MSE2 so mean_d < 0
    assert 0 <= result['p_value'] <= 1.0
def test_diebold_mariano_mae():
    """A clearly more accurate model should win the DM test under MAE loss."""
    # Seeded local generator: reproducible data, no global RNG side effects.
    rng = np.random.default_rng(54321)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MAE')

    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
def test_diebold_mariano_inconclusive():
    """Two equally noisy models should be labelled 'Inconclusive' when not significant."""
    # Seeded local generator so every run evaluates the same sample.
    rng = np.random.default_rng(2468)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.5
    pred2 = actual + rng.standard_normal(50) * 0.5

    result = diebold_mariano_test(actual, pred1, pred2, h=1)

    # Equal noise levels do not rule out a significant difference on a given
    # sample, so the winner label is only checked when the test reports
    # "not significant".
    if not result['significant']:
        assert result['winner'] == 'Inconclusive'
def test_diebold_mariano_small_sample():
    """Three identical observations should yield p-value 1.0 and no winner."""
    observed = np.array([1, 2, 3])
    # Both forecasts reproduce the observations exactly; separate copies are
    # passed so each argument is its own array.
    forecast_a = observed.copy()
    forecast_b = observed.copy()

    outcome = diebold_mariano_test(observed, forecast_a, forecast_b)

    assert outcome['winner'] == 'Inconclusive'
    assert outcome['p_value'] == 1.0
def test_diebold_mariano_invalid_loss():
    """An unrecognised loss_type should raise ValueError with a matching message."""
    # Three independent constant series of length 10.
    actual, first_pred, second_pred = ([1] * 10 for _ in range(3))

    with pytest.raises(ValueError, match="loss_type must be"):
        diebold_mariano_test(actual, first_pred, second_pred, loss_type='INVALID')
def test_christoffersen_test_basic():
    """The Christoffersen result should expose its expected keys and valid p-values."""
    # Seeded local generator: reproducible returns, no global RNG side effects.
    rng = np.random.default_rng(2024)
    returns = rng.standard_normal(100) * 0.01
    VaR = np.full(100, -0.015)
    # Plant losses of -0.03 (twice the -0.015 VaR level), including one
    # adjacent pair at indices 30 and 31.
    returns[[10, 20, 30, 31, 50]] = -0.03

    result = christoffersen_test(returns, VaR, target_alpha=0.05)

    assert 'unconditional_coverage' in result
    assert 'independence' in result
    assert 'overall_pass' in result
    assert 'hit_rate_actual' in result
    assert isinstance(result['hit_rate_actual'], float)
    assert 0 <= result['unconditional_coverage']['p_value'] <= 1.0
    assert 0 <= result['independence']['p_value'] <= 1.0
def test_christoffersen_flags_too_few_var_breaches():
    """With no VaR hits at all, the test must fail as over-conservative rather than pass."""
    n_days = 252
    flat_returns = np.full(n_days, 0.001)
    flat_var = np.full(n_days, 0.02)

    outcome = christoffersen_test(flat_returns, flat_var, target_alpha=0.05)

    assert outcome["hit_rate_actual"] == 0.0
    assert outcome["unconditional_coverage"]["pass"] is False
    assert outcome["overall_pass"] is False
    assert "too low" in outcome["diagnostic"]
def test_validation_report_keeps_renamed_model_winner(capsys):
    """A significant DM result with a renamed winner must still print as PASS."""
    significant_dm = dict(
        stat=2.5,
        p_value=0.01,
        winner="Machine",
        significant=True,
    )

    print_validation_report(dm_results=significant_dm, model_name="Machine Portfolio")

    printed = capsys.readouterr().out
    for expected in ("PASS", "significantly outperformed baseline"):
        assert expected in printed
def test_validation_report_explains_conservative_var_failure(capsys):
    """The printed VaR failure must name too-few breaches, not breach clustering."""
    coverage_failed = {"p_value": 0.01, "pass": False}
    independence_ok = {"p_value": 0.80, "pass": True}
    conservative_var = {
        "hit_rate_target": 0.05,
        "hit_rate_actual": 0.01,
        "unconditional_coverage": coverage_failed,
        "independence": independence_ok,
        "overall_pass": False,
        "diagnostic": "VaR breach rate is too low; risk limits look conservative or overestimated.",
    }

    print_validation_report(var_results=conservative_var)

    printed = capsys.readouterr().out
    for expected in ("too low", "conservative"):
        assert expected in printed
def test_christoffersen_fails_independence_on_clusters():
    """Back-to-back VaR breaches should pass coverage but fail independence."""
    n_days = 252
    returns = np.full(n_days, 0.001)
    var_forecasts = np.full(n_days, 0.02)
    # 13 consecutive breaches (about 5% of 252 days) packed into one run.
    returns[100:113] = -0.05

    outcome = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    # The overall hit rate is close to target, so coverage alone looks fine...
    assert outcome["unconditional_coverage"]["pass"] is True
    # ...but the breaches are contiguous, so independence must be rejected.
    assert outcome["independence"]["pass"] is False
    assert outcome["overall_pass"] is False
    assert "clustered" in outcome["diagnostic"].lower()
def test_monte_carlo_is_deterministic(seed):
    """Equal seeds must reproduce the simulated paths; a different seed must not.

    ``seed`` is not defined in this file; presumably it comes from a pytest
    fixture or parametrization elsewhere -- TODO confirm.
    """
    import pandas as pd
    from backtest import monte_carlo
    from config import DEFAULT_CONFIG

    symbols = ["AAPL", "MSFT"]
    alloc = pd.Series([0.6, 0.4], index=symbols)
    mu = pd.Series([0.08, 0.10], index=symbols)
    sigma = pd.DataFrame(
        [[0.04, 0.02], [0.02, 0.05]], index=symbols, columns=symbols
    )
    start_capital = 100000.0

    # Work on a copy so the shared default configuration is not modified.
    settings = DEFAULT_CONFIG.copy()
    settings["monte_carlo_sims"] = 100
    settings["monte_carlo_years"] = 0.5

    def simulate(run_seed):
        """Run the simulation with ``run_seed`` and return only the paths."""
        paths, _stats = monte_carlo(
            alloc, mu, sigma, start_capital, settings,
            seed=run_seed, return_paths=True,
        )
        return paths

    first_run = simulate(seed)
    repeat_run = simulate(seed)
    # Same seed: paths must agree to within a tight relative tolerance.
    np.testing.assert_allclose(first_run, repeat_run, rtol=1e-8)

    # Neighbouring seed: paths must differ.
    other_run = simulate(seed + 1)
    assert not np.allclose(first_run, other_run)