File size: 5,884 Bytes
208fbf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import sys
import os

_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _this_dir)
sys.path.insert(0, os.path.dirname(_this_dir))

import numpy as np

from validation import christoffersen_test, print_validation_report, diebold_mariano_test
import pytest

def test_diebold_mariano_mse():
    """A much more accurate Model 1 should win the DM test under MSE loss.

    Inputs are drawn from a fixed-seed generator so the outcome is identical
    on every run instead of depending on NumPy's global random state.
    """
    rng = np.random.default_rng(0)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MSE')
    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
    assert result['stat'] < 0  # MSE1 < MSE2 so mean_d < 0
    assert 0 <= result['p_value'] <= 1.0

def test_diebold_mariano_mae():
    """A much more accurate Model 1 should win the DM test under MAE loss.

    Inputs are drawn from a fixed-seed generator so the outcome is identical
    on every run instead of depending on NumPy's global random state.
    """
    rng = np.random.default_rng(1)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MAE')
    assert result['significant'] is True
    assert result['winner'] == 'Model 1'

def test_diebold_mariano_inconclusive():
    """Two equally noisy forecasts must be labelled 'Inconclusive' when not significant.

    The data comes from a fixed-seed generator so the same branch is exercised
    on every run. The winner is only checked when the test reports no
    significant difference; a significant result is not treated as a failure.
    """
    rng = np.random.default_rng(2)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.5
    pred2 = actual + rng.standard_normal(50) * 0.5

    result = diebold_mariano_test(actual, pred1, pred2, h=1)
    if not result['significant']:
        assert result['winner'] == 'Inconclusive'

def test_diebold_mariano_small_sample():
    """Three identical 3-point series give p_value 1.0 and no winner."""
    observed = np.array([1, 2, 3])
    # Both forecasts reproduce the observations exactly.
    forecast_a = observed.copy()
    forecast_b = observed.copy()

    outcome = diebold_mariano_test(observed, forecast_a, forecast_b)

    assert outcome['winner'] == 'Inconclusive'
    assert outcome['p_value'] == 1.0
    
def test_diebold_mariano_invalid_loss():
    """An unrecognised loss_type must raise ValueError naming the parameter."""
    # Three separate constant series of length 10.
    actual, pred1, pred2 = ([1] * 10 for _ in range(3))

    with pytest.raises(ValueError, match="loss_type must be"):
        diebold_mariano_test(actual, pred1, pred2, loss_type='INVALID')

def test_christoffersen_test_basic():
    """christoffersen_test returns the expected keys with p-values in [0, 1].

    Returns are drawn from a fixed-seed generator so the sample (and therefore
    the computed statistics) is the same on every run.
    """
    rng = np.random.default_rng(3)
    returns = rng.standard_normal(100) * 0.01
    var_forecasts = np.full(100, -0.015)

    # Plant five large losses; indices 30 and 31 are consecutive.
    returns[[10, 20, 30, 31, 50]] = -0.03

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    # Structural checks: every top-level key must be present.
    for key in ('unconditional_coverage', 'independence', 'overall_pass', 'hit_rate_actual'):
        assert key in result

    assert isinstance(result['hit_rate_actual'], float)
    assert 0 <= result['unconditional_coverage']['p_value'] <= 1.0
    assert 0 <= result['independence']['p_value'] <= 1.0


def test_christoffersen_flags_too_few_var_breaches():
    """With no VaR breaches at all, the backtest must fail and report a too-low hit rate."""
    n_obs = 252
    flat_returns = np.full(n_obs, 0.001)
    var_limits = np.full(n_obs, 0.02)

    outcome = christoffersen_test(flat_returns, var_limits, target_alpha=0.05)

    # Zero hits were observed, and the diagnostic text must say so.
    assert outcome["hit_rate_actual"] == 0.0
    assert "too low" in outcome["diagnostic"]
    # The coverage check fails, and so does the overall verdict.
    assert outcome["unconditional_coverage"]["pass"] is False
    assert outcome["overall_pass"] is False


def test_validation_report_keeps_renamed_model_winner(capsys):
    """The report prints PASS when the winner label is a renamed model ("Machine")."""
    significant_win = dict(
        stat=2.5,
        p_value=0.01,
        winner="Machine",
        significant=True,
    )

    print_validation_report(dm_results=significant_win, model_name="Machine Portfolio")
    printed = capsys.readouterr().out

    for expected in ("PASS", "significantly outperformed baseline"):
        assert expected in printed


def test_validation_report_explains_conservative_var_failure(capsys):
    """The report must surface the too-low / conservative diagnostic for a failed VaR check."""
    diagnostic = "VaR breach rate is too low; risk limits look conservative or overestimated."
    coverage = {"p_value": 0.01, "pass": False}
    independence = {"p_value": 0.80, "pass": True}
    # Coverage fails while independence passes.
    var_results = {
        "hit_rate_target": 0.05,
        "hit_rate_actual": 0.01,
        "unconditional_coverage": coverage,
        "independence": independence,
        "overall_pass": False,
        "diagnostic": diagnostic,
    }

    print_validation_report(var_results=var_results)
    printed = capsys.readouterr().out

    assert all(fragment in printed for fragment in ("too low", "conservative"))

def test_christoffersen_fails_independence_on_clusters():
    """Back-to-back breaches should pass coverage but fail independence."""
    n_obs = 252
    returns = np.full(n_obs, 0.001)
    var_forecasts = np.full(n_obs, 0.02)

    # 13 breaches (~5% of 252), placed in one unbroken run.
    returns[100:113] = -0.05

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    coverage_ok = result["unconditional_coverage"]["pass"]
    independence_ok = result["independence"]["pass"]

    assert coverage_ok is True        # ~5% total hit rate
    assert independence_ok is False   # But highly clustered
    assert result["overall_pass"] is False
    assert "clustered" in result["diagnostic"].lower()

@pytest.mark.parametrize("seed", [42, 1337, 9999, 123456, 7])
def test_monte_carlo_is_deterministic(seed):
    """The same seed must reproduce the simulated paths; a different seed must not."""
    import pandas as pd
    from backtest import monte_carlo
    from config import DEFAULT_CONFIG

    symbols = ["AAPL", "MSFT"]
    weights = pd.Series([0.6, 0.4], index=symbols)
    exp_rets = pd.Series([0.08, 0.10], index=symbols)
    cov_mat = pd.DataFrame([[0.04, 0.02], [0.02, 0.05]], index=symbols, columns=symbols)
    capital = 100000.0

    # Work on a copy with a reduced simulation count and horizon.
    cfg = DEFAULT_CONFIG.copy()
    cfg["monte_carlo_sims"] = 100
    cfg["monte_carlo_years"] = 0.5

    def simulate(run_seed):
        """Run one simulation with the shared inputs and return only the paths."""
        paths, _stats = monte_carlo(
            weights, exp_rets, cov_mat, capital, cfg, seed=run_seed, return_paths=True
        )
        return paths

    # Two runs with the same seed must agree to within a tight relative tolerance.
    first = simulate(seed)
    repeat = simulate(seed)
    np.testing.assert_allclose(first, repeat, rtol=1e-8)

    # A neighbouring seed must produce different paths.
    other = simulate(seed + 1)
    assert not np.allclose(first, other)