# portfolio-engine/tests/test_validation.py
# Captured from the repository file view (uploader: engineportf).
# Commit: "Initial Deployment from Local Engine" (208fbf8, verified); file size 5.88 kB.
import sys
import os
_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _this_dir)
sys.path.insert(0, os.path.dirname(_this_dir))
import numpy as np
from validation import christoffersen_test, print_validation_report, diebold_mariano_test
import pytest
def test_diebold_mariano_mse():
    """Diebold-Mariano with MSE loss should favour the clearly better forecast.

    Model 1's forecast noise (scale 0.1) is ten times smaller than Model 2's
    (scale 1.0), so the test expects a significant verdict for Model 1.
    """
    # Seeded generator: the draws are identical on every run, so a failure
    # here is reproducible instead of depending on the global random state.
    rng = np.random.default_rng(0)
    n_obs = 50
    actual = rng.standard_normal(n_obs)
    pred1 = actual + rng.standard_normal(n_obs) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(n_obs) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MSE')

    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
    assert result['stat'] < 0  # MSE1 < MSE2 so mean_d < 0
    assert 0 <= result['p_value'] <= 1.0
def test_diebold_mariano_mae():
    """Diebold-Mariano with MAE loss should favour the clearly better forecast.

    Same set-up as the MSE variant: Model 1's noise scale is 0.1, Model 2's
    is 1.0, and a significant verdict for Model 1 is expected.
    """
    # Seeded generator keeps the sample, and so the verdict, reproducible.
    rng = np.random.default_rng(1)
    n_obs = 50
    actual = rng.standard_normal(n_obs)
    pred1 = actual + rng.standard_normal(n_obs) * 0.1
    pred2 = actual + rng.standard_normal(n_obs) * 1.0

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MAE')

    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
def test_diebold_mariano_inconclusive():
    """Equally noisy forecasts: a non-significant result must name no winner.

    Both models use the same noise scale (0.5). Whenever the test reports a
    non-significant difference, the winner must be 'Inconclusive'.
    """
    n_obs = 50
    significant_flags = []
    # Fixed seeds make every run identical; several are used so that the
    # conditional check below is exercised rather than skipped by chance.
    for seed in range(5):
        rng = np.random.default_rng(seed)
        actual = rng.standard_normal(n_obs)
        pred1 = actual + rng.standard_normal(n_obs) * 0.5
        pred2 = actual + rng.standard_normal(n_obs) * 0.5

        result = diebold_mariano_test(actual, pred1, pred2, h=1)

        if not result['significant']:
            assert result['winner'] == 'Inconclusive'
        significant_flags.append(bool(result['significant']))

    # Guard against the test passing without having asserted anything.
    assert not all(significant_flags)
def test_diebold_mariano_small_sample():
    """Three identical observations give p = 1.0 and no declared winner."""
    observed = np.array([1, 2, 3])
    # Both forecasts reproduce the observations exactly (separate arrays).
    forecast_a = observed.copy()
    forecast_b = observed.copy()

    outcome = diebold_mariano_test(observed, forecast_a, forecast_b)

    assert outcome['p_value'] == 1.0
    assert outcome['winner'] == 'Inconclusive'
def test_diebold_mariano_invalid_loss():
    """An unsupported ``loss_type`` is rejected with a ValueError."""
    flat_series = [1] * 10
    with pytest.raises(ValueError, match="loss_type must be"):
        diebold_mariano_test(
            flat_series,
            list(flat_series),
            list(flat_series),
            loss_type='INVALID',
        )
def test_christoffersen_test_basic():
    """Christoffersen output exposes the expected keys, types and p-value ranges."""
    # Seeded generator so the background returns are the same on every run.
    rng = np.random.default_rng(0)
    n_obs = 100
    returns = rng.standard_normal(n_obs) * 0.01
    var_forecasts = np.full(n_obs, -0.015)
    # Plant returns well below the VaR line, including one back-to-back
    # pair at positions 30 and 31.
    returns[[10, 20, 30, 31, 50]] = -0.03

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)

    expected_keys = (
        'unconditional_coverage',
        'independence',
        'overall_pass',
        'hit_rate_actual',
    )
    for key in expected_keys:
        assert key in result
    assert isinstance(result['hit_rate_actual'], float)
    assert 0 <= result['unconditional_coverage']['p_value'] <= 1.0
    assert 0 <= result['independence']['p_value'] <= 1.0
def test_christoffersen_flags_too_few_var_breaches():
    """With zero VaR hits the backtest must fail and report a too-low breach rate."""
    n_days = 252
    flat_returns = np.full(n_days, 0.001)
    var_line = np.full(n_days, 0.02)

    outcome = christoffersen_test(flat_returns, var_line, target_alpha=0.05)

    assert outcome["overall_pass"] is False
    coverage = outcome["unconditional_coverage"]
    assert coverage["pass"] is False
    assert outcome["hit_rate_actual"] == 0.0
    assert "too low" in outcome["diagnostic"]
def test_validation_report_keeps_renamed_model_winner(capsys):
    """A significant DM result whose winner label was renamed still prints PASS."""
    significant_dm = dict(
        stat=2.5,
        p_value=0.01,
        winner="Machine",
        significant=True,
    )

    print_validation_report(dm_results=significant_dm, model_name="Machine Portfolio")

    printed = capsys.readouterr().out
    for fragment in ("PASS", "significantly outperformed baseline"):
        assert fragment in printed
def test_validation_report_explains_conservative_var_failure(capsys):
    """The report must surface the 'too low / conservative' VaR diagnostic text."""
    failed_coverage = {"p_value": 0.01, "pass": False}
    passed_independence = {"p_value": 0.80, "pass": True}
    conservative_var = {
        "hit_rate_target": 0.05,
        "hit_rate_actual": 0.01,
        "unconditional_coverage": failed_coverage,
        "independence": passed_independence,
        "overall_pass": False,
        "diagnostic": "VaR breach rate is too low; risk limits look conservative or overestimated.",
    }

    print_validation_report(var_results=conservative_var)

    printed = capsys.readouterr().out
    for fragment in ("too low", "conservative"):
        assert fragment in printed
def test_christoffersen_fails_independence_on_clusters():
    """Breaches at the target rate but all consecutive must fail independence."""
    n_days = 252
    series = np.full(n_days, 0.001)
    var_line = np.full(n_days, 0.02)
    # 13 breaches (~5% of 252) packed into one unbroken run: days 100..112.
    series[100:113] = -0.05

    outcome = christoffersen_test(series, var_line, target_alpha=0.05)

    # Overall hit rate is ~5%, so coverage alone passes...
    assert outcome["unconditional_coverage"]["pass"] is True
    # ...but the breaches are highly clustered, which sinks the overall verdict.
    assert outcome["independence"]["pass"] is False
    assert outcome["overall_pass"] is False
    assert "clustered" in outcome["diagnostic"].lower()
@pytest.mark.parametrize("seed", [42, 1337, 9999, 123456, 7])
def test_monte_carlo_is_deterministic(seed):
    """The same seed reproduces the simulated paths; a different seed does not."""
    import pandas as pd
    from backtest import monte_carlo
    from config import DEFAULT_CONFIG

    symbols = ["AAPL", "MSFT"]
    allocation = pd.Series([0.6, 0.4], index=symbols)
    expected_returns = pd.Series([0.08, 0.10], index=symbols)
    covariance = pd.DataFrame(
        [[0.04, 0.02], [0.02, 0.05]],
        index=symbols,
        columns=symbols,
    )
    starting_capital = 100000.0

    # Small simulation so the test stays quick.
    sim_cfg = DEFAULT_CONFIG.copy()
    sim_cfg["monte_carlo_sims"] = 100
    sim_cfg["monte_carlo_years"] = 0.5

    def simulate(run_seed):
        # Only the paths are compared; the summary stats are discarded.
        paths, _ = monte_carlo(
            allocation,
            expected_returns,
            covariance,
            starting_capital,
            sim_cfg,
            seed=run_seed,
            return_paths=True,
        )
        return paths

    # Two runs with the same seed must agree (to a relative tolerance of 1e-8).
    first_run = simulate(seed)
    repeat_run = simulate(seed)
    np.testing.assert_allclose(first_run, repeat_run, rtol=1e-8)

    # A neighbouring seed must produce different paths.
    other_run = simulate(seed + 1)
    assert not np.allclose(first_run, other_run)