# Spaces: engineportf / portfolio-engine (Sleeping)
# File size: 5,884 Bytes
# 208fbf8

import sys
import os

# Make the modules imported below resolvable regardless of where pytest is
# launched: both this file's directory and its parent are pushed to the front
# of sys.path (the parent is inserted last, so it is searched first).
_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _this_dir)
sys.path.insert(0, os.path.dirname(_this_dir))

import numpy as np

from validation import christoffersen_test, print_validation_report, diebold_mariano_test
import pytest

def test_diebold_mariano_mse():
    """Check the Diebold-Mariano test under MSE loss on clearly unequal models.

    Model 1's forecast noise is ten times smaller than Model 2's, so the
    result is expected to be significant, name Model 1 as the winner, carry a
    negative statistic and report a p-value inside [0, 1].
    """
    # A locally seeded generator keeps the data identical on every run; the
    # unseeded global RNG made any failure impossible to reproduce.
    rng = np.random.default_rng(12345)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MSE')
    assert result['significant'] is True
    assert result['winner'] == 'Model 1'
    assert result['stat'] < 0  # MSE1 < MSE2 so mean_d < 0
    assert 0 <= result['p_value'] <= 1.0

def test_diebold_mariano_mae():
    """Check the Diebold-Mariano test under MAE loss on clearly unequal models.

    Model 1's forecast noise is ten times smaller than Model 2's, so the
    result is expected to be significant with Model 1 as the winner.
    """
    # A locally seeded generator keeps the data identical on every run; the
    # unseeded global RNG made any failure impossible to reproduce.
    rng = np.random.default_rng(2024)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MAE')
    assert result['significant'] is True
    assert result['winner'] == 'Model 1'

def test_diebold_mariano_inconclusive():
    """A non-significant comparison must be labelled 'Inconclusive'.

    Both forecasts carry the same noise level. The winner label is only
    checked when the test reports no significant difference.
    """
    # Seeding fixes which branch of the conditional below is exercised, so
    # the test can no longer pass or fail depending on the random draw.
    rng = np.random.default_rng(777)
    actual = rng.standard_normal(50)
    pred1 = actual + rng.standard_normal(50) * 0.5
    pred2 = actual + rng.standard_normal(50) * 0.5

    result = diebold_mariano_test(actual, pred1, pred2, h=1)
    if not result['significant']:
        assert result['winner'] == 'Inconclusive'

def test_diebold_mariano_small_sample():
    """Three identical observations must give p-value 1.0 and no winner."""
    observed = np.array([1, 2, 3])
    # Separate copies, so each argument is its own array object.
    first_forecast = observed.copy()
    second_forecast = observed.copy()

    outcome = diebold_mariano_test(observed, first_forecast, second_forecast)

    assert outcome['p_value'] == 1.0
    assert outcome['winner'] == 'Inconclusive'
    
def test_diebold_mariano_invalid_loss():
    """An unrecognised loss_type must be rejected with a ValueError."""
    constant_series = [1] * 10
    expects_value_error = pytest.raises(ValueError, match="loss_type must be")

    with expects_value_error:
        diebold_mariano_test(
            constant_series,
            list(constant_series),
            list(constant_series),
            loss_type='INVALID',
        )

def test_christoffersen_test_basic():
    """Smoke-test the shape of the Christoffersen result on noisy returns.

    Five returns of -0.03 are planted against a constant VaR forecast of
    -0.015 (two of them on consecutive days). The test then checks that the
    expected keys are present, that the actual hit rate is a float and that
    both p-values lie inside [0, 1].
    """
    # A locally seeded generator keeps the data identical on every run; the
    # unseeded global RNG made any failure impossible to reproduce.
    rng = np.random.default_rng(2718)
    returns = rng.standard_normal(100) * 0.01
    var_forecasts = np.full(100, -0.015)

    # Plant the five large losses in a single assignment.
    returns[[10, 20, 30, 31, 50]] = -0.03

    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)
    assert 'unconditional_coverage' in result
    assert 'independence' in result
    assert 'overall_pass' in result
    assert 'hit_rate_actual' in result

    assert isinstance(result['hit_rate_actual'], float)
    assert 0 <= result['unconditional_coverage']['p_value'] <= 1.0
    assert 0 <= result['independence']['p_value'] <= 1.0


def test_christoffersen_flags_too_few_var_breaches():
    """A sample with a 0.0 hit rate must fail coverage instead of passing.

    Constant returns and a constant VaR forecast are used. The result is
    expected to fail overall and on unconditional coverage, with a
    diagnostic that mentions the breach rate being "too low".
    """
    n_days = 252
    flat_returns = np.full(n_days, 0.001)
    flat_var = np.full(n_days, 0.02)

    report = christoffersen_test(flat_returns, flat_var, target_alpha=0.05)
    coverage = report["unconditional_coverage"]

    assert report["overall_pass"] is False
    assert coverage["pass"] is False
    assert report["hit_rate_actual"] == 0.0
    assert "too low" in report["diagnostic"]


def test_validation_report_keeps_renamed_model_winner(capsys):
    """The printed report must still show PASS when the winner is renamed.

    The winner label here is "Machine" and the report is asked to present
    the model as "Machine Portfolio".
    """
    significant_win = dict(
        stat=2.5,
        p_value=0.01,
        winner="Machine",
        significant=True,
    )

    print_validation_report(
        dm_results=significant_win,
        model_name="Machine Portfolio",
    )
    printed = capsys.readouterr().out

    for fragment in ("PASS", "significantly outperformed baseline"):
        assert fragment in printed


def test_validation_report_explains_conservative_var_failure(capsys):
    """The printed VaR section must surface the "too low" diagnostic.

    The input fails unconditional coverage but passes independence, so the
    printed text should describe an over-conservative model, not clustering.
    """
    coverage_outcome = {"p_value": 0.01, "pass": False}
    independence_outcome = {"p_value": 0.80, "pass": True}
    conservative_failure = {
        "hit_rate_target": 0.05,
        "hit_rate_actual": 0.01,
        "unconditional_coverage": coverage_outcome,
        "independence": independence_outcome,
        "overall_pass": False,
        "diagnostic": "VaR breach rate is too low; risk limits look conservative or overestimated.",
    }

    print_validation_report(var_results=conservative_failure)
    printed = capsys.readouterr().out

    for fragment in ("too low", "conservative"):
        assert fragment in printed

def test_christoffersen_fails_independence_on_clusters():
    """Back-to-back breaches must fail independence despite on-target coverage."""
    n_days = 252
    series = np.full(n_days, 0.001)
    var_line = np.full(n_days, 0.02)

    # 13 consecutive large losses: about 5% of the sample, all in one run.
    series[100:113] = -0.05

    report = christoffersen_test(series, var_line, target_alpha=0.05)

    # Overall hit rate is about 5%, so coverage alone looks fine...
    assert report["unconditional_coverage"]["pass"] is True
    # ...but the breaches are bunched together.
    assert report["independence"]["pass"] is False
    assert report["overall_pass"] is False
    assert "clustered" in report["diagnostic"].lower()

@pytest.mark.parametrize("seed", [42, 1337, 9999, 123456, 7])
def test_monte_carlo_is_deterministic(seed):
    """Same seed must reproduce the simulated paths; a different seed must not."""
    import pandas as pd
    from backtest import monte_carlo
    from config import DEFAULT_CONFIG

    symbols = ["AAPL", "MSFT"]
    allocation = pd.Series([0.6, 0.4], index=symbols)
    expected = pd.Series([0.08, 0.10], index=symbols)
    covariance = pd.DataFrame(
        [[0.04, 0.02], [0.02, 0.05]],
        index=symbols,
        columns=symbols,
    )
    starting_capital = 100000.0

    settings = DEFAULT_CONFIG.copy()
    settings["monte_carlo_sims"] = 100
    settings["monte_carlo_years"] = 0.5

    def simulate(with_seed):
        # Only the paths are compared; the accompanying stats are discarded.
        paths, _stats = monte_carlo(
            allocation,
            expected,
            covariance,
            starting_capital,
            settings,
            seed=with_seed,
            return_paths=True,
        )
        return paths

    # Two runs sharing a seed must agree to within rtol=1e-8.
    first_run = simulate(seed)
    repeat_run = simulate(seed)
    np.testing.assert_allclose(first_run, repeat_run, rtol=1e-8)

    # A neighbouring seed must produce visibly different paths.
    other_run = simulate(seed + 1)
    assert not np.allclose(first_run, other_run)