Spaces:

engineportf
/

portfolio-engine

Sleeping

App Files Files Community

portfolio-engine / tests /test_validation.py

engineportf

Initial Deployment from Local Engine

208fbf8 verified 16 days ago

Raw

History Blame Contribute Delete

5.88 kB

	import sys
	import os

	_this_dir = os.path.dirname(os.path.abspath(__file__))
	sys.path.insert(0, _this_dir)
	sys.path.insert(0, os.path.dirname(_this_dir))

	import numpy as np

	from validation import christoffersen_test, print_validation_report, diebold_mariano_test
	import pytest

	def test_diebold_mariano_mse():
	    """Check the Diebold-Mariano result for a clearly better forecast under MSE loss.

	    ``pred1`` carries noise scaled by 0.1 and ``pred2`` noise scaled by 1.0, so
	    the test expects a significant result naming 'Model 1' as the winner.
	    """
	    # Seeded local generator: the assertions below depend on the drawn sample,
	    # so the data must be identical on every run.
	    rng = np.random.default_rng(0)
	    actual = rng.standard_normal(50)
	    pred1 = actual + rng.standard_normal(50) * 0.1  # Low error
	    pred2 = actual + rng.standard_normal(50) * 1.0  # High error

	    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MSE')
	    assert result['significant'] is True
	    assert result['winner'] == 'Model 1'
	    assert result['stat'] < 0  # MSE1 < MSE2 so mean_d < 0
	    assert 0 <= result['p_value'] <= 1.0

	def test_diebold_mariano_mae():
	    """Check the Diebold-Mariano result for a clearly better forecast under MAE loss.

	    ``pred1`` carries noise scaled by 0.1 and ``pred2`` noise scaled by 1.0, so
	    the test expects a significant result naming 'Model 1' as the winner.
	    """
	    # Seeded local generator keeps the sample (and thus the verdict) reproducible.
	    rng = np.random.default_rng(1)
	    actual = rng.standard_normal(50)
	    pred1 = actual + rng.standard_normal(50) * 0.1
	    pred2 = actual + rng.standard_normal(50) * 1.0

	    result = diebold_mariano_test(actual, pred1, pred2, h=1, loss_type='MAE')
	    assert result['significant'] is True
	    assert result['winner'] == 'Model 1'

	def test_diebold_mariano_inconclusive():
	    """Two forecasts with equal noise scale: a non-significant result must be 'Inconclusive'.

	    The winner is only checked when the test reports no significance; a
	    significant outcome is not asserted on here.
	    """
	    # Seeded local generator so the same branch is exercised on every run
	    # instead of varying with the global NumPy random state.
	    rng = np.random.default_rng(2)
	    actual = rng.standard_normal(50)
	    pred1 = actual + rng.standard_normal(50) * 0.5
	    pred2 = actual + rng.standard_normal(50) * 0.5

	    result = diebold_mariano_test(actual, pred1, pred2, h=1)
	    if not result['significant']:
	        assert result['winner'] == 'Inconclusive'

	def test_diebold_mariano_small_sample():
	    """Three identical observations and forecasts yield p-value 1.0 and no winner."""
	    observed = np.arange(1, 4)
	    forecast_a = observed.copy()
	    forecast_b = observed.copy()

	    outcome = diebold_mariano_test(observed, forecast_a, forecast_b)

	    assert outcome['p_value'] == 1.0
	    assert outcome['winner'] == 'Inconclusive'

	def test_diebold_mariano_invalid_loss():
	    """An unrecognised ``loss_type`` must raise ValueError mentioning 'loss_type must be'."""
	    with pytest.raises(ValueError, match="loss_type must be"):
	        # Ten-element constant series; only the invalid loss_type matters here.
	        diebold_mariano_test([1] * 10, [1] * 10, [1] * 10, loss_type='INVALID')

	def test_christoffersen_test_basic():
	    """Smoke-test the shape of the Christoffersen result on noisy returns.

	    Checks that the expected keys are present, that the actual hit rate is a
	    float, and that both reported p-values lie in [0, 1].
	    """
	    # Seeded local generator so the input series is the same on every run.
	    rng = np.random.default_rng(3)
	    returns = rng.standard_normal(100) * 0.01
	    var_forecasts = np.full(100, -0.015)

	    # Plant five returns of -0.03, below the -0.015 forecast; indices 30 and 31
	    # are adjacent.
	    returns[[10, 20, 30, 31, 50]] = -0.03

	    result = christoffersen_test(returns, var_forecasts, target_alpha=0.05)
	    for key in ('unconditional_coverage', 'independence', 'overall_pass', 'hit_rate_actual'):
	        assert key in result

	    assert isinstance(result['hit_rate_actual'], float)
	    assert 0 <= result['unconditional_coverage']['p_value'] <= 1.0
	    assert 0 <= result['independence']['p_value'] <= 1.0


	def test_christoffersen_flags_too_few_var_breaches():
	    """With no breaches at all, the VaR check must fail and report a too-low hit rate."""
	    n_days = 252
	    flat_returns = np.full(n_days, 0.001)
	    flat_var = np.full(n_days, 0.02)

	    outcome = christoffersen_test(flat_returns, flat_var, target_alpha=0.05)
	    coverage = outcome["unconditional_coverage"]

	    assert outcome["overall_pass"] is False
	    assert coverage["pass"] is False
	    assert outcome["hit_rate_actual"] == 0.0
	    assert "too low" in outcome["diagnostic"]


	def test_validation_report_keeps_renamed_model_winner(capsys):
	    """The printed report must still say PASS when the winner label is a renamed model."""
	    significant_win = dict(
	        stat=2.5,
	        p_value=0.01,
	        winner="Machine",
	        significant=True,
	    )

	    print_validation_report(dm_results=significant_win, model_name="Machine Portfolio")
	    captured = capsys.readouterr()
	    printed = captured.out

	    for expected in ("PASS", "significantly outperformed baseline"):
	        assert expected in printed


	def test_validation_report_explains_conservative_var_failure(capsys):
	    """A coverage failure with too few hits must print the 'too low' / 'conservative' diagnostic."""
	    failed_coverage = {"p_value": 0.01, "pass": False}
	    passed_independence = {"p_value": 0.80, "pass": True}
	    var_results = {
	        "hit_rate_target": 0.05,
	        "hit_rate_actual": 0.01,
	        "unconditional_coverage": failed_coverage,
	        "independence": passed_independence,
	        "overall_pass": False,
	        "diagnostic": "VaR breach rate is too low; risk limits look conservative or overestimated.",
	    }

	    print_validation_report(var_results=var_results)
	    printed = capsys.readouterr().out

	    for expected in ("too low", "conservative"):
	        assert expected in printed

	def test_christoffersen_fails_independence_on_clusters():
	    """Breaches at the target rate but bunched together must fail on independence."""
	    n_days = 252
	    series = np.full(n_days, 0.001)
	    var_line = np.full(n_days, 0.02)

	    # 13 consecutive breaches (about 5% of 252 observations) in one block.
	    series[100:113] = -0.05

	    outcome = christoffersen_test(series, var_line, target_alpha=0.05)

	    # Overall hit rate is about 5%, so coverage alone is expected to pass ...
	    assert outcome["unconditional_coverage"]["pass"] is True
	    # ... while the back-to-back breaches are expected to break independence.
	    assert outcome["independence"]["pass"] is False
	    assert outcome["overall_pass"] is False
	    assert "clustered" in outcome["diagnostic"].lower()

	@pytest.mark.parametrize("seed", [42, 1337, 9999, 123456, 7])
	def test_monte_carlo_is_deterministic(seed):
	    """Same seed must reproduce the simulated paths; the next seed must not."""
	    import pandas as pd
	    from backtest import monte_carlo
	    from config import DEFAULT_CONFIG

	    # Small two-asset portfolio used as simulation input.
	    tickers = ["AAPL", "MSFT"]
	    weights = pd.Series([0.6, 0.4], index=tickers)
	    exp_rets = pd.Series([0.08, 0.10], index=tickers)
	    cov_mat = pd.DataFrame([[0.04, 0.02], [0.02, 0.05]], index=tickers, columns=tickers)
	    capital = 100000.0

	    # Work on a copy of the defaults, overriding the simulation count and horizon.
	    cfg = DEFAULT_CONFIG.copy()
	    cfg["monte_carlo_sims"] = 100
	    cfg["monte_carlo_years"] = 0.5

	    # Two runs with the identical seed.
	    first_paths, _ = monte_carlo(weights, exp_rets, cov_mat, capital, cfg, seed=seed, return_paths=True)
	    second_paths, _ = monte_carlo(weights, exp_rets, cov_mat, capital, cfg, seed=seed, return_paths=True)

	    # Repeated runs must agree to within a relative tolerance of 1e-8.
	    np.testing.assert_allclose(first_paths, second_paths, rtol=1e-8)

	    # A neighbouring seed must produce a different set of paths.
	    other_paths, _ = monte_carlo(weights, exp_rets, cov_mat, capital, cfg, seed=seed + 1, return_paths=True)
	    assert not np.allclose(first_paths, other_paths)