Spaces:
Running
Running
"""
Tests for feature engineering functions.
"""
| import pytest | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| from app.features import ( | |
| compute_returns, | |
| compute_sma, | |
| compute_ema, | |
| compute_rsi, | |
| compute_volatility, | |
| generate_symbol_features, | |
| ) | |
class TestComputeReturns:
    """Checks ``compute_returns`` against small hand-computed price series."""

    def test_simple_returns(self):
        """One-period returns: NaN first, then +10% and about -4.5%."""
        tolerance = 0.001
        result = compute_returns(pd.Series([100, 110, 105]))

        # Nothing precedes the first price, so its return must be missing.
        assert pd.isna(result.iloc[0])

        # 100 -> 110 is +10%; 110 -> 105 is roughly -4.5%.
        for position, expected in ((1, 0.10), (2, -0.0454545)):
            assert abs(result.iloc[position] - expected) < tolerance

    def test_multi_period_returns(self):
        """With ``periods=2`` the value at position 2 spans 100 -> 110."""
        series = pd.Series([100, 105, 110, 115])
        two_step = compute_returns(series, periods=2)

        deviation = abs(two_step.iloc[2] - 0.10)
        assert deviation < 0.001
class TestComputeSMA:
    """Checks ``compute_sma`` on a full window and on a too-short series."""

    def test_simple_case(self):
        """The last 3-window average of 1..5 is mean(3, 4, 5) == 4."""
        averaged = compute_sma(pd.Series([1, 2, 3, 4, 5]), window=3)

        latest = averaged.iloc[-1]
        assert abs(latest - 4.0) < 0.001

    def test_handles_short_series(self):
        """A series shorter than the window still yields some non-NaN value."""
        short_series = pd.Series([1, 2])
        averaged = compute_sma(short_series, window=5)

        # At least one entry must be defined even though len < window.
        assert averaged.notna().any()
class TestComputeEMA:
    """Compares ``compute_ema`` with ``compute_sma`` after a late price jump."""

    def test_more_weight_to_recent(self):
        """After a jump in the final price, the last EMA exceeds the last SMA."""
        series = pd.Series([1, 1, 1, 1, 5])  # flat, then a jump at the end

        latest_ema = compute_ema(series, span=3).iloc[-1]
        latest_sma = compute_sma(series, window=3).iloc[-1]

        # The jump should pull the exponential average above the simple one.
        assert latest_sma < latest_ema
class TestComputeRSI:
    """Behavioural checks for ``compute_rsi`` called with its default arguments."""

    def test_rsi_range(self):
        """Every defined RSI value of a random walk lies within [0, 100]."""
        # A local generator keeps the data reproducible without reseeding
        # NumPy's global RNG, which would leak state into other tests.
        rng = np.random.default_rng(42)
        prices = pd.Series(100 + np.cumsum(rng.standard_normal(50)))
        rsi = compute_rsi(prices)

        # NaN compares False against any bound, so undefined entries (if the
        # implementation emits any) are dropped before the range check.
        valid_rsi = rsi.dropna()
        assert not valid_rsi.empty, "compute_rsi produced no defined values"
        assert (valid_rsi >= 0).all()
        assert (valid_rsi <= 100).all()

    def test_uptrend_high_rsi(self):
        """A strictly rising series ends with an RSI of at least 50."""
        prices = pd.Series(range(1, 51), dtype=float)  # 1.0 .. 50.0
        valid_rsi = compute_rsi(prices).dropna()

        # Fail loudly instead of passing vacuously when nothing is defined.
        assert not valid_rsi.empty, "compute_rsi produced no defined values"
        # The exact level depends on implementation details; only the
        # direction (>= 50 for an uptrend) is asserted.
        assert valid_rsi.iloc[-1] >= 50

    def test_downtrend_low_rsi(self):
        """A strictly falling series ends with an RSI below 20."""
        # Float prices, matching the uptrend test's input dtype.
        prices = pd.Series(range(30, 0, -1), dtype=float)  # 30.0 .. 1.0
        rsi = compute_rsi(prices)

        # A NaN last value also fails here, since NaN < 20 is False.
        assert rsi.iloc[-1] < 20
class TestComputeVolatility:
    """Checks ``compute_volatility`` (default arguments) on 10-element inputs."""

    def test_volatility_positive(self):
        """All defined volatility values are non-negative."""
        returns = pd.Series(
            [0.01, -0.02, 0.015, -0.01, 0.02, 0.01, -0.01, 0.02, -0.02, 0.01]
        )
        vol = compute_volatility(returns)

        # Only defined values are checked; ``.all()`` on an empty series is
        # True, so require at least one value to keep the test meaningful.
        valid_vol = vol.dropna()
        assert not valid_vol.empty, "compute_volatility produced no defined values"
        assert (valid_vol >= 0).all()

    def test_flat_returns_zero_vol(self):
        """Constant returns give (numerically) zero volatility at the end."""
        returns = pd.Series([0.01] * 10)  # no dispersion at all
        vol = compute_volatility(returns)

        assert abs(vol.iloc[-1]) < 0.0001
class TestGenerateSymbolFeatures:
    """Checks the frame returned by ``generate_symbol_features`` for "TEST".

    Every test takes the ``sample_price_data`` fixture, which is defined
    outside this module.
    """

    def test_feature_columns_created(self, sample_price_data):
        """The default call yields the return/SMA/EMA/RSI/volatility columns."""
        produced = generate_symbol_features(sample_price_data, "TEST").columns

        expected_columns = (
            "TEST_ret1",
            "TEST_SMA_5",
            "TEST_EMA_10",
            "TEST_RSI_14",
            "TEST_vol_10",
        )
        for column in expected_columns:
            assert column in produced

    def test_lagged_features(self, sample_price_data):
        """Each requested lag produces its own lagged-return column."""
        requested_lags = [1, 2, 5]
        produced = generate_symbol_features(
            sample_price_data,
            "TEST",
            include_lags=requested_lags,
        ).columns

        for column in ("TEST_lag_ret1_1", "TEST_lag_ret1_2", "TEST_lag_ret1_5"):
            assert column in produced

    def test_output_same_index(self, sample_price_data):
        """The feature frame has the same length and index as the input."""
        features = generate_symbol_features(sample_price_data, "TEST")
        source_index = sample_price_data.index

        assert len(features) == len(sample_price_data)
        assert features.index.equals(source_index)

    def test_no_future_leakage(self, sample_price_data):
        """Ensure features don't use future data.

        The lag-1 return at row ``t`` must equal the plain return at row
        ``t - 1``; a single mid-series row is spot-checked.
        """
        features = generate_symbol_features(sample_price_data, "TEST")

        # Row 10 is used to stay clear of leading rows that may be NaN.
        position = 10
        previous_return = features["TEST_ret1"].iloc[position - 1]
        lagged_value = features["TEST_lag_ret1_1"].iloc[position]

        assert abs(lagged_value - previous_return) < 0.0001
class TestTargetCreation:
    """Checks how a next-day-return target is derived from closing prices."""

    def test_target_shift(self, sample_price_data):
        """Target should be next-day return (shifted by -1).

        At row ``t`` the target must equal
        ``(close[t + 1] - close[t]) / close[t]``, and the final row must be
        NaN because no later price exists.
        """
        close = sample_price_data["close"]
        # ``compute_returns`` comes from the module-level import.
        target = compute_returns(close).shift(-1)  # next day's return

        # Walk (today, tomorrow, target) triples instead of indexing by position.
        triples = zip(close.iloc[:-1], close.iloc[1:], target.iloc[:-1])
        for today, tomorrow, actual in triples:
            if pd.isna(actual):
                continue  # undefined targets are not compared
            expected = (tomorrow - today) / today
            assert abs(actual - expected) < 0.0001

        # Last value should be NaN (no future data).
        assert pd.isna(target.iloc[-1])