"""Tests for feature engineering functions."""

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import pytest

from app.features import (
    compute_returns,
    compute_sma,
    compute_ema,
    compute_rsi,
    compute_volatility,
    generate_symbol_features,
)


class TestComputeReturns:
    """Checks ``compute_returns`` against small hand-computed price series."""

    def test_simple_returns(self):
        """Default returns: NaN first, then the relative change between prices."""
        prices = pd.Series([100, 110, 105])
        returns = compute_returns(prices)

        assert pd.isna(returns.iloc[0])  # First return is NaN
        assert abs(returns.iloc[1] - 0.10) < 0.001  # 10% return
        assert abs(returns.iloc[2] - (-0.0454545)) < 0.001  # -4.5% return

    def test_multi_period_returns(self):
        """With ``periods=2`` the return spans two steps of the series."""
        prices = pd.Series([100, 105, 110, 115])
        returns = compute_returns(prices, periods=2)

        # 2-period return from 100 to 110
        assert abs(returns.iloc[2] - 0.10) < 0.001


class TestComputeSMA:
    """Checks ``compute_sma`` on a full window and on a too-short series."""

    def test_simple_case(self):
        """The last SMA value is the mean of the last ``window`` prices."""
        prices = pd.Series([1, 2, 3, 4, 5])
        sma = compute_sma(prices, window=3)

        # SMA of last 3 values [3, 4, 5] = 4
        assert abs(sma.iloc[-1] - 4.0) < 0.001

    def test_handles_short_series(self):
        """A window longer than the series must not give an all-NaN result."""
        prices = pd.Series([1, 2])
        sma = compute_sma(prices, window=5)

        # Should still produce values with min_periods=1
        # (the implementation is not visible from this file).
        assert not sma.isna().all()


class TestComputeEMA:
    """Checks that ``compute_ema`` reacts more strongly to recent prices."""

    def test_more_weight_to_recent(self):
        """After a jump in the last price the EMA must exceed the SMA."""
        prices = pd.Series([1, 1, 1, 1, 5])  # Jump at end
        ema = compute_ema(prices, span=3)
        sma = compute_sma(prices, window=3)

        # EMA should be higher than SMA due to recent jump
        assert ema.iloc[-1] > sma.iloc[-1]


class TestComputeRSI:
    """Checks the value range and trend sensitivity of ``compute_rsi``."""

    def test_rsi_range(self):
        """Every RSI value of a seeded random walk lies within [0, 100]."""
        # Generate random walk (seeded so the test is deterministic)
        np.random.seed(42)
        prices = pd.Series(100 + np.cumsum(np.random.randn(50)))
        rsi = compute_rsi(prices)

        # RSI should be between 0 and 100. A NaN compares False on both
        # sides, so this also requires the output to contain no NaN.
        assert (rsi >= 0).all()
        assert (rsi <= 100).all()

    def test_uptrend_high_rsi(self):
        """A strictly rising series must end with an RSI of at least 50."""
        # Strong uptrend with enough data points
        prices = pd.Series([float(i) for i in range(1, 51)])  # 1 to 50
        rsi = compute_rsi(prices)

        # Note: RSI depends on implementation details, hence the loose
        # ">= 50" bound rather than "close to 100".
        valid_rsi = rsi.dropna()

        # Fail loudly instead of skipping the check: an all-NaN result would
        # otherwise let this test pass without asserting anything.
        assert not valid_rsi.empty, "compute_rsi returned only NaN for an uptrend"
        assert valid_rsi.iloc[-1] >= 50  # Uptrend should have RSI >= 50

    def test_downtrend_low_rsi(self):
        """A strictly falling series must end with an RSI below 20."""
        # Strong downtrend
        prices = pd.Series(range(30, 0, -1))  # 30 to 1
        rsi = compute_rsi(prices)

        # Should be low (close to 0)
        assert rsi.iloc[-1] < 20


class TestComputeVolatility:
    """Checks basic properties of ``compute_volatility``."""

    def test_volatility_positive(self):
        """All defined volatility values are non-negative."""
        returns = pd.Series(
            [0.01, -0.02, 0.015, -0.01, 0.02, 0.01, -0.01, 0.02, -0.02, 0.01]
        )
        vol = compute_volatility(returns)

        # Only check non-NaN values
        valid_vol = vol.dropna()
        assert (valid_vol >= 0).all()

    def test_flat_returns_zero_vol(self):
        """Constant returns must produce (near) zero volatility at the end."""
        returns = pd.Series([0.01] * 10)  # Constant returns
        vol = compute_volatility(returns)

        assert abs(vol.iloc[-1]) < 0.0001


class TestGenerateSymbolFeatures:
    """Checks the frame produced by ``generate_symbol_features``.

    ``sample_price_data`` is a pytest fixture defined outside this file
    (presumably in ``conftest.py`` -- confirm).
    """

    def test_feature_columns_created(self, sample_price_data):
        """The default call creates the expected symbol-prefixed columns."""
        features = generate_symbol_features(sample_price_data, "TEST")

        # Check expected columns exist
        assert "TEST_ret1" in features.columns
        assert "TEST_SMA_5" in features.columns
        assert "TEST_EMA_10" in features.columns
        assert "TEST_RSI_14" in features.columns
        assert "TEST_vol_10" in features.columns

    def test_lagged_features(self, sample_price_data):
        """Each requested lag yields its own ``lag_ret1_<n>`` column."""
        features = generate_symbol_features(
            sample_price_data, "TEST", include_lags=[1, 2, 5]
        )

        assert "TEST_lag_ret1_1" in features.columns
        assert "TEST_lag_ret1_2" in features.columns
        assert "TEST_lag_ret1_5" in features.columns

    def test_output_same_index(self, sample_price_data):
        """The feature frame keeps the length and index of the input."""
        features = generate_symbol_features(sample_price_data, "TEST")

        assert len(features) == len(sample_price_data)
        assert features.index.equals(sample_price_data.index)

    def test_no_future_leakage(self, sample_price_data):
        """Ensure features don't use future data."""
        features = generate_symbol_features(sample_price_data, "TEST")

        # Lagged returns should be shifted:
        # lag_ret1_1 at time t should equal ret1 at time t-1
        ret1 = features["TEST_ret1"]
        lag1 = features["TEST_lag_ret1_1"]

        # Check a middle value (not first few which may be NaN)
        idx = 10
        assert abs(lag1.iloc[idx] - ret1.iloc[idx - 1]) < 0.0001


class TestTargetCreation:
    """Checks how a next-day target is derived from one-period returns."""

    def test_target_shift(self, sample_price_data):
        """Target should be next-day return (shifted by -1)."""
        close = sample_price_data["close"]
        ret1 = compute_returns(close)
        target = ret1.shift(-1)  # Next day's return

        # At time t, target should be the return from t to t+1,
        # which equals (close[t+1] - close[t]) / close[t].
        closes = close.to_numpy(dtype=float)
        expected = (closes[1:] - closes[:-1]) / closes[:-1]
        actual = target.to_numpy(dtype=float)[:-1]

        # Positions where the target is NaN are skipped; everywhere else
        # the value must match within the tolerance. A NaN difference
        # compares False and is therefore reported as a mismatch.
        known = ~np.isnan(actual)
        within_tol = np.abs(actual - expected) < 0.0001
        mismatched = np.flatnonzero(known & ~within_tol)
        assert mismatched.size == 0, (
            f"target differs from next-day return at positions "
            f"{mismatched.tolist()}"
        )

        # Last value should be NaN (no future data)
        assert pd.isna(target.iloc[-1])