# Spaces: ifieryarrows / copper-mind (Running)
# File size: 5,658 Bytes

"""
Tests for feature engineering functions.
"""

import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from app.features import (
    compute_returns,
    compute_sma,
    compute_ema,
    compute_rsi,
    compute_volatility,
    generate_symbol_features,
)


class TestComputeReturns:
    """Checks ``compute_returns`` against small hand-computed price series."""

    def test_simple_returns(self):
        """One-period returns: NaN first, then +10% and about -4.5%."""
        tolerance = 0.001
        expected_gain = 0.10  # 100 -> 110
        expected_drop = -0.0454545  # 110 -> 105

        result = compute_returns(pd.Series([100, 110, 105]))

        # The first observation has no earlier price to compare with.
        assert pd.isna(result.iloc[0])
        assert abs(result.iloc[1] - expected_gain) < tolerance
        assert abs(result.iloc[2] - expected_drop) < tolerance

    def test_multi_period_returns(self):
        """With ``periods=2`` the value at position 2 spans 100 -> 110."""
        tolerance = 0.001
        series = pd.Series([100, 105, 110, 115])

        result = compute_returns(series, periods=2)

        # Position 2 looks back two steps to the opening price of 100.
        two_step_return = result.iloc[2]
        assert abs(two_step_return - 0.10) < tolerance


class TestComputeSMA:
    """Checks ``compute_sma`` on a short ramp and on an under-length series."""

    def test_simple_case(self):
        """The final 3-wide average of 1..5 is the mean of [3, 4, 5], i.e. 4."""
        ramp = pd.Series([1, 2, 3, 4, 5])

        averaged = compute_sma(ramp, window=3)

        latest = averaged.iloc[-1]
        assert abs(latest - 4.0) < 0.001

    def test_handles_short_series(self):
        """A series shorter than the window must still yield some value."""
        too_short = pd.Series([1, 2])

        averaged = compute_sma(too_short, window=5)

        # Presumably the implementation uses min_periods=1 (not visible from
        # this file); the test only requires at least one non-NaN entry.
        assert averaged.notna().any()


class TestComputeEMA:
    """Checks that ``compute_ema`` reacts more strongly than ``compute_sma``."""

    def test_more_weight_to_recent(self):
        """After a jump on the final point, the EMA must end above the SMA."""
        # Flat at 1, then a single jump to 5 on the last observation.
        series = pd.Series([1, 1, 1, 1, 5])

        latest_ema = compute_ema(series, span=3).iloc[-1]
        latest_sma = compute_sma(series, window=3).iloc[-1]

        assert latest_sma < latest_ema


class TestComputeRSI:
    """Checks ``compute_rsi`` (called with its default arguments)."""

    def test_rsi_range(self):
        """RSI of a seeded random walk must stay within [0, 100]."""
        # A local RandomState yields the same draws as seeding the global
        # generator with 42, without reseeding it for every other test.
        rng = np.random.RandomState(42)
        prices = pd.Series(100 + np.cumsum(rng.randn(50)))
        rsi = compute_rsi(prices)

        # RSI should be between 0 and 100. A NaN fails both comparisons,
        # so these assertions also require a fully populated result.
        assert (rsi >= 0).all()
        assert (rsi <= 100).all()

    def test_uptrend_high_rsi(self):
        """A strictly rising series must end with an RSI of at least 50."""
        # Strong uptrend with enough data points
        prices = pd.Series([float(i) for i in range(1, 51)])  # 1 to 50
        rsi = compute_rsi(prices)

        # Note: the exact RSI level depends on implementation details, so
        # only the weak ">= 50" bound is checked.
        valid_rsi = rsi.dropna()
        # Fail loudly instead of silently passing when nothing is left to
        # check after dropping NaNs.
        assert not valid_rsi.empty, "compute_rsi returned only NaN for an uptrend"
        assert valid_rsi.iloc[-1] >= 50  # Uptrend should have RSI >= 50

    def test_downtrend_low_rsi(self):
        """A strictly falling series must end with an RSI below 20."""
        # Strong downtrend
        prices = pd.Series(range(30, 0, -1))  # 30 to 1
        rsi = compute_rsi(prices)

        # Should be low; the accepted bound is < 20.
        assert rsi.iloc[-1] < 20


class TestComputeVolatility:
    """Checks ``compute_volatility`` (called with its default arguments)."""

    def test_volatility_positive(self):
        """Every defined volatility value must be non-negative."""
        returns = pd.Series([0.01, -0.02, 0.015, -0.01, 0.02, 0.01, -0.01, 0.02, -0.02, 0.01])
        vol = compute_volatility(returns)

        # Only check non-NaN values, but insist that some exist: an all-NaN
        # result would otherwise make the check below pass vacuously.
        # test_flat_returns_zero_vol already requires a non-NaN last value
        # for an input of this same length.
        valid_vol = vol.dropna()
        assert not valid_vol.empty, "compute_volatility returned only NaN"
        assert (valid_vol >= 0).all()

    def test_flat_returns_zero_vol(self):
        """Constant returns must give (near-)zero volatility at the end."""
        returns = pd.Series([0.01] * 10)  # Constant returns
        vol = compute_volatility(returns)

        # A NaN here fails the comparison, so the last value must be defined.
        assert abs(vol.iloc[-1]) < 0.0001


class TestGenerateSymbolFeatures:
    """Checks ``generate_symbol_features`` using the ``sample_price_data`` fixture."""

    def test_feature_columns_created(self, sample_price_data):
        """The default call must produce the core ``TEST_``-prefixed columns."""
        required_columns = (
            "TEST_ret1",
            "TEST_SMA_5",
            "TEST_EMA_10",
            "TEST_RSI_14",
            "TEST_vol_10",
        )

        frame = generate_symbol_features(sample_price_data, "TEST")

        for column in required_columns:
            assert column in frame.columns

    def test_lagged_features(self, sample_price_data):
        """Requesting lags 1, 2 and 5 must create one lag column for each."""
        lag_columns = (
            "TEST_lag_ret1_1",
            "TEST_lag_ret1_2",
            "TEST_lag_ret1_5",
        )

        frame = generate_symbol_features(
            sample_price_data, "TEST", include_lags=[1, 2, 5]
        )

        for column in lag_columns:
            assert column in frame.columns

    def test_output_same_index(self, sample_price_data):
        """The feature frame must keep the input's length and index."""
        frame = generate_symbol_features(sample_price_data, "TEST")
        source_index = sample_price_data.index

        assert len(frame) == len(sample_price_data)
        assert frame.index.equals(source_index)

    def test_no_future_leakage(self, sample_price_data):
        """Ensure features don't use future data."""
        frame = generate_symbol_features(sample_price_data, "TEST")

        # The lag-1 return at row t must equal the plain return at row t-1.
        plain_returns = frame["TEST_ret1"]
        lagged_returns = frame["TEST_lag_ret1_1"]

        # Probe one row away from the start, where values may still be NaN.
        probe = 10
        previous_return = plain_returns.iloc[probe - 1]
        assert abs(lagged_returns.iloc[probe] - previous_return) < 0.0001


class TestTargetCreation:
    """Checks a next-day target built by shifting ``compute_returns`` output."""

    def test_target_shift(self, sample_price_data):
        """Target should be next-day return (shifted by -1)."""
        close = sample_price_data["close"]
        target = compute_returns(close).shift(-1)  # Next day's return

        # At time t, target should be the return from t to t+1,
        # which equals (close[t+1] - close[t]) / close[t].
        expected = close.diff().shift(-1) / close

        # Compare positionally over every row except the last. Rows whose
        # target is NaN are skipped, but at least one row must be comparable
        # so the test cannot pass without checking anything.
        comparable = target.notna().to_numpy()[:-1]
        assert comparable.any(), "no non-NaN target values to compare"
        deviation = (target - expected).abs().to_numpy()[:-1]
        assert (deviation[comparable] < 0.0001).all()

        # Last value should be NaN (no future data)
        assert pd.isna(target.iloc[-1])