File size: 5,658 Bytes
f64b002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6afe139
 
f64b002
 
6afe139
 
 
 
 
f64b002
 
 
 
 
 
 
 
 
 
 
 
6afe139
f64b002
 
6afe139
 
 
f64b002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
Tests for feature engineering functions.
"""

import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from app.features import (
    compute_returns,
    compute_sma,
    compute_ema,
    compute_rsi,
    compute_volatility,
    generate_symbol_features,
)


class TestComputeReturns:
    """Checks ``compute_returns`` against small hand-computed price series."""

    def test_simple_returns(self):
        """Default call on [100, 110, 105]: NaN, then about +10% and -4.5%."""
        tolerance = 0.001
        price_series = pd.Series([100, 110, 105])
        result = compute_returns(price_series)

        # The first entry has nothing before it, so it is expected to be NaN.
        assert pd.isna(result.iloc[0])
        # 100 -> 110 is a 10% gain.
        assert abs(result.iloc[1] - 0.10) < tolerance
        # 110 -> 105 is roughly a 4.5% loss.
        assert abs(result.iloc[2] + 0.0454545) < tolerance

    def test_multi_period_returns(self):
        """With ``periods=2`` the entry at position 2 spans 100 -> 110."""
        price_series = pd.Series([100, 105, 110, 115])
        result = compute_returns(price_series, periods=2)

        two_step_return = result.iloc[2]
        assert abs(two_step_return - 0.10) < 0.001


class TestComputeSMA:
    """Checks ``compute_sma`` on tiny integer series."""

    def test_simple_case(self):
        """Window 3 over 1..5: the final value is mean(3, 4, 5) == 4."""
        series = pd.Series([1, 2, 3, 4, 5])
        averaged = compute_sma(series, window=3)

        final_value = averaged.iloc[-1]
        assert abs(final_value - 4.0) < 0.001

    def test_handles_short_series(self):
        """A series shorter than the window must not be entirely NaN."""
        series = pd.Series([1, 2])
        averaged = compute_sma(series, window=5)

        # Original author's note: values are expected thanks to min_periods=1
        # (presumably set inside compute_sma -- confirm against app.features).
        assert averaged.notna().any()


class TestComputeEMA:
    """Checks ``compute_ema`` relative to ``compute_sma``."""

    def test_more_weight_to_recent(self):
        """After a jump on the last observation, EMA(3) must exceed SMA(3)."""
        # Flat series with a single jump at the very end.
        series = pd.Series([1, 1, 1, 1, 5])
        exponential = compute_ema(series, span=3)
        simple = compute_sma(series, window=3)

        latest_ema = exponential.iloc[-1]
        latest_sma = simple.iloc[-1]
        assert latest_sma < latest_ema


class TestComputeRSI:
    """Checks ``compute_rsi`` bounds and its response to clear trends."""

    def test_rsi_range(self):
        """Every RSI value of a seeded 50-step random walk lies in [0, 100]."""
        np.random.seed(42)  # fixed seed keeps the walk reproducible
        steps = np.random.randn(50)
        walk = pd.Series(100 + np.cumsum(steps))
        rsi = compute_rsi(walk)

        at_least_zero = rsi >= 0
        at_most_hundred = rsi <= 100
        assert at_least_zero.all()
        assert at_most_hundred.all()

    def test_uptrend_high_rsi(self):
        """A strictly rising series 1..50 should end with RSI >= 50."""
        rising = pd.Series(range(1, 51), dtype=float)
        usable = compute_rsi(rising).dropna()

        # Original author's note: RSI depends on implementation details, so
        # nothing is asserted when every value comes back NaN.
        if usable.empty:
            return
        assert usable.iloc[-1] >= 50

    def test_downtrend_low_rsi(self):
        """A strictly falling series 30..1 should end with RSI below 20."""
        falling = pd.Series(range(30, 0, -1))
        final_rsi = compute_rsi(falling).iloc[-1]

        assert final_rsi < 20


class TestComputeVolatility:
    """Checks ``compute_volatility`` on short return series."""

    def test_volatility_positive(self):
        """No defined (non-NaN) volatility value may be negative."""
        sample = [0.01, -0.02, 0.015, -0.01, 0.02, 0.01, -0.01, 0.02, -0.02, 0.01]
        volatility = compute_volatility(pd.Series(sample))

        # NaN entries are dropped first; only defined values are checked.
        defined = volatility.dropna()
        assert not (defined < 0).any()

    def test_flat_returns_zero_vol(self):
        """Ten identical returns should give (near) zero volatility at the end."""
        constant_returns = pd.Series(np.full(10, 0.01))
        volatility = compute_volatility(constant_returns)

        final_value = volatility.iloc[-1]
        assert abs(final_value) < 0.0001


class TestGenerateSymbolFeatures:
    """Checks the frame returned by ``generate_symbol_features``."""

    def test_feature_columns_created(self, sample_price_data):
        """The default call must expose the expected ``TEST_``-prefixed columns."""
        frame = generate_symbol_features(sample_price_data, "TEST")

        required = (
            "TEST_ret1",
            "TEST_SMA_5",
            "TEST_EMA_10",
            "TEST_RSI_14",
            "TEST_vol_10",
        )
        for column in required:
            assert column in frame.columns

    def test_lagged_features(self, sample_price_data):
        """Each lag passed via ``include_lags`` must get its own column."""
        lags = [1, 2, 5]
        frame = generate_symbol_features(sample_price_data, "TEST", include_lags=lags)

        lag_columns = (
            "TEST_lag_ret1_1",
            "TEST_lag_ret1_2",
            "TEST_lag_ret1_5",
        )
        for column in lag_columns:
            assert column in frame.columns

    def test_output_same_index(self, sample_price_data):
        """The output keeps the input's row count and index."""
        frame = generate_symbol_features(sample_price_data, "TEST")

        source_index = sample_price_data.index
        assert len(frame) == len(sample_price_data)
        assert frame.index.equals(source_index)

    def test_no_future_leakage(self, sample_price_data):
        """Ensure features don't use future data."""
        frame = generate_symbol_features(sample_price_data, "TEST")
        returns = frame["TEST_ret1"]
        lagged = frame["TEST_lag_ret1_1"]

        # The lag-1 column at position t must equal ret1 at position t - 1.
        # Position 10 is used to stay clear of the first rows, which may be NaN.
        position = 10
        difference = lagged.iloc[position] - returns.iloc[position - 1]
        assert abs(difference) < 0.0001


class TestTargetCreation:
    """Checks how the next-day-return target is derived from close prices."""

    def test_target_shift(self, sample_price_data):
        """Target should be next-day return (shifted by -1).

        At position t the target must equal
        ``(close[t+1] - close[t]) / close[t]``. The final position has no
        following close, so its target must be NaN.
        """
        close = sample_price_data["close"]
        # compute_returns comes from the module-level import; shifting by -1
        # moves the next row's return onto the current row.
        target = compute_returns(close).shift(-1)

        # Compare by position on plain arrays so the check does not depend on
        # index alignment between the two series.
        current = close.iloc[:-1].to_numpy()
        following = close.iloc[1:].to_numpy()
        expected = (following - current) / current
        actual = target.iloc[:-1].to_numpy()

        # Positions where the target is NaN are still skipped, but the test
        # must not pass without having compared at least one value.
        comparable = ~pd.isna(actual)
        assert comparable.any(), "no non-NaN target values to compare"
        errors = np.abs(actual[comparable] - expected[comparable])
        assert (errors < 0.0001).all()

        # Last value should be NaN (no future data)
        assert pd.isna(target.iloc[-1])