Spaces:

ifieryarrows
/

copper-mind

Running

App Files Files Community

copper-mind / tests /test_features.py

ifieryarrows

Sync from GitHub

6afe139 verified 2 months ago

raw

history blame contribute delete

5.66 kB

	"""
	Tests for feature engineering functions.
	"""

	import pytest
	import pandas as pd
	import numpy as np
	from datetime import datetime, timedelta

	from app.features import (
	compute_returns,
	compute_sma,
	compute_ema,
	compute_rsi,
	compute_volatility,
	generate_symbol_features,
	)


	class TestComputeReturns:
	    """Exercise ``compute_returns`` on small, hand-checked price series."""

	    def test_simple_returns(self):
	        """Default call: NaN first, then +10% (100 -> 110) and about -4.5% (110 -> 105)."""
	        tolerance = 0.001
	        result = compute_returns(pd.Series([100, 110, 105]))
	        leading, gain, drop = (result.iloc[pos] for pos in range(3))

	        # Nothing precedes the first price, so its return must be missing.
	        assert pd.isna(leading)
	        assert abs(gain - 0.10) < tolerance
	        assert abs(drop + 0.0454545) < tolerance

	    def test_multi_period_returns(self):
	        """With ``periods=2`` the third entry spans 100 -> 110, i.e. +10%."""
	        series = pd.Series([100, 105, 110, 115])
	        two_step = compute_returns(series, periods=2)

	        deviation = abs(two_step.iloc[2] - 0.10)
	        assert deviation < 0.001


	class TestComputeSMA:
	    """Checks for ``compute_sma`` on tiny integer series."""

	    def test_simple_case(self):
	        """The last 3-wide average of 1..5 covers [3, 4, 5] and equals 4."""
	        averaged = compute_sma(pd.Series([1, 2, 3, 4, 5]), window=3)
	        latest = averaged.iloc[-1]

	        assert abs(latest - 4.0) < 0.001

	    def test_handles_short_series(self):
	        """A window longer than the series must still yield at least one value."""
	        averaged = compute_sma(pd.Series([1, 2]), window=5)

	        # Equivalent to "not every entry is NaN"; the original comment
	        # attributes this to min_periods=1 in the implementation.
	        assert averaged.notna().any()


	class TestComputeEMA:
	    """Compare ``compute_ema`` with ``compute_sma`` on a series that jumps at the end."""

	    def test_more_weight_to_recent(self):
	        """After a final jump from 1 to 5, the last EMA value must exceed the last SMA value."""
	        jumpy = pd.Series([1, 1, 1, 1, 5])  # flat, then a jump on the last point
	        latest_ema = compute_ema(jumpy, span=3).iloc[-1]
	        latest_sma = compute_sma(jumpy, window=3).iloc[-1]

	        # The recent jump should pull the EMA above the SMA.
	        assert latest_sma < latest_ema


	class TestComputeRSI:
	    """Range and trend-direction checks for ``compute_rsi``."""

	    def test_rsi_range(self):
	        """Every RSI value for a seeded random walk lies within [0, 100]."""
	        # A private RandomState yields the same draws as np.random.seed(42)
	        # followed by np.random.randn(50), without reseeding the global
	        # generator that other tests may depend on.
	        rng = np.random.RandomState(42)
	        prices = pd.Series(100 + np.cumsum(rng.randn(50)))
	        rsi = compute_rsi(prices)

	        # RSI should be between 0 and 100
	        assert (rsi >= 0).all()
	        assert (rsi <= 100).all()

	    def test_uptrend_high_rsi(self):
	        """A strictly rising series (1..50) should end with RSI >= 50.

	        If ``compute_rsi`` yields only NaN for this input the test is
	        reported as skipped rather than silently passing without having
	        asserted anything.
	        """
	        # Strong uptrend with enough data points
	        prices = pd.Series([float(i) for i in range(1, 51)])  # 1 to 50
	        rsi = compute_rsi(prices)

	        # Note: RSI depends on implementation details, so NaN entries are
	        # dropped before looking at the final value.
	        valid_rsi = rsi.dropna()
	        if valid_rsi.empty:
	            pytest.skip("compute_rsi returned no non-NaN values for a monotonic uptrend")

	        assert valid_rsi.iloc[-1] >= 50  # Uptrend should have RSI >= 50

	    def test_downtrend_low_rsi(self):
	        """A strictly falling series (30..1) should end with RSI below 20."""
	        # Strong downtrend
	        prices = pd.Series(range(30, 0, -1))  # 30 to 1
	        rsi = compute_rsi(prices)

	        # Should be low (close to 0)
	        assert rsi.iloc[-1] < 20


	class TestComputeVolatility:
	    """Sign and flat-input checks for ``compute_volatility``."""

	    def test_volatility_positive(self):
	        """No non-NaN volatility value may be negative."""
	        mixed_returns = pd.Series(
	            [0.01, -0.02, 0.015, -0.01, 0.02, 0.01, -0.01, 0.02, -0.02, 0.01]
	        )
	        # NaN entries are dropped first; only the remaining values are checked.
	        defined = compute_volatility(mixed_returns).dropna()

	        assert not (defined < 0).any()

	    def test_flat_returns_zero_vol(self):
	        """Ten identical returns should give a final volatility of (almost) zero."""
	        constant_returns = pd.Series([0.01 for _ in range(10)])
	        final_vol = compute_volatility(constant_returns).iloc[-1]

	        assert abs(final_vol) < 0.0001


	class TestGenerateSymbolFeatures:
	    """Checks on the frame returned by ``generate_symbol_features`` for symbol "TEST".

	    Every test receives ``sample_price_data``, a fixture that is not defined
	    in this module.
	    """

	    def test_feature_columns_created(self, sample_price_data):
	        """The return, SMA, EMA, RSI and volatility columns are all present."""
	        frame = generate_symbol_features(sample_price_data, "TEST")

	        wanted = (
	            "TEST_ret1",
	            "TEST_SMA_5",
	            "TEST_EMA_10",
	            "TEST_RSI_14",
	            "TEST_vol_10",
	        )
	        # Collecting the misses makes a failure list every absent column.
	        absent = [name for name in wanted if name not in frame.columns]
	        assert not absent

	    def test_lagged_features(self, sample_price_data):
	        """Passing ``include_lags=[1, 2, 5]`` yields one lag column per requested lag."""
	        requested_lags = [1, 2, 5]
	        frame = generate_symbol_features(
	            sample_price_data, "TEST", include_lags=requested_lags
	        )

	        wanted = ("TEST_lag_ret1_1", "TEST_lag_ret1_2", "TEST_lag_ret1_5")
	        absent = [name for name in wanted if name not in frame.columns]
	        assert not absent

	    def test_output_same_index(self, sample_price_data):
	        """The feature frame has the same length and index as the input."""
	        frame = generate_symbol_features(sample_price_data, "TEST")
	        source_index = sample_price_data.index

	        assert len(frame) == len(sample_price_data)
	        assert frame.index.equals(source_index)

	    def test_no_future_leakage(self, sample_price_data):
	        """Ensure features don't use future data."""
	        frame = generate_symbol_features(sample_price_data, "TEST")

	        # lag_ret1_1 at time t should equal ret1 at time t-1. Row 10 is used
	        # as a middle value because the first few rows may be NaN.
	        position = 10
	        previous_return = frame["TEST_ret1"].iloc[position - 1]
	        lagged_value = frame["TEST_lag_ret1_1"].iloc[position]

	        assert abs(lagged_value - previous_return) < 0.0001


	class TestTargetCreation:
	    """Check how a next-day target relates to ``compute_returns`` output."""

	    def test_target_shift(self, sample_price_data):
	        """Target should be next-day return (shifted by -1)."""
	        close = sample_price_data["close"]
	        target = compute_returns(close).shift(-1)  # next day's return

	        # At position t the target should be the return from t to t+1:
	        # (close[t+1] - close[t]) / close[t]. The comparison is positional,
	        # covering every row except the last.
	        prices = close.to_numpy()
	        expected = (prices[1:] - prices[:-1]) / prices[:-1]
	        observed = target.to_numpy()[: len(close) - 1]

	        # Rows whose target is NaN are left out of the comparison.
	        present = ~pd.isna(observed)
	        gaps = np.abs(observed - expected)
	        assert (gaps[present] < 0.0001).all()

	        # Last value should be NaN (no future data)
	        assert pd.isna(target.iloc[-1])