"""Tests for the feature-engineering helpers and the train/test split."""

import numpy as np
import pandas as pd
import pytest

from src.processing.features import (
    calculate_macd,
    calculate_rsi,
    calculate_sma,
    process_data,
)
from src.processing.split import split_data


# Sample Data Fixture
@pytest.fixture
def sample_data():
    """Return a 100-row frame of daily timestamps and pseudo-random closes.

    The ``close`` values lie in ``[0, 100)``. A locally seeded generator is
    used so every run sees the same prices and any failure is reproducible.
    """
    rng = np.random.default_rng(42)
    data = {
        'timestamp': pd.date_range(start='2023-01-01', periods=100),
        'close': rng.random(100) * 100,
    }
    return pd.DataFrame(data)


def test_calculate_sma(sample_data):
    """Test Simple Moving Average calculation."""
    window = 20
    sma = calculate_sma(sample_data, window)

    assert len(sma) == len(sample_data)
    # First window-1 should be NaN
    assert sma.iloc[0:window - 1].isna().all()
    # NOTE(review): index ``window - 1`` is not checked by either assertion;
    # tighten the slice to ``window - 1:`` once the implementation's first
    # valid index is confirmed.
    assert not sma.iloc[window:].isna().any()


def test_calculate_rsi(sample_data):
    """Test RSI calculation: one value per row, bounded to [0, 100]."""
    rsi = calculate_rsi(sample_data)

    assert len(rsi) == len(sample_data)
    assert rsi.min() >= 0
    assert rsi.max() <= 100


def test_calculate_macd(sample_data):
    """Test MACD calculation: both series are row-aligned with the input."""
    macd, signal = calculate_macd(sample_data)

    assert len(macd) == len(sample_data)
    assert len(signal) == len(sample_data)
    assert not macd.isna().all()


def test_split_data(sample_data):
    """Test data splitting."""
    train, test = split_data(sample_data, test_size=0.2)

    assert len(train) == 80
    assert len(test) == 20
    # Ensure no overlap and correct order
    assert train['timestamp'].max() < test['timestamp'].min()


def test_process_data_structure(tmp_path):
    """Test process_data function output structure."""
    # Create a dummy CSV
    df = pd.DataFrame({
        'timestamp': pd.date_range(start='2023-01-01', periods=60),
        'close': [100 + i for i in range(60)],  # Linear uptrend
    })
    input_file = tmp_path / "test_input.csv"
    df.to_csv(input_file, index=False)

    processed_df = process_data(str(input_file))

    expected_columns = [
        'sma_20', 'sma_50', 'rsi', 'macd', 'target_direction', 'target_price',
    ]
    # Collect every missing column so a failure reports all of them at once.
    missing = [col for col in expected_columns
               if col not in processed_df.columns]
    assert not missing, f"missing columns: {missing}"

    # Check if NaNs from rolling windows are dropped
    # SMA_50 needs 50 points, so we expect some data loss
    assert len(processed_df) < 60