# utils/preprocessing.py
import pandas as pd
import numpy as np


def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *data* with technical-indicator columns appended.

    The following columns are derived from the ``Close`` column:

    * ``SMA_20`` / ``SMA_50`` - simple moving averages over 20 and 50 rows.
    * ``RSI`` - relative strength index built from 14-row simple rolling
      means of the positive and negative ``Close`` differences.
    * ``MACD`` - 12-span EMA minus 26-span EMA of ``Close``.
    * ``Signal_Line`` - 9-span EMA of ``MACD``.

    After the indicators are computed, every column is forward-filled.
    Leading NaNs (for example the first 19 rows of ``SMA_20``) remain NaN
    because a forward fill has no earlier value to propagate.

    Args:
        data: Frame containing a ``Close`` column. If the columns are a
            ``pd.MultiIndex``, only the first level is kept as column names.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.

    Raises:
        KeyError: If no ``Close`` column is present.
    """
    # Work on a copy so the caller's frame is not left half-processed
    # (flattened columns and indicator columns, but without the forward fill).
    data = data.copy()

    # If data has MultiIndex columns, select the first level
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    close = data['Close']

    # Add technical indicators
    data['SMA_20'] = close.rolling(window=20).mean()
    data['SMA_50'] = close.rolling(window=50).mean()

    # Calculate RSI
    delta = close.diff()
    # The first diff is NaN; both comparisons are False for NaN, so it is
    # replaced by 0 and counts as "no move" in the first window.
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    data['RSI'] = 100 - (100 / (1 + rs))

    # Calculate MACD
    exp1 = close.ewm(span=12, adjust=False).mean()
    exp2 = close.ewm(span=26, adjust=False).mean()
    data['MACD'] = exp1 - exp2
    data['Signal_Line'] = data['MACD'].ewm(span=9, adjust=False).mean()

    # Handle missing values. DataFrame.ffill() replaces the deprecated
    # fillna(method='ffill') spelling and yields the same result.
    return data.ffill()