"""
Simplified data processor for Hugging Face compatibility.
"""

import logging
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)


class AdvancedDataProcessor:
    """Advanced data processor for time series forecasting.

    The processor is driven by a plain ``dict`` configuration. The keys read
    by this class are ``'lags'``, ``'rolling_windows'`` and ``'differences'``
    (each a list of integers); all of them are optional.
    """

    def __init__(self, config: dict):
        """Store the configuration and initialise empty bookkeeping state.

        Args:
            config: Feature-engineering options (see the class docstring).
        """
        self.config = config
        # Not populated anywhere in this class; left empty for callers.
        self.scalers = {}
        # Filled in by ``engineer_features``.
        self.feature_columns = []

    def engineer_features(self, df: pd.DataFrame, date_col: str,
                          value_col: str) -> pd.DataFrame:
        """Create comprehensive time series features.

        Adds calendar, lag, rolling-statistic, difference and seasonal
        columns to a copy of ``df`` and records the names of every column
        other than ``date_col`` and ``value_col`` in ``self.feature_columns``.

        Args:
            df: Input frame. It is not modified; a copy is returned.
            date_col: Name of a datetime-like column (it is accessed through
                the ``.dt`` accessor).
            value_col: Name of the column the lag/rolling/difference
                features are derived from.

        Returns:
            A new DataFrame holding the original columns plus the
            engineered features, with missing values back-filled and then
            forward-filled.
        """
        df = df.copy()
        dates = df[date_col]

        # DateTime features
        df['year'] = dates.dt.year
        df['month'] = dates.dt.month
        # ``isocalendar().week`` comes back as the nullable ``UInt32``
        # extension dtype, unlike the other calendar columns. Mixed with
        # NumPy columns it can turn ``df[...].values`` into an object array,
        # so cast it to a plain NumPy dtype (float only when NaT dates
        # produced missing weeks, mirroring how ``dt.year`` behaves).
        iso_week = dates.dt.isocalendar().week
        df['week'] = iso_week.astype(
            'float64' if iso_week.isna().any() else 'int64'
        )
        df['day'] = dates.dt.day
        df['dayofweek'] = dates.dt.dayofweek
        df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)
        df['quarter'] = dates.dt.quarter
        df['dayofyear'] = dates.dt.dayofyear

        values = df[value_col]

        # Lag features
        for lag in self.config.get('lags', [1, 7, 30]):
            df[f'lag_{lag}'] = values.shift(lag)

        # Rolling statistics
        for window in self.config.get('rolling_windows', [7, 30]):
            rolling = values.rolling(window=window)
            df[f'rolling_mean_{window}'] = rolling.mean()
            df[f'rolling_std_{window}'] = rolling.std()

        # Difference features
        for diff in self.config.get('differences', [1, 7]):
            df[f'diff_{diff}'] = values.diff(diff)

        # Seasonal features (a fixed 365-day period is used)
        angle = 2 * np.pi * df['dayofyear'] / 365
        df['seasonal_sin'] = np.sin(angle)
        df['seasonal_cos'] = np.cos(angle)

        # Handle missing values. ``fillna(method=...)`` is deprecated in
        # recent pandas releases; ``bfill()``/``ffill()`` are the supported
        # equivalents. The original order is kept: back-fill first (this
        # fills the leading NaNs of the lag/rolling/diff columns with the
        # first available value), then forward-fill whatever remains.
        df = df.bfill().ffill()

        excluded = (date_col, value_col)
        self.feature_columns = [col for col in df.columns
                                if col not in excluded]

        return df

    def create_sequences(self, df: pd.DataFrame, target_col: str,
                         feature_cols: list, seq_length: int = 30,
                         forecast_horizon: int = 7) -> tuple:
        """Create sequences for deep learning models.

        Slides a window over the rows of ``df``: each sample uses
        ``seq_length`` consecutive rows of ``feature_cols`` as input and the
        following ``forecast_horizon`` values of ``target_col`` as target.

        Args:
            df: Frame containing ``feature_cols`` and ``target_col``.
            target_col: Column to forecast.
            feature_cols: Columns used as model inputs.
            seq_length: Number of rows in each input window.
            forecast_horizon: Number of target values per sample.

        Returns:
            ``(X, y)`` as NumPy arrays. With ``n`` samples, ``X`` has shape
            ``(n, seq_length, len(feature_cols))`` and ``y`` has shape
            ``(n, forecast_horizon)``. When ``df`` has fewer than
            ``seq_length + forecast_horizon`` rows both arrays are empty
            with shape ``(0,)``.
        """
        # Target goes last so it can be split off with ``-1`` / ``:-1``.
        data = df[feature_cols + [target_col]].values

        n_samples = len(data) - seq_length - forecast_horizon + 1

        X = [data[start:start + seq_length, :-1]
             for start in range(n_samples)]
        y = [data[start + seq_length:start + seq_length + forecast_horizon, -1]
             for start in range(n_samples)]

        return np.array(X), np.array(y)