Spaces:
Sleeping
Sleeping
| """ | |
| Simplified data processor for Hugging Face compatibility. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| import logging | |
| logger = logging.getLogger(__name__) | |
class AdvancedDataProcessor:
    """Feature engineering and windowing helpers for time series forecasting.

    Attributes:
        config: Settings dict. The keys read by this class are ``lags``,
            ``rolling_windows`` and ``differences`` (each a list of ints).
        scalers: Dict initialised empty; not populated by the methods here.
        feature_columns: Names of the feature columns produced by the most
            recent ``engineer_features`` call.
    """

    def __init__(self, config: dict):
        """Store the configuration and initialise empty state.

        Args:
            config: Settings dict; ``None`` is treated as an empty dict.
        """
        # Tolerate ``None`` so the ``config.get`` lookups cannot fail later.
        self.config = config if config is not None else {}
        self.scalers = {}
        self.feature_columns = []

    def engineer_features(self, df: pd.DataFrame,
                          date_col: str,
                          value_col: str) -> pd.DataFrame:
        """Create calendar, lag, rolling, difference and seasonal features.

        Lag, rolling and difference features are computed in row order, so
        the frame should already be sorted chronologically. Gaps left by
        shifting/rolling are filled backward first, then forward.

        Args:
            df: Input frame; it is copied, never modified in place.
            date_col: Name of the date column. Non-datetime values are parsed
                with ``pd.to_datetime`` for feature extraction only; the
                stored column is left as it was.
            value_col: Name of the numeric column to derive features from.

        Returns:
            A copy of ``df`` with the feature columns appended.

        Side effects:
            Sets ``self.feature_columns`` to every column of the result
            except ``date_col`` and ``value_col``.
        """
        df = df.copy()

        dates = df[date_col]
        if not pd.api.types.is_datetime64_any_dtype(dates):
            # The ``.dt`` accessor only exists on datetime-like data.
            dates = pd.to_datetime(dates)

        # DateTime features
        df['year'] = dates.dt.year
        df['month'] = dates.dt.month
        # isocalendar() yields the nullable UInt32 dtype; left as-is it turns
        # ``.values`` on the frame into an object array, so store a plain
        # NumPy dtype (float only when missing dates force NaN).
        iso_week = dates.dt.isocalendar().week
        df['week'] = iso_week.astype(
            'float64' if iso_week.isna().any() else 'int64'
        )
        df['day'] = dates.dt.day
        df['dayofweek'] = dates.dt.dayofweek
        df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)
        df['quarter'] = dates.dt.quarter
        df['dayofyear'] = dates.dt.dayofyear

        # Lag features
        for lag in self.config.get('lags', [1, 7, 30]):
            df[f'lag_{lag}'] = df[value_col].shift(lag)

        # Rolling statistics
        for window in self.config.get('rolling_windows', [7, 30]):
            rolling = df[value_col].rolling(window=window)
            df[f'rolling_mean_{window}'] = rolling.mean()
            df[f'rolling_std_{window}'] = rolling.std()

        # Difference features
        for diff in self.config.get('differences', [1, 7]):
            df[f'diff_{diff}'] = df[value_col].diff(diff)

        # Seasonal features (yearly cycle encoded on the unit circle)
        angle = 2 * np.pi * df['dayofyear'] / 365
        df['seasonal_sin'] = np.sin(angle)
        df['seasonal_cos'] = np.cos(angle)

        # Handle missing values. ``fillna(method=...)`` is deprecated since
        # pandas 2.1 and removed in pandas 3; bfill()/ffill() are equivalent.
        df = df.bfill().ffill()

        excluded = (date_col, value_col)
        self.feature_columns = [col for col in df.columns if col not in excluded]
        return df

    def create_sequences(self, df: pd.DataFrame,
                         target_col: str,
                         feature_cols: list,
                         seq_length: int = 30,
                         forecast_horizon: int = 7) -> tuple:
        """Create sliding-window sequences for deep learning models.

        Window ``i`` uses rows ``i .. i + seq_length - 1`` of the feature
        columns as input and the following ``forecast_horizon`` values of
        ``target_col`` as output.

        Args:
            df: Frame containing the feature and target columns.
            target_col: Name of the column to forecast.
            feature_cols: Names of the input feature columns (any iterable).
            seq_length: Number of time steps in each input window.
            forecast_horizon: Number of future target values per window.

        Returns:
            ``(X, y)`` where ``X`` has shape
            ``(n_windows, seq_length, n_features)`` and ``y`` has shape
            ``(n_windows, forecast_horizon)``. When ``df`` is too short for a
            single window, both arrays have ``n_windows == 0``.

        Raises:
            ValueError: If ``seq_length`` or ``forecast_horizon`` is < 1.
        """
        if seq_length < 1 or forecast_horizon < 1:
            raise ValueError(
                "seq_length and forecast_horizon must be positive integers"
            )

        # list() makes tuples and pandas Index objects concatenate correctly.
        columns = list(feature_cols)
        data = df[columns + [target_col]].to_numpy()
        n_windows = len(data) - seq_length - forecast_horizon + 1

        if n_windows <= 0:
            # Keep the documented rank so downstream shape checks still work.
            return (
                np.empty((0, seq_length, len(columns)), dtype=data.dtype),
                np.empty((0, forecast_horizon), dtype=data.dtype),
            )

        features = data[:, :-1]
        target = data[:, -1]
        X = np.stack([features[i:i + seq_length] for i in range(n_windows)])
        y = np.stack([
            target[i + seq_length:i + seq_length + forecast_horizon]
            for i in range(n_windows)
        ])
        return X, y