Spaces:
Running
Running
File size: 1,101 Bytes
09283ec c06b0aa 09283ec c06b0aa 570d1fd 09283ec c06b0aa 09283ec c06b0aa 570d1fd c06b0aa 570d1fd e4ee23f c06b0aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# utils/preprocessing.py
import pandas as pd
def _select_time_column(df):
    """Return ``(column, parsed_values)`` for the first usable time column.

    A column is a candidate when it already has a datetime dtype or when its
    name contains "date" or "time" (case-insensitive). A candidate is usable
    when at least one of its values parses as a datetime. Returns
    ``(None, None)`` when no candidate is usable.
    """
    for col in df.columns:
        # str() keeps the name check working for non-string column labels.
        name = str(col).lower()
        is_candidate = (
            pd.api.types.is_datetime64_any_dtype(df[col])
            or "date" in name
            or "time" in name
        )
        if not is_candidate:
            continue
        parsed = pd.to_datetime(df[col], errors="coerce")
        # A name match alone is weak evidence (e.g. "update_status" contains
        # "date"). Skip columns where nothing parses, otherwise dropping the
        # NaT rows would silently discard the whole dataset.
        if df.empty or parsed.notna().any():
            return col, parsed
    return None, None


def load_and_process_data(
    file_path,
    is_multivariate: bool,
    keep_datetime_column_for_darts: bool = False,
):
    """Load a CSV and prepare it for forecasting.

    The first usable date/time column (see ``_select_time_column``) is
    converted to datetime, rows whose value could not be parsed are dropped,
    and the column becomes the DataFrame index. For univariate use, only the
    first numeric column is kept.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        is_multivariate: When false, reduce the frame to its first numeric
            column; when true, keep every remaining column.
        keep_datetime_column_for_darts: Accepted for backward compatibility.
            It currently has no effect: the parsed datetime lives in the
            index, which is preserved in both cases.

    Returns:
        A ``(df, n_columns)`` tuple where ``n_columns`` is ``df.shape[1]``.

    Raises:
        ValueError: If ``is_multivariate`` is false and the data has no
            numeric column.
    """
    df = pd.read_csv(file_path)

    time_col, parsed = _select_time_column(df)
    if time_col is not None:
        df[time_col] = parsed
        # Drop rows whose timestamp is NaT, then index by the timestamp.
        df = df.dropna(subset=[time_col]).set_index(time_col)

    if not is_multivariate:
        # "number" covers every numeric dtype, not only float64/int64.
        numeric_cols = df.select_dtypes(include="number").columns
        if numeric_cols.empty:
            raise ValueError("No numeric column found for univariate forecast.")
        df = df[[numeric_cols[0]]]

    return df, df.shape[1]
|