File size: 1,101 Bytes
09283ec
 
 
c06b0aa
09283ec
 
 
 
 
 
 
 
 
 
c06b0aa
570d1fd
09283ec
 
 
c06b0aa
09283ec
 
 
c06b0aa
570d1fd
c06b0aa
570d1fd
e4ee23f
c06b0aa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# utils/preprocessing.py
import pandas as pd

def load_and_process_data(file_path, is_multivariate, keep_datetime_column_for_darts=False):
    """Load a CSV file and shape it for univariate or multivariate forecasting.

    The first column that already has a datetime dtype, or whose name
    contains "date" or "time" (case-insensitive), is treated as the time
    axis. It is parsed with ``pd.to_datetime``, rows whose value cannot be
    parsed are dropped, and the column becomes the index of the frame.

    Args:
        file_path: CSV location, passed straight to ``pd.read_csv``.
        is_multivariate: When false, the result is reduced to the first
            numeric column. When true, all remaining columns are kept.
        keep_datetime_column_for_darts: Accepted for backward compatibility.
            It does not alter the result: the datetime values live in the
            index, which is kept in both cases.

    Returns:
        A ``(df, n_columns)`` tuple where ``n_columns`` is ``df.shape[1]``.

    Raises:
        ValueError: If ``is_multivariate`` is false and the data has no
            numeric column.
    """
    df = pd.read_csv(file_path)

    # Auto-detect the time column: first match wins.
    time_col = None
    for col in df.columns:
        lowered = str(col).lower()  # str() guards against non-string labels
        if (
            pd.api.types.is_datetime64_any_dtype(df[col])
            or "date" in lowered
            or "time" in lowered
        ):
            time_col = col
            break

    if time_col is not None:
        # Unparseable values become NaT and those rows are discarded.
        df[time_col] = pd.to_datetime(df[time_col], errors="coerce")
        df = df.dropna(subset=[time_col]).set_index(time_col)

    if not is_multivariate:
        # "number" covers every numeric dtype (e.g. uint64, int32, float32),
        # not only int64/float64.
        numeric_cols = df.select_dtypes(include="number").columns
        if numeric_cols.empty:
            raise ValueError("No numeric column found for univariate forecast.")

        # Selecting by column keeps the datetime index, so the same frame
        # serves callers with and without keep_datetime_column_for_darts.
        df = df[[numeric_cols[0]]]

    return df, df.shape[1]