File size: 1,361 Bytes
bb4e66c
53e2114
 
bb4e66c
53e2114
 
bb4e66c
53e2114
 
bb4e66c
53e2114
 
 
 
 
 
 
 
 
 
 
 
 
bb4e66c
53e2114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler

def load_csv(file_path_or_obj):
    """Read a CSV source into a DataFrame.

    Args:
        file_path_or_obj: Anything ``pandas.read_csv`` accepts as its first
            argument (it is forwarded unchanged, with default parser options).

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    frame = pd.read_csv(file_path_or_obj)
    return frame

def get_numeric(df: pd.DataFrame, strategy: str = "Fill with Mean") -> pd.DataFrame:
    """Return the numeric columns of *df* with missing values handled.

    Args:
        df: Source frame; only columns selected by
            ``select_dtypes(include=['number'])`` are kept.
        strategy: How to treat missing values in the numeric columns:
            ``"Fill with Mean"`` replaces them with each column's mean,
            ``"Fill with Zero"`` replaces them with ``0``,
            ``"Drop Rows"`` removes every row containing a missing value.
            Any other value leaves missing values in place.

    Returns:
        A frame holding only the numeric columns, processed per *strategy*.
    """
    numbers = df.select_dtypes(include=['number'])

    if strategy == "Drop Rows":
        return numbers.dropna()

    if strategy == "Fill with Zero":
        return numbers.fillna(0)

    if strategy == "Fill with Mean":
        column_means = numbers.mean(numeric_only=True)
        return numbers.fillna(column_means)

    # Unrecognised strategy: hand back the numeric columns untouched.
    return numbers

def get_text_column(df: pd.DataFrame) -> list:
    """Return the first object-dtype column of *df* as a list of strings.

    Missing values are dropped and each remaining value is converted with
    ``astype(str)``. If *df* has no object-dtype column, an empty list is
    returned.
    """
    object_cols = df.select_dtypes(include=['object']).columns
    if len(object_cols) == 0:
        return []

    first_text = df[object_cols[0]]
    return first_text.dropna().astype(str).tolist()

def normalize_data(data: pd.DataFrame, method: str):
    """Scale the columns of *data* with the requested method.

    Args:
        data: Frame to scale; it is passed unchanged to the scaler's
            ``fit_transform``.
        method: ``"z-score"`` uses ``StandardScaler`` and ``"mapminmax"``
            uses ``MinMaxScaler``. Any other value (e.g. ``"none"``)
            disables scaling.

    Returns:
        A ``(frame, scaler)`` tuple. When scaling is disabled, ``frame`` is a
        copy of *data* and ``scaler`` is ``None``. Otherwise ``frame`` holds
        the scaled values under the same column labels and row index as
        *data*, and ``scaler`` is the fitted scaler instance.
    """
    if method == "z-score":
        scaler = StandardScaler()
    elif method == "mapminmax":
        scaler = MinMaxScaler()
    else:  # "none"
        return data.copy(), None

    scaled = scaler.fit_transform(data)
    # The scaler output carries no labels by default, so re-attach the columns
    # AND the row index. Without the index, a frame whose index is not
    # 0..n-1 (e.g. after rows were dropped) would be silently renumbered and
    # stop lining up with its source rows, unlike the pass-through branch.
    return pd.DataFrame(scaled, columns=data.columns, index=data.index), scaler

def denormalize_data(scaled_data: pd.DataFrame, scaler):
    """Undo a scaling step using the scaler that produced *scaled_data*.

    Args:
        scaled_data: Frame of scaled values.
        scaler: Object exposing ``inverse_transform``, or ``None`` when no
            scaling was applied.

    Returns:
        *scaled_data* itself when *scaler* is ``None``; otherwise a new frame
        holding ``scaler.inverse_transform(scaled_data)`` with the same column
        labels and row index as *scaled_data*.
    """
    if scaler is None:
        return scaled_data

    restored = scaler.inverse_transform(scaled_data)
    # Keep the caller's row index as well as the columns, so the restored rows
    # still align with the input instead of being renumbered from zero.
    return pd.DataFrame(
        restored,
        columns=scaled_data.columns,
        index=scaled_data.index,
    )