Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler, MinMaxScaler | |
def load_csv(file_path_or_obj):
    """Read CSV data into a DataFrame.

    Args:
        file_path_or_obj: Forwarded unchanged to ``pandas.read_csv`` (for
            example a filesystem path or an open file-like object).

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(file_path_or_obj)
    return frame
def get_numeric(df: pd.DataFrame, strategy: str = "Fill with Mean") -> pd.DataFrame:
    """Return the numeric columns of *df* with missing values handled.

    Args:
        df: Input frame; only columns selected by
            ``select_dtypes(include=['number'])`` are kept.
        strategy: How to treat missing values:
            ``"Fill with Mean"`` replaces them with the per-column mean,
            ``"Fill with Zero"`` replaces them with ``0``,
            ``"Drop Rows"`` removes every row containing a missing value.
            Any other value leaves missing values untouched.

    Returns:
        A DataFrame holding only the numeric columns, processed according
        to *strategy*.
    """
    numbers_only = df.select_dtypes(include=['number'])

    if strategy == "Drop Rows":
        return numbers_only.dropna()

    if strategy == "Fill with Zero":
        return numbers_only.fillna(0)

    if strategy == "Fill with Mean":
        column_means = numbers_only.mean(numeric_only=True)
        return numbers_only.fillna(column_means)

    # Unrecognized strategy: hand back the numeric selection as-is.
    return numbers_only
def get_text_column(df: pd.DataFrame) -> list:
    """Return the values of the first object-dtype column as strings.

    Args:
        df: Input frame to search for object-dtype columns.

    Returns:
        The non-missing entries of the first column selected by
        ``select_dtypes(include=['object'])``, each converted with ``str``.
        An empty list when no such column exists.
    """
    object_cols = df.select_dtypes(include=['object']).columns
    if object_cols.empty:
        return []

    first_text = df[object_cols[0]]
    return first_text.dropna().astype(str).tolist()
def normalize_data(data: pd.DataFrame, method: str):
    """Scale *data* with the requested method and return the fitted scaler.

    Args:
        data: Frame to scale; it is passed as-is to the scaler's
            ``fit_transform``.
        method: ``"z-score"`` uses ``StandardScaler``, ``"mapminmax"`` uses
            ``MinMaxScaler``. Any other value (e.g. ``"none"``) disables
            scaling.

    Returns:
        A ``(frame, scaler)`` tuple. When scaling is disabled, ``frame`` is
        a copy of *data* and ``scaler`` is ``None``. Otherwise ``frame``
        holds the scaled values with the same columns and row index as
        *data*, and ``scaler`` is the fitted scaler instance.
    """
    if method == "z-score":
        scaler = StandardScaler()
    elif method == "mapminmax":
        scaler = MinMaxScaler()
    else:  # "none"
        return data.copy(), None

    scaled = scaler.fit_transform(data)
    # Keep the caller's row index: without it the result gets a fresh
    # RangeIndex and no longer lines up with the input rows (e.g. after
    # rows were dropped upstream), unlike the "none" branch above.
    return pd.DataFrame(scaled, columns=data.columns, index=data.index), scaler
def denormalize_data(scaled_data: pd.DataFrame, scaler):
    """Undo scaling by applying the scaler's inverse transform.

    Args:
        scaled_data: Frame of scaled values.
        scaler: Object exposing ``inverse_transform`` (such as the scaler
            returned by ``normalize_data``), or ``None`` when no scaling
            was applied.

    Returns:
        *scaled_data* itself when *scaler* is ``None``; otherwise a new
        DataFrame with the inverse-transformed values and the same columns
        and row index as *scaled_data*.
    """
    if scaler is None:
        return scaled_data

    restored = scaler.inverse_transform(scaled_data)
    # Carry the row index over so the result stays aligned with the input
    # instead of being renumbered from zero.
    return pd.DataFrame(
        restored,
        columns=scaled_data.columns,
        index=scaled_data.index,
    )