import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler


def load_csv(file_path_or_obj):
    """Read a CSV into a DataFrame.

    Args:
        file_path_or_obj: Anything ``pandas.read_csv`` accepts (a path or a
            file-like object); it is passed through unchanged.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(file_path_or_obj)


def get_numeric(df: pd.DataFrame, strategy: str = "Fill with Mean") -> pd.DataFrame:
    """Return the numeric columns of *df* with missing values handled.

    Args:
        df: Input frame; only columns selected by
            ``select_dtypes(include=['number'])`` are kept.
        strategy: How to treat NaNs:
            ``"Fill with Mean"`` replaces them with the column mean,
            ``"Fill with Zero"`` replaces them with 0,
            ``"Drop Rows"`` removes every row that contains a NaN.
            Any other value leaves the NaNs untouched.

    Returns:
        A DataFrame holding only the numeric columns. With ``"Drop Rows"``
        the surviving rows keep their original index labels, so the index
        may contain gaps.
    """
    numeric_df = df.select_dtypes(include=['number'])

    if strategy == "Fill with Mean":
        return numeric_df.fillna(numeric_df.mean(numeric_only=True))
    if strategy == "Fill with Zero":
        return numeric_df.fillna(0)
    if strategy == "Drop Rows":
        return numeric_df.dropna()
    # Unknown strategy: hand the numeric columns back as they are.
    return numeric_df


def get_text_column(df: pd.DataFrame) -> list:
    """Return the values of the first object-dtype column as strings.

    Args:
        df: Input frame.

    Returns:
        The non-null entries of the first column selected by
        ``select_dtypes(include=['object'])``, each converted with ``str``.
        An empty list when no such column exists.
    """
    text_columns = df.select_dtypes(include=['object']).columns
    if text_columns.empty:
        return []
    return df[text_columns[0]].dropna().astype(str).tolist()


def normalize_data(data: pd.DataFrame, method: str):
    """Scale every column of *data* with the requested method.

    Args:
        data: Frame to scale; it is not modified.
        method: ``"z-score"`` uses ``StandardScaler``, ``"mapminmax"`` uses
            ``MinMaxScaler``. Any other value (e.g. ``"none"``) disables
            scaling.

    Returns:
        A ``(frame, scaler)`` tuple. ``frame`` has the same columns and the
        same index as *data*; ``scaler`` is the fitted scaler, or ``None``
        when no scaling was applied (in which case ``frame`` is a copy of
        *data*).
    """
    if method == "z-score":
        scaler = StandardScaler()
    elif method == "mapminmax":
        scaler = MinMaxScaler()
    else:  # "none"
        return data.copy(), None

    scaled = scaler.fit_transform(data)
    # Carry the original index over: the input may have gaps in its index
    # (e.g. after rows with NaNs were dropped), and a fresh RangeIndex would
    # break row alignment with the source data.
    return pd.DataFrame(scaled, columns=data.columns, index=data.index), scaler


def denormalize_data(scaled_data: pd.DataFrame, scaler):
    """Undo the scaling applied by ``normalize_data``.

    Args:
        scaled_data: Frame previously produced by ``normalize_data``.
        scaler: The scaler returned alongside it, or ``None``.

    Returns:
        *scaled_data* itself (same object, not a copy) when *scaler* is
        ``None``; otherwise a new DataFrame holding
        ``scaler.inverse_transform(scaled_data)`` with the columns and index
        of *scaled_data*.
    """
    if scaler is None:
        return scaled_data
    restored = scaler.inverse_transform(scaled_data)
    # Keep the index so the restored rows still line up with the input rows.
    return pd.DataFrame(
        restored,
        columns=scaled_data.columns,
        index=scaled_data.index,
    )