Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import pickle | |
| from sklearn.preprocessing import LabelEncoder | |
| from config import TEXT_COLUMN, LABEL_COLUMNS, METADATA_COLUMNS, LABEL_ENCODERS_PATH | |
def load_and_preprocess_data(data_path):
    """
    Load a CSV file, fill missing values, and label-encode the label columns.

    Columns listed in ``METADATA_COLUMNS`` that are present in the file are
    converted to numeric values; entries that are missing or cannot be parsed
    become 0. Every remaining missing value is replaced with the string
    ``"Unknown"``. Each column in ``LABEL_COLUMNS`` is then replaced by the
    integer codes produced by its own fitted ``LabelEncoder``.

    Args:
        data_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``.

    Returns:
        A ``(data, label_encoders)`` tuple: the preprocessed DataFrame and a
        dict mapping each label column name to its fitted ``LabelEncoder``.

    Raises:
        KeyError: If any column in ``LABEL_COLUMNS`` is absent from the file.
    """
    data = pd.read_csv(data_path)

    # Fail before doing any work, and name every missing label column at once
    # instead of surfacing a bare KeyError for the first one mid-loop.
    missing_labels = [col for col in LABEL_COLUMNS if col not in data.columns]
    if missing_labels:
        raise KeyError(
            f"Label column(s) missing from {data_path!r}: {missing_labels}"
        )

    # Coerce metadata columns before the blanket fill so the "Unknown"
    # sentinel is never written into a numeric column and parsed back out.
    for col in METADATA_COLUMNS:
        if col in data.columns:
            data[col] = pd.to_numeric(data[col], errors="coerce").fillna(0)

    data = data.fillna("Unknown")

    label_encoders = {}
    for col in LABEL_COLUMNS:
        values = data[col]
        if values.dtype == object:
            # After the fill, a label column that had gaps can mix the
            # "Unknown" string with non-string values; LabelEncoder raises
            # TypeError on mixed str/number input, so encode the text form.
            # Columns that already hold only strings are unaffected.
            values = values.astype(str)
        encoder = LabelEncoder()
        data[col] = encoder.fit_transform(values)
        label_encoders[col] = encoder
    return data, label_encoders
def save_label_encoders(label_encoders):
    """
    Pickle ``label_encoders`` to the file at ``LABEL_ENCODERS_PATH``.

    The file is opened in binary write mode, so any existing content at that
    path is overwritten.

    Args:
        label_encoders: The object to serialize.
    """
    with open(LABEL_ENCODERS_PATH, "wb") as sink:
        pickle.Pickler(sink).dump(label_encoders)
def load_label_encoders():
    """
    Unpickle and return the object stored at ``LABEL_ENCODERS_PATH``.

    Unpickling can execute arbitrary code, so the file must come from a
    trusted source.

    Returns:
        The object read from the pickle file (presumably the value written by
        ``save_label_encoders``).
    """
    with open(LABEL_ENCODERS_PATH, "rb") as source:
        encoders = pickle.load(source)
    return encoders