Spaces:
Sleeping
Sleeping
| import os | |
| from sklearn.model_selection import train_test_split | |
| from ..tabular.pipelines import build_preprocessing_pipeline | |
| from ..tabular.trainers import train_model | |
| from ..tabular.evaluators import evaluate_model | |
| #from ..nlp.trainers import TextClassifier | |
| #from ..nlp.evaluators import evaluate_nlp_model | |
| from ..utils.model_io import ModelIO | |
class ModelFactory:
    """Train, evaluate and persist a model for a tabular dataset.

    The factory owns a ``ModelIO`` instance that is used to write the
    trained model to disk once training and evaluation have finished.
    """

    # Export location used when the caller does not supply ``model_path``.
    DEFAULT_MODEL_PATH = "exports/models/trained_model.pkl"

    def __init__(self):
        self.model_io = ModelIO()

    def build_and_train(
        self,
        df,
        target_column,
        dataset_info,
        problem_type,
        strategy,
        *,
        test_size=0.2,
        random_state=42,
        model_path=None,
    ):
        """Split ``df``, train a model, evaluate it and save it to disk.

        Args:
            df: Dataset containing the features and the target column.
            target_column: Name of the column in ``df`` to predict.
            dataset_info: Mapping that must provide the keys
                ``"small_data"``, ``"numeric_cols"`` and
                ``"categorical_cols"``.
            problem_type: Problem kind; ``"nlp"`` is rejected, any other
                value is forwarded to ``train_model`` / ``evaluate_model``.
            strategy: Training strategy forwarded to ``train_model``.
            test_size: Hold-out fraction passed to ``train_test_split``.
            random_state: Seed passed to ``train_test_split``.
            model_path: Destination for the saved model; defaults to
                ``DEFAULT_MODEL_PATH``.

        Returns:
            A ``(model, metrics)`` tuple: the object returned by
            ``train_model`` and the result of ``evaluate_model`` on the
            hold-out split.

        Raises:
            ValueError: If ``dataset_info["small_data"]`` is truthy or
                ``problem_type`` is ``"nlp"``.
        """
        # Guard clauses: reject unsupported inputs before touching the data.
        if dataset_info["small_data"]:
            raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")
        if problem_type == "nlp":
            raise ValueError("NLP functionality is not supported in this version.")

        if model_path is None:
            model_path = self.DEFAULT_MODEL_PATH

        # Tabular: separate features from the target, then hold out a test split.
        X = df.drop(columns=[target_column])
        y = df[target_column]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        pipeline = build_preprocessing_pipeline(
            dataset_info["numeric_cols"], dataset_info["categorical_cols"]
        )
        # NOTE(review): the pipeline is fitted here and then handed to
        # train_model; if train_model fits it again this call is redundant.
        # Kept to preserve the existing behavior -- confirm against train_model.
        pipeline.fit(X_train, y_train)

        model = train_model(pipeline, X_train, y_train, problem_type, strategy)
        metrics = evaluate_model(model, X_test, y_test, problem_type)

        # Make sure the export directory exists so that saving does not fail
        # on a fresh checkout where "exports/models" has not been created yet.
        export_dir = os.path.dirname(model_path)
        if export_dir:
            os.makedirs(export_dir, exist_ok=True)
        self.model_io.save(model, model_path)

        return model, metrics