import os

from sklearn.model_selection import train_test_split

from ..tabular.pipelines import build_preprocessing_pipeline
from ..tabular.trainers import train_model
from ..tabular.evaluators import evaluate_model
#from ..nlp.trainers import TextClassifier
#from ..nlp.evaluators import evaluate_nlp_model
from ..utils.model_io import ModelIO

# Path the trained model is saved to when the caller does not override it.
_DEFAULT_MODEL_PATH = "exports/models/trained_model.pkl"


class ModelFactory:
    """Train, evaluate and persist a tabular model in a single call."""

    def __init__(self):
        self.model_io = ModelIO()

    def build_and_train(
        self,
        df,
        target_column,
        dataset_info,
        problem_type,
        strategy,
        *,
        test_size=0.2,
        random_state=42,
        model_path=_DEFAULT_MODEL_PATH,
    ):
        """Split ``df``, fit the preprocessing pipeline, train, evaluate, save.

        Args:
            df: Data containing the feature columns and ``target_column``;
                it is used via ``df.drop(columns=...)`` and ``df[...]``.
            target_column: Name of the column to predict. It is removed from
                the features and used as the label.
            dataset_info: Mapping read for the keys ``"small_data"``,
                ``"numeric_cols"`` and ``"categorical_cols"``.
            problem_type: Forwarded to ``train_model`` and ``evaluate_model``.
                The value ``"nlp"`` is rejected.
            strategy: Forwarded unchanged to ``train_model``.
            test_size: Passed to ``train_test_split``. Defaults to ``0.2``.
            random_state: Seed passed to ``train_test_split``. Defaults to
                ``42``.
            model_path: Destination handed to ``ModelIO.save``. Defaults to
                ``"exports/models/trained_model.pkl"``.

        Returns:
            A ``(model, metrics)`` tuple: the result of ``train_model`` and
            the result of ``evaluate_model`` on the held-out split.

        Raises:
            ValueError: If ``dataset_info["small_data"]`` is truthy, or if
                ``problem_type`` is ``"nlp"``.
        """
        # Guard clauses: reject unsupported inputs before doing any work.
        if dataset_info["small_data"]:
            raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")
        if problem_type == "nlp":
            raise ValueError("NLP functionality is not supported in this version.")

        # Tabular path: separate features from the label, then hold out a test set.
        X = df.drop(columns=[target_column])
        y = df[target_column]
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

        pipeline = build_preprocessing_pipeline(
            dataset_info["numeric_cols"], dataset_info["categorical_cols"]
        )
        # NOTE(review): train_model may fit the pipeline again internally;
        # this explicit fit is kept to preserve the original behaviour.
        pipeline.fit(X_train, y_train)

        model = train_model(pipeline, X_train, y_train, problem_type, strategy)
        metrics = evaluate_model(model, X_test, y_test, problem_type)

        # Make sure the export directory exists before saving. ModelIO.save is
        # not visible here and may not create missing parent directories itself.
        export_dir = os.path.dirname(model_path)
        if export_dir:
            os.makedirs(export_dir, exist_ok=True)
        self.model_io.save(model, model_path)

        return model, metrics