Spaces:
Sleeping
Sleeping
File size: 1,445 Bytes
521bf0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import os
from sklearn.model_selection import train_test_split
from ..tabular.pipelines import build_preprocessing_pipeline
from ..tabular.trainers import train_model
from ..tabular.evaluators import evaluate_model
#from ..nlp.trainers import TextClassifier
#from ..nlp.evaluators import evaluate_nlp_model
from ..utils.model_io import ModelIO
class ModelFactory:
    """Build, train, evaluate and persist a tabular model in one call.

    The factory owns a ``ModelIO`` instance that is used to save the trained
    model once evaluation has finished.
    """

    # Location the trained model is written to when the caller does not
    # supply an explicit ``model_path``.
    DEFAULT_MODEL_PATH = "exports/models/trained_model.pkl"

    def __init__(self):
        self.model_io = ModelIO()

    def build_and_train(
        self,
        df,
        target_column: str,
        dataset_info: dict,
        problem_type: str,
        strategy,
        *,
        model_path: str = DEFAULT_MODEL_PATH,
    ):
        """Train a tabular model on ``df`` and return it with its metrics.

        Args:
            df: Dataset containing the feature columns and ``target_column``
                (must support ``df.drop(columns=...)`` and ``df[...]``).
            target_column: Name of the column to predict.
            dataset_info: Mapping that provides at least the keys
                ``"small_data"``, ``"numeric_cols"`` and ``"categorical_cols"``.
            problem_type: Kind of problem; ``"nlp"`` is rejected, any other
                value is forwarded to the tabular trainer and evaluator.
            strategy: Training strategy, forwarded unchanged to ``train_model``.
            model_path: Destination passed to ``ModelIO.save``. Defaults to
                ``DEFAULT_MODEL_PATH``.

        Returns:
            A ``(model, metrics)`` tuple, where ``metrics`` is whatever
            ``evaluate_model`` returns for the held-out 20% split.

        Raises:
            ValueError: If ``dataset_info["small_data"]`` is truthy, or if
                ``problem_type`` is ``"nlp"``.
        """
        # Guard clauses: reject unsupported inputs before doing any work.
        if dataset_info["small_data"]:
            raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")
        if problem_type == "nlp":
            raise ValueError("NLP functionality is not supported in this version.")

        # Tabular path: separate features from the target.
        features = df.drop(columns=[target_column])
        target = df[target_column]

        # Fixed seed keeps the 80/20 split reproducible across runs.
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=42
        )

        pipeline = build_preprocessing_pipeline(
            dataset_info["numeric_cols"], dataset_info["categorical_cols"]
        )
        # NOTE(review): the pipeline is fitted here and then handed to
        # train_model together with the same training data; confirm whether
        # train_model relies on this prior fit or refits on its own.
        pipeline.fit(X_train, y_train)

        model = train_model(pipeline, X_train, y_train, problem_type, strategy)
        metrics = evaluate_model(model, X_test, y_test, problem_type)

        # Persist the trained model.
        # NOTE(review): presumably ModelIO.save creates missing parent
        # directories; verify before pointing model_path at a new location.
        self.model_io.save(model, model_path)
        return model, metrics
|