ACA050 committed on
Commit
521bf0e
·
verified ·
1 Parent(s): bebbeee

Update backend/core/model_factory.py

Browse files
Files changed (1) hide show
  1. backend/core/model_factory.py +40 -40
backend/core/model_factory.py CHANGED
@@ -1,40 +1,40 @@
1
- import os
2
- from sklearn.model_selection import train_test_split
3
- from ..tabular.pipelines import build_preprocessing_pipeline
4
- from ..tabular.trainers import train_model
5
- from ..tabular.evaluators import evaluate_model
6
- from ..nlp.trainers import TextClassifier
7
- from ..nlp.evaluators import evaluate_nlp_model
8
- from ..utils.model_io import ModelIO
9
-
10
- class ModelFactory:
11
- def __init__(self):
12
- self.model_io = ModelIO()
13
-
14
- def build_and_train(self, df, target_column, dataset_info, problem_type, strategy):
15
- if dataset_info["small_data"]:
16
- raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")
17
-
18
- if problem_type == "nlp":
19
- raise ValueError("NLP functionality is not supported in this version.")
20
- else:
21
- # Tabular
22
- X = df.drop(columns=[target_column])
23
- y = df[target_column]
24
-
25
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
26
-
27
- pipeline = build_preprocessing_pipeline(dataset_info["numeric_cols"], dataset_info["categorical_cols"])
28
- pipeline.fit(X_train, y_train)
29
-
30
- model = train_model(pipeline, X_train, y_train, problem_type, strategy)
31
- metrics = evaluate_model(model, X_test, y_test, problem_type)
32
-
33
- # Save model
34
- self.model_io.save(model, "exports/models/trained_model.pkl")
35
-
36
- return model, metrics
37
-
38
-
39
-
40
-
 
1
+ import os
2
+ from sklearn.model_selection import train_test_split
3
+ from ..tabular.pipelines import build_preprocessing_pipeline
4
+ from ..tabular.trainers import train_model
5
+ from ..tabular.evaluators import evaluate_model
6
+ #from ..nlp.trainers import TextClassifier
7
+ #from ..nlp.evaluators import evaluate_nlp_model
8
+ from ..utils.model_io import ModelIO
9
+
10
class ModelFactory:
    """Train, evaluate and persist a model for a tabular dataset.

    The factory splits the data, fits the preprocessing pipeline, delegates
    training and evaluation to the project helpers, and stores the trained
    model through ``ModelIO``.
    """

    # Location the trained model is written to when the caller gives none.
    DEFAULT_MODEL_PATH = "exports/models/trained_model.pkl"

    def __init__(self):
        self.model_io = ModelIO()

    def build_and_train(self, df, target_column: str, dataset_info: dict,
                        problem_type: str, strategy, *,
                        model_path: str = DEFAULT_MODEL_PATH):
        """Train a tabular model on ``df`` and save it to ``model_path``.

        Args:
            df: Data set containing the features and the target column.
                Presumably a pandas ``DataFrame`` (``drop(columns=...)`` and
                column indexing are used) -- confirm against the caller.
            target_column: Name of the column in ``df`` to predict.
            dataset_info: Mapping read for the keys ``"small_data"``,
                ``"numeric_cols"`` and ``"categorical_cols"``.
            problem_type: Kind of task; ``"nlp"`` is rejected, any other
                value is forwarded to the training/evaluation helpers.
            strategy: Forwarded unchanged to ``train_model``.
            model_path: Where the trained model is saved. Defaults to
                ``"exports/models/trained_model.pkl"``.

        Returns:
            A ``(model, metrics)`` tuple: the object returned by
            ``train_model`` and the result of ``evaluate_model`` on the
            held-out split.

        Raises:
            ValueError: If ``dataset_info["small_data"]`` is truthy, or if
                ``problem_type`` is ``"nlp"``.
        """
        # Guard clauses: reject unsupported input before doing any work.
        if dataset_info["small_data"]:
            raise ValueError("Dataset is too small for training. Minimum 1200 rows required.")

        if problem_type == "nlp":
            raise ValueError("NLP functionality is not supported in this version.")

        # Tabular
        X = df.drop(columns=[target_column])
        y = df[target_column]

        # Hold out 20% for evaluation; the fixed seed keeps the split
        # reproducible between runs.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # The pipeline is fitted on the training split only, so the test
        # split does not influence preprocessing.
        pipeline = build_preprocessing_pipeline(
            dataset_info["numeric_cols"], dataset_info["categorical_cols"]
        )
        pipeline.fit(X_train, y_train)

        model = train_model(pipeline, X_train, y_train, problem_type, strategy)
        metrics = evaluate_model(model, X_test, y_test, problem_type)

        # Save model. Create the target directory first so a fresh checkout
        # without the export folder does not fail at the very last step.
        # NOTE(review): ModelIO.save may already do this -- harmless if so.
        export_dir = os.path.dirname(model_path)
        if export_dir:
            os.makedirs(export_dir, exist_ok=True)
        self.model_io.save(model, model_path)

        return model, metrics
37
+
38
+
39
+
40
+