COULIBALY BOURAHIMA
first commit
f1f2665
from sklearn import model_selection, preprocessing
from traning_zone.traitement_data.feature_engeneering.data_clearning import *
import os
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
def engineering(data, classe):
    """Build TF-IDF features and encoded labels for one per-class mini model.

    The text in ``data.DESCRIPTION`` is vectorized with TF-IDF and the labels
    in ``data.BEM_CLASS_DESC_FR`` are integer-encoded. The fitted vectorizer
    and label encoder are persisted under
    ``traning_zone/mini_modèles/<classe>/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``BEM_CLASS_DESC_FR``
            attributes (presumably a pandas DataFrame -- confirm with caller).
        classe: Name of the class group; used as the output sub-directory.

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF matrices
        of the train/test descriptions and the matching encoded labels.
    """
    transformers_dir = f'traning_zone/mini_modèles/{classe}/transformers'
    # Creates every missing parent in one call and tolerates an existing tree.
    os.makedirs(transformers_dir, exist_ok=True)

    descriptions = data.DESCRIPTION
    label_encoder = preprocessing.LabelEncoder()
    labels = label_encoder.fit_transform(data.BEM_CLASS_DESC_FR)

    # Stratify so both splits keep the same label distribution.
    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        descriptions, labels, stratify=labels
    )
    print(set(Ytrain))
    print(set(Ytest))

    # Fit on the training split only: fitting on the full corpus would leak
    # test-set vocabulary and document frequencies into the features.
    tv = TfidfVectorizer()
    tv_xtrain = tv.fit_transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as handle:
        joblib.dump(tv, handle)
    with open(f"{transformers_dir}/object.pkl", 'wb') as handle:
        joblib.dump(label_encoder, handle)

    return tv_xtrain, tv_xtest, Ytrain, Ytest
def engineering_modele(data):
    """Build TF-IDF features and encoded labels for the top-level model.

    The text in ``data.DESCRIPTION`` is vectorized with TF-IDF and the labels
    in ``data.Regroupement_de_Class`` are integer-encoded. The fitted
    vectorizer and label encoder are persisted under
    ``traning_zone/modèles/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``Regroupement_de_Class``
            attributes (presumably a pandas DataFrame -- confirm with caller).

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF matrices
        of the train/test descriptions and the matching encoded labels.
    """
    transformers_dir = 'traning_zone/modèles/transformers'
    # Creates every missing parent in one call and tolerates an existing tree.
    os.makedirs(transformers_dir, exist_ok=True)

    descriptions = data.DESCRIPTION
    label_encoder = preprocessing.LabelEncoder()
    labels = label_encoder.fit_transform(data.Regroupement_de_Class)

    # Stratify so both splits keep the same label distribution.
    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        descriptions, labels, stratify=labels
    )
    print(set(Ytrain))
    print(set(Ytest))

    # Fit on the training split only: fitting on the full corpus would leak
    # test-set vocabulary and document frequencies into the features.
    tv = TfidfVectorizer()
    tv_xtrain = tv.fit_transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as handle:
        joblib.dump(tv, handle)
    with open(f"{transformers_dir}/object.pkl", 'wb') as handle:
        joblib.dump(label_encoder, handle)

    return tv_xtrain, tv_xtest, Ytrain, Ytest