File size: 2,277 Bytes
f1f2665 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
from sklearn import model_selection, preprocessing
from traning_zone.traitement_data.feature_engeneering.data_clearning import *
import os
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
def engineering(data, classe):
    """Build TF-IDF features and encoded labels for one class-specific model.

    The ``DESCRIPTION`` attribute of ``data`` is used as the text input and
    ``BEM_CLASS_DESC_FR`` as the target. The target is label-encoded, the
    data is split with stratification on the encoded target, and the text
    is vectorized with TF-IDF. The fitted vectorizer and label encoder are
    saved under ``traning_zone/mini_modèles/{classe}/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``BEM_CLASS_DESC_FR``
            attributes (presumably a pandas DataFrame; confirm with caller).
        classe: Name interpolated into the output directory path.

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF
        matrices of the train and test descriptions, and the encoded train
        and test labels.
    """
    transformers_dir = f'traning_zone/mini_modèles/{classe}/transformers'
    # Creates the class directory and its "transformers" sub-directory in
    # one call, including missing parents; no error if they already exist.
    os.makedirs(transformers_dir, exist_ok=True)

    X = data.DESCRIPTION
    Y = data.BEM_CLASS_DESC_FR

    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(Y)
    Y = label_encoder.transform(Y)

    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        X, Y, stratify=Y
    )
    # Show which encoded labels ended up in each split.
    print(set(Ytrain))
    print(set(Ytest))

    tv = TfidfVectorizer()
    # NOTE(review): the vectorizer is fitted on the full corpus, held-out
    # rows included, so test-set vocabulary/IDF statistics reach the
    # features. Kept as-is to preserve existing behaviour; consider
    # fitting on Xtrain only.
    tv.fit(X)
    tv_xtrain = tv.transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as tv_file:
        joblib.dump(tv, tv_file)
    with open(f"{transformers_dir}/object.pkl", 'wb') as encoder_file:
        joblib.dump(label_encoder, encoder_file)

    return tv_xtrain, tv_xtest, Ytrain, Ytest
def engineering_modele(data):
    """Build TF-IDF features and encoded labels for the top-level model.

    The ``DESCRIPTION`` attribute of ``data`` is used as the text input and
    ``Regroupement_de_Class`` as the target. The target is label-encoded,
    the data is split with stratification on the encoded target, and the
    text is vectorized with TF-IDF. The fitted vectorizer and label encoder
    are saved under ``traning_zone/modèles/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``Regroupement_de_Class``
            attributes (presumably a pandas DataFrame; confirm with caller).

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF
        matrices of the train and test descriptions, and the encoded train
        and test labels.
    """
    transformers_dir = 'traning_zone/modèles/transformers'
    # Creates the models directory and its "transformers" sub-directory in
    # one call, including missing parents; no error if they already exist.
    os.makedirs(transformers_dir, exist_ok=True)

    X = data.DESCRIPTION
    Y = data.Regroupement_de_Class

    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(Y)
    Y = label_encoder.transform(Y)

    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        X, Y, stratify=Y
    )
    # Show which encoded labels ended up in each split.
    print(set(Ytrain))
    print(set(Ytest))

    tv = TfidfVectorizer()
    # NOTE(review): the vectorizer is fitted on the full corpus, held-out
    # rows included, so test-set vocabulary/IDF statistics reach the
    # features. Kept as-is to preserve existing behaviour; consider
    # fitting on Xtrain only.
    tv.fit(X)
    tv_xtrain = tv.transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as tv_file:
        joblib.dump(tv, tv_file)
    with open(f"{transformers_dir}/object.pkl", 'wb') as encoder_file:
        joblib.dump(label_encoder, encoder_file)

    return tv_xtrain, tv_xtest, Ytrain, Ytest