| from sklearn import model_selection, preprocessing | |
| from traning_zone.traitement_data.feature_engeneering.data_clearning import * | |
| import os | |
| import joblib | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn import preprocessing | |
def engineering(data, classe):
    """Build train/test TF-IDF features for one class and save the transformers.

    The labels in ``data.BEM_CLASS_DESC_FR`` are integer-encoded with a
    ``LabelEncoder``, the texts in ``data.DESCRIPTION`` are vectorized with a
    ``TfidfVectorizer``, and both fitted transformers are pickled under
    ``traning_zone/mini_modèles/{classe}/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``BEM_CLASS_DESC_FR``
            attributes (presumably a pandas DataFrame -- confirm with caller).
        classe: Name used as the sub-directory of ``traning_zone/mini_modèles``
            in which the transformers are stored.

    Returns:
        Tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF matrices of
        the train and test descriptions and the matching encoded labels.
    """
    # makedirs creates every missing level (including the parents) and is a
    # no-op when the directory already exists, replacing the nested mkdir
    # ladder that failed if 'traning_zone/mini_modèles' was absent.
    transformers_dir = f'traning_zone/mini_modèles/{classe}/transformers'
    os.makedirs(transformers_dir, exist_ok=True)

    X = data.DESCRIPTION
    Y = data.BEM_CLASS_DESC_FR

    # Encode the textual class labels as integers.
    label_encoder = preprocessing.LabelEncoder()
    Y = label_encoder.fit_transform(Y)

    # Stratified split so that both subsets keep the label distribution.
    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        X, Y, stratify=Y
    )
    print(set(Ytrain))
    print(set(Ytest))

    # NOTE(review): the vectorizer is fitted on the full corpus (train + test),
    # so vocabulary/IDF statistics see the test texts. Kept as-is to preserve
    # existing results; consider fitting on Xtrain only.
    tv = TfidfVectorizer()
    tv.fit(X)
    tv_xtrain = tv.transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as tv_file:
        joblib.dump(tv, tv_file)
    with open(f"{transformers_dir}/object.pkl", 'wb') as encoder_file:
        joblib.dump(label_encoder, encoder_file)

    return tv_xtrain, tv_xtest, Ytrain, Ytest
def engineering_modele(data):
    """Build train/test TF-IDF features for the main model and save the transformers.

    The labels in ``data.Regroupement_de_Class`` are integer-encoded with a
    ``LabelEncoder``, the texts in ``data.DESCRIPTION`` are vectorized with a
    ``TfidfVectorizer``, and both fitted transformers are pickled under
    ``traning_zone/modèles/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``Regroupement_de_Class``
            attributes (presumably a pandas DataFrame -- confirm with caller).

    Returns:
        Tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF matrices of
        the train and test descriptions and the matching encoded labels.
    """
    # makedirs creates every missing level (including the parents) and is a
    # no-op when the directory already exists, replacing the nested mkdir
    # ladder that failed if 'traning_zone' was absent.
    transformers_dir = 'traning_zone/modèles/transformers'
    os.makedirs(transformers_dir, exist_ok=True)

    X = data.DESCRIPTION
    Y = data.Regroupement_de_Class

    # Encode the textual class-group labels as integers.
    label_encoder = preprocessing.LabelEncoder()
    Y = label_encoder.fit_transform(Y)

    # Stratified split so that both subsets keep the label distribution.
    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        X, Y, stratify=Y
    )
    print(set(Ytrain))
    print(set(Ytest))

    # NOTE(review): the vectorizer is fitted on the full corpus (train + test),
    # so vocabulary/IDF statistics see the test texts. Kept as-is to preserve
    # existing results; consider fitting on Xtrain only.
    tv = TfidfVectorizer()
    tv.fit(X)
    tv_xtrain = tv.transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as tv_file:
        joblib.dump(tv, tv_file)
    with open(f"{transformers_dir}/object.pkl", 'wb') as encoder_file:
        joblib.dump(label_encoder, encoder_file)

    return tv_xtrain, tv_xtest, Ytrain, Ytest