# NOTE: import order is kept exactly as in the original file. The wildcard
# import below may export names that the later imports deliberately rebind,
# so reordering could change which object a name resolves to.
from sklearn import model_selection, preprocessing
from traning_zone.traitement_data.feature_engeneering.data_clearning import *
import os
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing


def _fit_split_and_save(X, Y, transformers_dir):
    """Encode labels, split, TF-IDF vectorize, and persist the transformers.

    Shared implementation behind ``engineering`` and ``engineering_modele``.

    Args:
        X: Collection of text documents to vectorize.
        Y: Collection of class labels aligned with ``X``.
        transformers_dir: Directory where the fitted transformers are saved.
            It is created (with any missing parents) if it does not exist.

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF
        transformed train/test texts and the integer-encoded train/test
        labels.
    """
    # makedirs also creates missing parent directories, which the former
    # chain of os.mkdir calls could not do (it raised FileNotFoundError).
    os.makedirs(transformers_dir, exist_ok=True)

    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(Y)
    Y = label_encoder.transform(Y)

    # Stratified split keeps the class proportions in both subsets.
    Xtrain, Xtest, Ytrain, Ytest = model_selection.train_test_split(
        X, Y, stratify=Y
    )
    print(set(Ytrain))
    print(set(Ytest))

    tv = TfidfVectorizer()
    # NOTE(review): the vectorizer is fitted on the full corpus (train and
    # test), so test-set vocabulary/IDF statistics leak into the features.
    # Left unchanged to keep the saved transformer and feature space stable;
    # confirm whether this is intentional.
    tv.fit(X)
    tv_xtrain = tv.transform(Xtrain)
    tv_xtest = tv.transform(Xtest)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as tv_file:
        joblib.dump(tv, tv_file)
    with open(f"{transformers_dir}/object.pkl", 'wb') as encoder_file:
        joblib.dump(label_encoder, encoder_file)

    return tv_xtrain, tv_xtest, Ytrain, Ytest


def engineering(data, classe):
    """Build TF-IDF features and encoded labels for one class's mini-model.

    Reads ``data.DESCRIPTION`` as the texts and ``data.BEM_CLASS_DESC_FR`` as
    the labels, and saves the fitted vectorizer and label encoder under
    ``traning_zone/mini_modèles/{classe}/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``BEM_CLASS_DESC_FR``
            attributes (presumably a pandas DataFrame; verify against caller).
        classe: Name used as the sub-directory for this mini-model.

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``.
    """
    return _fit_split_and_save(
        data.DESCRIPTION,
        data.BEM_CLASS_DESC_FR,
        f"traning_zone/mini_modèles/{classe}/transformers",
    )


def engineering_modele(data):
    """Build TF-IDF features and encoded labels for the top-level model.

    Reads ``data.DESCRIPTION`` as the texts and ``data.Regroupement_de_Class``
    as the labels, and saves the fitted vectorizer and label encoder under
    ``traning_zone/modèles/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``Regroupement_de_Class``
            attributes (presumably a pandas DataFrame; verify against caller).

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``.
    """
    return _fit_split_and_save(
        data.DESCRIPTION,
        data.Regroupement_de_Class,
        "traning_zone/modèles/transformers",
    )