# File size: 2,277 Bytes
# f1f2665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from sklearn import model_selection, preprocessing
from traning_zone.traitement_data.feature_engeneering.data_clearning import *

import os
import joblib

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import  preprocessing


def _split_vectorize_and_persist(X, Y, transformers_dir):
    """Encode labels, split, TF-IDF-vectorize and persist the fitted transformers.

    Shared implementation behind ``engineering`` and ``engineering_modele``.

    Args:
        X: Collection of raw text documents (passed to ``TfidfVectorizer``).
        Y: Collection of class labels aligned with ``X``.
        transformers_dir: Directory in which ``tv_transform.pkl`` (the fitted
            TF-IDF vectorizer) and ``object.pkl`` (the fitted label encoder)
            are written. Created, with any missing parents, if absent.

    Returns:
        A tuple ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``: the TF-IDF matrices
        of the train and test splits and the matching encoded labels.
    """
    # makedirs also creates missing parent directories and tolerates an
    # already-existing target, which the nested os.mkdir calls did not.
    os.makedirs(transformers_dir, exist_ok=True)

    # The encoder is fitted on every label so that the stratified split below
    # operates on the final integer codes.
    label_encoder = preprocessing.LabelEncoder()
    encoded_labels = label_encoder.fit_transform(Y)

    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, encoded_labels, stratify=encoded_labels
    )

    print(set(y_train))
    print(set(y_test))

    # Fit the vocabulary / IDF weights on the training documents only:
    # fitting on the full corpus would leak test-set statistics into the
    # features and make the held-out evaluation optimistic.
    vectorizer = TfidfVectorizer()
    tv_xtrain = vectorizer.fit_transform(x_train)
    tv_xtest = vectorizer.transform(x_test)

    # Context managers guarantee the pickle files are flushed and closed.
    with open(f"{transformers_dir}/tv_transform.pkl", 'wb') as handle:
        joblib.dump(vectorizer, handle)
    with open(f"{transformers_dir}/object.pkl", 'wb') as handle:
        joblib.dump(label_encoder, handle)

    return tv_xtrain, tv_xtest, y_train, y_test


def engineering(data, classe):
    """Build TF-IDF train/test features for one per-class "mini model".

    Reads ``data.DESCRIPTION`` as the text and ``data.BEM_CLASS_DESC_FR`` as
    the label, and stores the fitted transformers under
    ``traning_zone/mini_modèles/<classe>/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``BEM_CLASS_DESC_FR``
            attributes (presumably a pandas DataFrame -- confirm with caller).
        classe: Name of the sub-directory identifying the mini model.

    Returns:
        ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``.
    """
    return _split_vectorize_and_persist(
        data.DESCRIPTION,
        data.BEM_CLASS_DESC_FR,
        f'traning_zone/mini_modèles/{classe}/transformers',
    )



def engineering_modele(data):
    """Build TF-IDF train/test features for the top-level grouping model.

    Reads ``data.DESCRIPTION`` as the text and ``data.Regroupement_de_Class``
    as the label, and stores the fitted transformers under
    ``traning_zone/modèles/transformers``.

    Args:
        data: Object exposing ``DESCRIPTION`` and ``Regroupement_de_Class``
            attributes (presumably a pandas DataFrame -- confirm with caller).

    Returns:
        ``(tv_xtrain, tv_xtest, Ytrain, Ytest)``.
    """
    return _split_vectorize_and_persist(
        data.DESCRIPTION,
        data.Regroupement_de_Class,
        'traning_zone/modèles/transformers',
    )