quangtn266 committed on
Commit
7947ea8
·
1 Parent(s): e9e898b

new changes

Browse files
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Example user template template
2
+ ### Example user template
3
+
4
+ # IntelliJ project files
5
+ .idea
6
+ *.iml
7
+ out
8
+ gen
9
+ .DS_Store
10
+ CICD-for-Machine-Learning
Makefile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ install:
2
+ pip install --upgrade pip && pip install -r requirements.txt
3
+
4
+ format:
5
+ black *.py
6
+
7
+ train:
8
+ python train.py
9
+
10
+ eval:
11
+ echo 'Model metrics' > report.md
12
+ cat ./results/metrics.txt >> report.md
13
+
14
+ echo '\n Confusion Matrix Plot' >> report.md
15
+ echo '![Confusion Matrix](./results/model_result.png)' >> report.md
16
+
17
+ cml comment create report.md
data/drug200.csv ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Age,Sex,BP,Cholesterol,Na_to_K,Drug
2
+ 23,F,HIGH,HIGH,25.355,DrugY
3
+ 47,M,LOW,HIGH,13.093,drugC
4
+ 47,M,LOW,HIGH,10.114,drugC
5
+ 28,F,NORMAL,HIGH,7.798,drugX
6
+ 61,F,LOW,HIGH,18.043,DrugY
7
+ 22,F,NORMAL,HIGH,8.607,drugX
8
+ 49,F,NORMAL,HIGH,16.275,DrugY
9
+ 41,M,LOW,HIGH,11.037,drugC
10
+ 60,M,NORMAL,HIGH,15.171,DrugY
11
+ 43,M,LOW,NORMAL,19.368,DrugY
12
+ 47,F,LOW,HIGH,11.767,drugC
13
+ 34,F,HIGH,NORMAL,19.199,DrugY
14
+ 43,M,LOW,HIGH,15.376,DrugY
15
+ 74,F,LOW,HIGH,20.942,DrugY
16
+ 50,F,NORMAL,HIGH,12.703,drugX
17
+ 16,F,HIGH,NORMAL,15.516,DrugY
18
+ 69,M,LOW,NORMAL,11.455,drugX
19
+ 43,M,HIGH,HIGH,13.972,drugA
20
+ 23,M,LOW,HIGH,7.298,drugC
21
+ 32,F,HIGH,NORMAL,25.974,DrugY
22
+ 57,M,LOW,NORMAL,19.128,DrugY
23
+ 63,M,NORMAL,HIGH,25.917,DrugY
24
+ 47,M,LOW,NORMAL,30.568,DrugY
25
+ 48,F,LOW,HIGH,15.036,DrugY
26
+ 33,F,LOW,HIGH,33.486,DrugY
27
+ 28,F,HIGH,NORMAL,18.809,DrugY
28
+ 31,M,HIGH,HIGH,30.366,DrugY
29
+ 49,F,NORMAL,NORMAL,9.381,drugX
30
+ 39,F,LOW,NORMAL,22.697,DrugY
31
+ 45,M,LOW,HIGH,17.951,DrugY
32
+ 18,F,NORMAL,NORMAL,8.75,drugX
33
+ 74,M,HIGH,HIGH,9.567,drugB
34
+ 49,M,LOW,NORMAL,11.014,drugX
35
+ 65,F,HIGH,NORMAL,31.876,DrugY
36
+ 53,M,NORMAL,HIGH,14.133,drugX
37
+ 46,M,NORMAL,NORMAL,7.285,drugX
38
+ 32,M,HIGH,NORMAL,9.445,drugA
39
+ 39,M,LOW,NORMAL,13.938,drugX
40
+ 39,F,NORMAL,NORMAL,9.709,drugX
41
+ 15,M,NORMAL,HIGH,9.084,drugX
42
+ 73,F,NORMAL,HIGH,19.221,DrugY
43
+ 58,F,HIGH,NORMAL,14.239,drugB
44
+ 50,M,NORMAL,NORMAL,15.79,DrugY
45
+ 23,M,NORMAL,HIGH,12.26,drugX
46
+ 50,F,NORMAL,NORMAL,12.295,drugX
47
+ 66,F,NORMAL,NORMAL,8.107,drugX
48
+ 37,F,HIGH,HIGH,13.091,drugA
49
+ 68,M,LOW,HIGH,10.291,drugC
50
+ 23,M,NORMAL,HIGH,31.686,DrugY
51
+ 28,F,LOW,HIGH,19.796,DrugY
52
+ 58,F,HIGH,HIGH,19.416,DrugY
53
+ 67,M,NORMAL,NORMAL,10.898,drugX
54
+ 62,M,LOW,NORMAL,27.183,DrugY
55
+ 24,F,HIGH,NORMAL,18.457,DrugY
56
+ 68,F,HIGH,NORMAL,10.189,drugB
57
+ 26,F,LOW,HIGH,14.16,drugC
58
+ 65,M,HIGH,NORMAL,11.34,drugB
59
+ 40,M,HIGH,HIGH,27.826,DrugY
60
+ 60,M,NORMAL,NORMAL,10.091,drugX
61
+ 34,M,HIGH,HIGH,18.703,DrugY
62
+ 38,F,LOW,NORMAL,29.875,DrugY
63
+ 24,M,HIGH,NORMAL,9.475,drugA
64
+ 67,M,LOW,NORMAL,20.693,DrugY
65
+ 45,M,LOW,NORMAL,8.37,drugX
66
+ 60,F,HIGH,HIGH,13.303,drugB
67
+ 68,F,NORMAL,NORMAL,27.05,DrugY
68
+ 29,M,HIGH,HIGH,12.856,drugA
69
+ 17,M,NORMAL,NORMAL,10.832,drugX
70
+ 54,M,NORMAL,HIGH,24.658,DrugY
71
+ 18,F,HIGH,NORMAL,24.276,DrugY
72
+ 70,M,HIGH,HIGH,13.967,drugB
73
+ 28,F,NORMAL,HIGH,19.675,DrugY
74
+ 24,F,NORMAL,HIGH,10.605,drugX
75
+ 41,F,NORMAL,NORMAL,22.905,DrugY
76
+ 31,M,HIGH,NORMAL,17.069,DrugY
77
+ 26,M,LOW,NORMAL,20.909,DrugY
78
+ 36,F,HIGH,HIGH,11.198,drugA
79
+ 26,F,HIGH,NORMAL,19.161,DrugY
80
+ 19,F,HIGH,HIGH,13.313,drugA
81
+ 32,F,LOW,NORMAL,10.84,drugX
82
+ 60,M,HIGH,HIGH,13.934,drugB
83
+ 64,M,NORMAL,HIGH,7.761,drugX
84
+ 32,F,LOW,HIGH,9.712,drugC
85
+ 38,F,HIGH,NORMAL,11.326,drugA
86
+ 47,F,LOW,HIGH,10.067,drugC
87
+ 59,M,HIGH,HIGH,13.935,drugB
88
+ 51,F,NORMAL,HIGH,13.597,drugX
89
+ 69,M,LOW,HIGH,15.478,DrugY
90
+ 37,F,HIGH,NORMAL,23.091,DrugY
91
+ 50,F,NORMAL,NORMAL,17.211,DrugY
92
+ 62,M,NORMAL,HIGH,16.594,DrugY
93
+ 41,M,HIGH,NORMAL,15.156,DrugY
94
+ 29,F,HIGH,HIGH,29.45,DrugY
95
+ 42,F,LOW,NORMAL,29.271,DrugY
96
+ 56,M,LOW,HIGH,15.015,DrugY
97
+ 36,M,LOW,NORMAL,11.424,drugX
98
+ 58,F,LOW,HIGH,38.247,DrugY
99
+ 56,F,HIGH,HIGH,25.395,DrugY
100
+ 20,M,HIGH,NORMAL,35.639,DrugY
101
+ 15,F,HIGH,NORMAL,16.725,DrugY
102
+ 31,M,HIGH,NORMAL,11.871,drugA
103
+ 45,F,HIGH,HIGH,12.854,drugA
104
+ 28,F,LOW,HIGH,13.127,drugC
105
+ 56,M,NORMAL,HIGH,8.966,drugX
106
+ 22,M,HIGH,NORMAL,28.294,DrugY
107
+ 37,M,LOW,NORMAL,8.968,drugX
108
+ 22,M,NORMAL,HIGH,11.953,drugX
109
+ 42,M,LOW,HIGH,20.013,DrugY
110
+ 72,M,HIGH,NORMAL,9.677,drugB
111
+ 23,M,NORMAL,HIGH,16.85,DrugY
112
+ 50,M,HIGH,HIGH,7.49,drugA
113
+ 47,F,NORMAL,NORMAL,6.683,drugX
114
+ 35,M,LOW,NORMAL,9.17,drugX
115
+ 65,F,LOW,NORMAL,13.769,drugX
116
+ 20,F,NORMAL,NORMAL,9.281,drugX
117
+ 51,M,HIGH,HIGH,18.295,DrugY
118
+ 67,M,NORMAL,NORMAL,9.514,drugX
119
+ 40,F,NORMAL,HIGH,10.103,drugX
120
+ 32,F,HIGH,NORMAL,10.292,drugA
121
+ 61,F,HIGH,HIGH,25.475,DrugY
122
+ 28,M,NORMAL,HIGH,27.064,DrugY
123
+ 15,M,HIGH,NORMAL,17.206,DrugY
124
+ 34,M,NORMAL,HIGH,22.456,DrugY
125
+ 36,F,NORMAL,HIGH,16.753,DrugY
126
+ 53,F,HIGH,NORMAL,12.495,drugB
127
+ 19,F,HIGH,NORMAL,25.969,DrugY
128
+ 66,M,HIGH,HIGH,16.347,DrugY
129
+ 35,M,NORMAL,NORMAL,7.845,drugX
130
+ 47,M,LOW,NORMAL,33.542,DrugY
131
+ 32,F,NORMAL,HIGH,7.477,drugX
132
+ 70,F,NORMAL,HIGH,20.489,DrugY
133
+ 52,M,LOW,NORMAL,32.922,DrugY
134
+ 49,M,LOW,NORMAL,13.598,drugX
135
+ 24,M,NORMAL,HIGH,25.786,DrugY
136
+ 42,F,HIGH,HIGH,21.036,DrugY
137
+ 74,M,LOW,NORMAL,11.939,drugX
138
+ 55,F,HIGH,HIGH,10.977,drugB
139
+ 35,F,HIGH,HIGH,12.894,drugA
140
+ 51,M,HIGH,NORMAL,11.343,drugB
141
+ 69,F,NORMAL,HIGH,10.065,drugX
142
+ 49,M,HIGH,NORMAL,6.269,drugA
143
+ 64,F,LOW,NORMAL,25.741,DrugY
144
+ 60,M,HIGH,NORMAL,8.621,drugB
145
+ 74,M,HIGH,NORMAL,15.436,DrugY
146
+ 39,M,HIGH,HIGH,9.664,drugA
147
+ 61,M,NORMAL,HIGH,9.443,drugX
148
+ 37,F,LOW,NORMAL,12.006,drugX
149
+ 26,F,HIGH,NORMAL,12.307,drugA
150
+ 61,F,LOW,NORMAL,7.34,drugX
151
+ 22,M,LOW,HIGH,8.151,drugC
152
+ 49,M,HIGH,NORMAL,8.7,drugA
153
+ 68,M,HIGH,HIGH,11.009,drugB
154
+ 55,M,NORMAL,NORMAL,7.261,drugX
155
+ 72,F,LOW,NORMAL,14.642,drugX
156
+ 37,M,LOW,NORMAL,16.724,DrugY
157
+ 49,M,LOW,HIGH,10.537,drugC
158
+ 31,M,HIGH,NORMAL,11.227,drugA
159
+ 53,M,LOW,HIGH,22.963,DrugY
160
+ 59,F,LOW,HIGH,10.444,drugC
161
+ 34,F,LOW,NORMAL,12.923,drugX
162
+ 30,F,NORMAL,HIGH,10.443,drugX
163
+ 57,F,HIGH,NORMAL,9.945,drugB
164
+ 43,M,NORMAL,NORMAL,12.859,drugX
165
+ 21,F,HIGH,NORMAL,28.632,DrugY
166
+ 16,M,HIGH,NORMAL,19.007,DrugY
167
+ 38,M,LOW,HIGH,18.295,DrugY
168
+ 58,F,LOW,HIGH,26.645,DrugY
169
+ 57,F,NORMAL,HIGH,14.216,drugX
170
+ 51,F,LOW,NORMAL,23.003,DrugY
171
+ 20,F,HIGH,HIGH,11.262,drugA
172
+ 28,F,NORMAL,HIGH,12.879,drugX
173
+ 45,M,LOW,NORMAL,10.017,drugX
174
+ 39,F,NORMAL,NORMAL,17.225,DrugY
175
+ 41,F,LOW,NORMAL,18.739,DrugY
176
+ 42,M,HIGH,NORMAL,12.766,drugA
177
+ 73,F,HIGH,HIGH,18.348,DrugY
178
+ 48,M,HIGH,NORMAL,10.446,drugA
179
+ 25,M,NORMAL,HIGH,19.011,DrugY
180
+ 39,M,NORMAL,HIGH,15.969,DrugY
181
+ 67,F,NORMAL,HIGH,15.891,DrugY
182
+ 22,F,HIGH,NORMAL,22.818,DrugY
183
+ 59,F,NORMAL,HIGH,13.884,drugX
184
+ 20,F,LOW,NORMAL,11.686,drugX
185
+ 36,F,HIGH,NORMAL,15.49,DrugY
186
+ 18,F,HIGH,HIGH,37.188,DrugY
187
+ 57,F,NORMAL,NORMAL,25.893,DrugY
188
+ 70,M,HIGH,HIGH,9.849,drugB
189
+ 47,M,HIGH,HIGH,10.403,drugA
190
+ 65,M,HIGH,NORMAL,34.997,DrugY
191
+ 64,M,HIGH,NORMAL,20.932,DrugY
192
+ 58,M,HIGH,HIGH,18.991,DrugY
193
+ 23,M,HIGH,HIGH,8.011,drugA
194
+ 72,M,LOW,HIGH,16.31,DrugY
195
+ 72,M,LOW,HIGH,6.769,drugC
196
+ 46,F,HIGH,HIGH,34.686,DrugY
197
+ 56,F,LOW,HIGH,11.567,drugC
198
+ 16,M,LOW,HIGH,12.006,drugC
199
+ 52,M,NORMAL,HIGH,9.894,drugX
200
+ 23,M,NORMAL,NORMAL,14.02,drugX
201
+ 40,F,LOW,NORMAL,11.349,drugX
model/drug_pipeline.skops ADDED
Binary file (388 kB). View file
 
results/metrics.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ Accuracy=0.98, F1_score = 0.94
results/model_result.png ADDED
train.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import skops.io as sio
3
+ from sklearn.compose import ColumnTransformer
4
+ from sklearn.ensemble import RandomForestClassifier
5
+ from sklearn.impute import SimpleImputer
6
+ from sklearn.metrics import accuracy_score, f1_score
7
+ from sklearn.pipeline import Pipeline
8
+ from sklearn.preprocessing import OrdinalEncoder, StandardScaler
9
+
10
+ # Loading data.
11
+ drug_df = pd.read_csv("./data/drug200.csv")
12
+ drug_df = drug_df.sample(frac=1)
13
+
14
+ # train/ test split
15
+ from sklearn.model_selection import train_test_split
16
+
17
+ X = drug_df.drop("Drug", axis=1).values
18
+ y = drug_df.Drug.values
19
+
20
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=125)
21
+
22
+ # Pipeline
23
+ cat_col = [1, 2, 3]
24
+ num_col = [0, 4]
25
+
26
+ transform = ColumnTransformer(
27
+ [
28
+ ("encoder", OrdinalEncoder(), cat_col),
29
+ ("num_imputer", SimpleImputer(strategy="median"), num_col),
30
+ ("num_scaler", StandardScaler(), num_col),
31
+ ]
32
+ )
33
+
34
+ pipe = Pipeline(
35
+ steps = [
36
+ ("preprocessing", transform),
37
+ ("model", RandomForestClassifier(n_estimators=10, random_state=125)),
38
+ ]
39
+ )
40
+
41
+ # train
42
+ pipe.fit(X_train, y_train)
43
+
44
+ # Model Evaluation
45
+ predictions = pipe.predict(X_test)
46
+ accuracy = accuracy_score(y_test, predictions)
47
+ f1 = f1_score(y_test, predictions, average="macro")
48
+
49
+ print("Accuracy: ", str(round(accuracy, 2)*100)+"%","F1: ", round(f1, 2))
50
+
51
+ # Confusion matrix
52
+ import matplotlib.pyplot as plt
53
+ from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
54
+
55
+ predictions = pipe.predict(X_test)
56
+ cm = confusion_matrix(y_test, predictions, labels=pipe.classes_)
57
+ disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=pipe.classes_)
58
+ disp.plot()
59
+ plt.savefig("./results/model_result.png", dpi=120)
60
+
61
+ # Write metrics to files
62
+ with open("./results/metrics.txt", "w") as outfile:
63
+ outfile.write(f"\nAccuracy={round(accuracy, 2)}, F1_score = {round(f1, 2)}")
64
+
65
+ # Save the model
66
+ sio.dump(pipe, "./model/drug_pipeline.skops")