Pankaj001 committed on
Commit
fc500c7
·
verified ·
1 Parent(s): 91ce4ed

Upload wine-quality.ipynb

Browse files
Files changed (1) hide show
  1. wine-quality.ipynb +776 -0
wine-quality.ipynb ADDED
@@ -0,0 +1,776 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "4cd4f44d",
6
+ "metadata": {},
7
+ "source": [
8
+ "# 1.0 Importing libraries"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "310b8768",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "\"\"\"\n",
19
+ "Description: Import libraries\n",
20
+ "\"\"\"\n",
21
+ "import numpy as np\n",
22
+ "from sklearn.model_selection import train_test_split\n",
23
+ "from sklearn import metrics\n",
24
+ "import pandas as pd\n",
25
+ "import os\n",
26
+ "import random\n",
27
+ "from humanfriendly import format_timespan\n",
28
+ "from sklearn.preprocessing import MinMaxScaler\n",
29
+ "from sklearn.ensemble import RandomForestClassifier\n",
30
+ "import pickle\n",
31
+ "# from sklearn.svm import SVC\n",
32
+ "# from sklearn.linear_model import LogisticRegression"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 2,
38
+ "id": "774be350",
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "\"\"\"\n",
43
+ "Description: Specify data path\n",
44
+ "\"\"\"\n",
45
+ "data_path = r'data\\winequality-red.csv'"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 3,
51
+ "id": "fadf56e5",
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "data": {
56
+ "text/html": [
57
+ "<div>\n",
58
+ "<style scoped>\n",
59
+ " .dataframe tbody tr th:only-of-type {\n",
60
+ " vertical-align: middle;\n",
61
+ " }\n",
62
+ "\n",
63
+ " .dataframe tbody tr th {\n",
64
+ " vertical-align: top;\n",
65
+ " }\n",
66
+ "\n",
67
+ " .dataframe thead th {\n",
68
+ " text-align: right;\n",
69
+ " }\n",
70
+ "</style>\n",
71
+ "<table border=\"1\" class=\"dataframe\">\n",
72
+ " <thead>\n",
73
+ " <tr style=\"text-align: right;\">\n",
74
+ " <th></th>\n",
75
+ " <th>fixed acidity</th>\n",
76
+ " <th>volatile acidity</th>\n",
77
+ " <th>citric acid</th>\n",
78
+ " <th>residual sugar</th>\n",
79
+ " <th>chlorides</th>\n",
80
+ " <th>free sulfur dioxide</th>\n",
81
+ " <th>total sulfur dioxide</th>\n",
82
+ " <th>density</th>\n",
83
+ " <th>pH</th>\n",
84
+ " <th>sulphates</th>\n",
85
+ " <th>alcohol</th>\n",
86
+ " <th>quality</th>\n",
87
+ " </tr>\n",
88
+ " </thead>\n",
89
+ " <tbody>\n",
90
+ " <tr>\n",
91
+ " <th>0</th>\n",
92
+ " <td>7.4</td>\n",
93
+ " <td>0.70</td>\n",
94
+ " <td>0.00</td>\n",
95
+ " <td>1.9</td>\n",
96
+ " <td>0.076</td>\n",
97
+ " <td>11.0</td>\n",
98
+ " <td>34.0</td>\n",
99
+ " <td>0.9978</td>\n",
100
+ " <td>3.51</td>\n",
101
+ " <td>0.56</td>\n",
102
+ " <td>9.4</td>\n",
103
+ " <td>5</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>1</th>\n",
107
+ " <td>7.8</td>\n",
108
+ " <td>0.88</td>\n",
109
+ " <td>0.00</td>\n",
110
+ " <td>2.6</td>\n",
111
+ " <td>0.098</td>\n",
112
+ " <td>25.0</td>\n",
113
+ " <td>67.0</td>\n",
114
+ " <td>0.9968</td>\n",
115
+ " <td>3.20</td>\n",
116
+ " <td>0.68</td>\n",
117
+ " <td>9.8</td>\n",
118
+ " <td>5</td>\n",
119
+ " </tr>\n",
120
+ " <tr>\n",
121
+ " <th>2</th>\n",
122
+ " <td>7.8</td>\n",
123
+ " <td>0.76</td>\n",
124
+ " <td>0.04</td>\n",
125
+ " <td>2.3</td>\n",
126
+ " <td>0.092</td>\n",
127
+ " <td>15.0</td>\n",
128
+ " <td>54.0</td>\n",
129
+ " <td>0.9970</td>\n",
130
+ " <td>3.26</td>\n",
131
+ " <td>0.65</td>\n",
132
+ " <td>9.8</td>\n",
133
+ " <td>5</td>\n",
134
+ " </tr>\n",
135
+ " <tr>\n",
136
+ " <th>3</th>\n",
137
+ " <td>11.2</td>\n",
138
+ " <td>0.28</td>\n",
139
+ " <td>0.56</td>\n",
140
+ " <td>1.9</td>\n",
141
+ " <td>0.075</td>\n",
142
+ " <td>17.0</td>\n",
143
+ " <td>60.0</td>\n",
144
+ " <td>0.9980</td>\n",
145
+ " <td>3.16</td>\n",
146
+ " <td>0.58</td>\n",
147
+ " <td>9.8</td>\n",
148
+ " <td>6</td>\n",
149
+ " </tr>\n",
150
+ " <tr>\n",
151
+ " <th>4</th>\n",
152
+ " <td>7.4</td>\n",
153
+ " <td>0.70</td>\n",
154
+ " <td>0.00</td>\n",
155
+ " <td>1.9</td>\n",
156
+ " <td>0.076</td>\n",
157
+ " <td>11.0</td>\n",
158
+ " <td>34.0</td>\n",
159
+ " <td>0.9978</td>\n",
160
+ " <td>3.51</td>\n",
161
+ " <td>0.56</td>\n",
162
+ " <td>9.4</td>\n",
163
+ " <td>5</td>\n",
164
+ " </tr>\n",
165
+ " </tbody>\n",
166
+ "</table>\n",
167
+ "</div>"
168
+ ],
169
+ "text/plain": [
170
+ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
171
+ "0 7.4 0.70 0.00 1.9 0.076 \n",
172
+ "1 7.8 0.88 0.00 2.6 0.098 \n",
173
+ "2 7.8 0.76 0.04 2.3 0.092 \n",
174
+ "3 11.2 0.28 0.56 1.9 0.075 \n",
175
+ "4 7.4 0.70 0.00 1.9 0.076 \n",
176
+ "\n",
177
+ " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
178
+ "0 11.0 34.0 0.9978 3.51 0.56 \n",
179
+ "1 25.0 67.0 0.9968 3.20 0.68 \n",
180
+ "2 15.0 54.0 0.9970 3.26 0.65 \n",
181
+ "3 17.0 60.0 0.9980 3.16 0.58 \n",
182
+ "4 11.0 34.0 0.9978 3.51 0.56 \n",
183
+ "\n",
184
+ " alcohol quality \n",
185
+ "0 9.4 5 \n",
186
+ "1 9.8 5 \n",
187
+ "2 9.8 5 \n",
188
+ "3 9.8 6 \n",
189
+ "4 9.4 5 "
190
+ ]
191
+ },
192
+ "execution_count": 3,
193
+ "metadata": {},
194
+ "output_type": "execute_result"
195
+ }
196
+ ],
197
+ "source": [
198
+ "\"\"\"\n",
199
+ "Description: Load data\n",
200
+ "\"\"\"\n",
201
+ "df = pd.read_csv(data_path)\n",
202
+ "df.head()"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 5,
208
+ "id": "89ebf918",
209
+ "metadata": {},
210
+ "outputs": [
211
+ {
212
+ "data": {
213
+ "text/plain": [
214
+ "array([3, 4, 5, 6, 7, 8], dtype=int64)"
215
+ ]
216
+ },
217
+ "execution_count": 5,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "\"\"\"\n",
224
+ "Description: Get classes\n",
225
+ "\"\"\"\n",
226
+ "np.unique(df['quality'])"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 6,
232
+ "id": "af729b52",
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "\"\"\"\n",
237
+ "Description: Remap quality labels from 3-8 to 0-5\n",
238
+ "\"\"\"\n",
239
+ "df['quality'] = df['quality'].apply(lambda x: x-3)"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 7,
245
+ "id": "433901b5",
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "data": {
250
+ "text/html": [
251
+ "<div>\n",
252
+ "<style scoped>\n",
253
+ " .dataframe tbody tr th:only-of-type {\n",
254
+ " vertical-align: middle;\n",
255
+ " }\n",
256
+ "\n",
257
+ " .dataframe tbody tr th {\n",
258
+ " vertical-align: top;\n",
259
+ " }\n",
260
+ "\n",
261
+ " .dataframe thead th {\n",
262
+ " text-align: right;\n",
263
+ " }\n",
264
+ "</style>\n",
265
+ "<table border=\"1\" class=\"dataframe\">\n",
266
+ " <thead>\n",
267
+ " <tr style=\"text-align: right;\">\n",
268
+ " <th></th>\n",
269
+ " <th>fixed acidity</th>\n",
270
+ " <th>volatile acidity</th>\n",
271
+ " <th>citric acid</th>\n",
272
+ " <th>residual sugar</th>\n",
273
+ " <th>chlorides</th>\n",
274
+ " <th>free sulfur dioxide</th>\n",
275
+ " <th>total sulfur dioxide</th>\n",
276
+ " <th>density</th>\n",
277
+ " <th>pH</th>\n",
278
+ " <th>sulphates</th>\n",
279
+ " <th>alcohol</th>\n",
280
+ " <th>quality</th>\n",
281
+ " </tr>\n",
282
+ " </thead>\n",
283
+ " <tbody>\n",
284
+ " <tr>\n",
285
+ " <th>0</th>\n",
286
+ " <td>7.4</td>\n",
287
+ " <td>0.70</td>\n",
288
+ " <td>0.00</td>\n",
289
+ " <td>1.9</td>\n",
290
+ " <td>0.076</td>\n",
291
+ " <td>11.0</td>\n",
292
+ " <td>34.0</td>\n",
293
+ " <td>0.9978</td>\n",
294
+ " <td>3.51</td>\n",
295
+ " <td>0.56</td>\n",
296
+ " <td>9.4</td>\n",
297
+ " <td>2</td>\n",
298
+ " </tr>\n",
299
+ " <tr>\n",
300
+ " <th>1</th>\n",
301
+ " <td>7.8</td>\n",
302
+ " <td>0.88</td>\n",
303
+ " <td>0.00</td>\n",
304
+ " <td>2.6</td>\n",
305
+ " <td>0.098</td>\n",
306
+ " <td>25.0</td>\n",
307
+ " <td>67.0</td>\n",
308
+ " <td>0.9968</td>\n",
309
+ " <td>3.20</td>\n",
310
+ " <td>0.68</td>\n",
311
+ " <td>9.8</td>\n",
312
+ " <td>2</td>\n",
313
+ " </tr>\n",
314
+ " <tr>\n",
315
+ " <th>2</th>\n",
316
+ " <td>7.8</td>\n",
317
+ " <td>0.76</td>\n",
318
+ " <td>0.04</td>\n",
319
+ " <td>2.3</td>\n",
320
+ " <td>0.092</td>\n",
321
+ " <td>15.0</td>\n",
322
+ " <td>54.0</td>\n",
323
+ " <td>0.9970</td>\n",
324
+ " <td>3.26</td>\n",
325
+ " <td>0.65</td>\n",
326
+ " <td>9.8</td>\n",
327
+ " <td>2</td>\n",
328
+ " </tr>\n",
329
+ " <tr>\n",
330
+ " <th>3</th>\n",
331
+ " <td>11.2</td>\n",
332
+ " <td>0.28</td>\n",
333
+ " <td>0.56</td>\n",
334
+ " <td>1.9</td>\n",
335
+ " <td>0.075</td>\n",
336
+ " <td>17.0</td>\n",
337
+ " <td>60.0</td>\n",
338
+ " <td>0.9980</td>\n",
339
+ " <td>3.16</td>\n",
340
+ " <td>0.58</td>\n",
341
+ " <td>9.8</td>\n",
342
+ " <td>3</td>\n",
343
+ " </tr>\n",
344
+ " <tr>\n",
345
+ " <th>4</th>\n",
346
+ " <td>7.4</td>\n",
347
+ " <td>0.70</td>\n",
348
+ " <td>0.00</td>\n",
349
+ " <td>1.9</td>\n",
350
+ " <td>0.076</td>\n",
351
+ " <td>11.0</td>\n",
352
+ " <td>34.0</td>\n",
353
+ " <td>0.9978</td>\n",
354
+ " <td>3.51</td>\n",
355
+ " <td>0.56</td>\n",
356
+ " <td>9.4</td>\n",
357
+ " <td>2</td>\n",
358
+ " </tr>\n",
359
+ " </tbody>\n",
360
+ "</table>\n",
361
+ "</div>"
362
+ ],
363
+ "text/plain": [
364
+ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
365
+ "0 7.4 0.70 0.00 1.9 0.076 \n",
366
+ "1 7.8 0.88 0.00 2.6 0.098 \n",
367
+ "2 7.8 0.76 0.04 2.3 0.092 \n",
368
+ "3 11.2 0.28 0.56 1.9 0.075 \n",
369
+ "4 7.4 0.70 0.00 1.9 0.076 \n",
370
+ "\n",
371
+ " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
372
+ "0 11.0 34.0 0.9978 3.51 0.56 \n",
373
+ "1 25.0 67.0 0.9968 3.20 0.68 \n",
374
+ "2 15.0 54.0 0.9970 3.26 0.65 \n",
375
+ "3 17.0 60.0 0.9980 3.16 0.58 \n",
376
+ "4 11.0 34.0 0.9978 3.51 0.56 \n",
377
+ "\n",
378
+ " alcohol quality \n",
379
+ "0 9.4 2 \n",
380
+ "1 9.8 2 \n",
381
+ "2 9.8 2 \n",
382
+ "3 9.8 3 \n",
383
+ "4 9.4 2 "
384
+ ]
385
+ },
386
+ "execution_count": 7,
387
+ "metadata": {},
388
+ "output_type": "execute_result"
389
+ }
390
+ ],
391
+ "source": [
392
+ "df.head()"
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "code",
397
+ "execution_count": 8,
398
+ "id": "323d455f",
399
+ "metadata": {},
400
+ "outputs": [
401
+ {
402
+ "data": {
403
+ "text/plain": [
404
+ "array([0, 1, 2, 3, 4, 5], dtype=int64)"
405
+ ]
406
+ },
407
+ "execution_count": 8,
408
+ "metadata": {},
409
+ "output_type": "execute_result"
410
+ }
411
+ ],
412
+ "source": [
413
+ "\"\"\"\n",
414
+ "Description: Get classes\n",
415
+ "\"\"\"\n",
416
+ "np.unique(df['quality'])"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": 22,
422
+ "id": "77eb5d07",
423
+ "metadata": {},
424
+ "outputs": [],
425
+ "source": [
426
+ "df.to_csv(\"winequality_red_label_remapped.csv\",index=False)"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": 10,
432
+ "id": "e7a1f0ca",
433
+ "metadata": {},
434
+ "outputs": [
435
+ {
436
+ "data": {
437
+ "text/plain": [
438
+ "fixed acidity 0\n",
439
+ "volatile acidity 0\n",
440
+ "citric acid 0\n",
441
+ "residual sugar 0\n",
442
+ "chlorides 0\n",
443
+ "free sulfur dioxide 0\n",
444
+ "total sulfur dioxide 0\n",
445
+ "density 0\n",
446
+ "pH 0\n",
447
+ "sulphates 0\n",
448
+ "alcohol 0\n",
449
+ "quality 0\n",
450
+ "dtype: int64"
451
+ ]
452
+ },
453
+ "execution_count": 10,
454
+ "metadata": {},
455
+ "output_type": "execute_result"
456
+ }
457
+ ],
458
+ "source": [
459
+ "\"\"\"\n",
460
+ "Description: Check null value\n",
461
+ "\"\"\"\n",
462
+ "df.isnull().sum()"
463
+ ]
464
+ },
465
+ {
466
+ "cell_type": "code",
467
+ "execution_count": 11,
468
+ "id": "5b4f9181",
469
+ "metadata": {},
470
+ "outputs": [
471
+ {
472
+ "data": {
473
+ "text/plain": [
474
+ "(1599, 11)"
475
+ ]
476
+ },
477
+ "execution_count": 11,
478
+ "metadata": {},
479
+ "output_type": "execute_result"
480
+ }
481
+ ],
482
+ "source": [
483
+ "\"\"\"\n",
484
+ "Description: Prepare data\n",
485
+ "\"\"\"\n",
486
+ "x=df.drop(['quality'], axis=1)\n",
487
+ "x.shape"
488
+ ]
489
+ },
490
+ {
491
+ "cell_type": "code",
492
+ "execution_count": 12,
493
+ "id": "b3bfa179",
494
+ "metadata": {},
495
+ "outputs": [
496
+ {
497
+ "data": {
498
+ "text/plain": [
499
+ "(1599,)"
500
+ ]
501
+ },
502
+ "execution_count": 12,
503
+ "metadata": {},
504
+ "output_type": "execute_result"
505
+ }
506
+ ],
507
+ "source": [
508
+ "\"\"\"\n",
509
+ "Description: Get target label\n",
510
+ "\"\"\"\n",
511
+ "y = df['quality']\n",
512
+ "y.shape"
513
+ ]
514
+ },
515
+ {
516
+ "cell_type": "code",
517
+ "execution_count": 13,
518
+ "id": "37595c39",
519
+ "metadata": {},
520
+ "outputs": [],
521
+ "source": [
522
+ "\"\"\"\n",
523
+ "Description: Split data\n",
524
+ "\"\"\"\n",
525
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=40,stratify=y)"
526
+ ]
527
+ },
528
+ {
529
+ "cell_type": "code",
530
+ "execution_count": 14,
531
+ "id": "2a1d6b28",
532
+ "metadata": {},
533
+ "outputs": [
534
+ {
535
+ "name": "stdout",
536
+ "output_type": "stream",
537
+ "text": [
538
+ "shape of x_train: (1279, 11)\n",
539
+ "shape of y_train: (1279,)\n",
540
+ "shape of x_test: (320, 11)\n",
541
+ "shape of y_test: (320,)\n"
542
+ ]
543
+ }
544
+ ],
545
+ "source": [
546
+ "'''\n",
547
+ "Description : Check size of dataset\n",
548
+ "'''\n",
549
+ "print(\"shape of x_train: \",x_train.shape)\n",
550
+ "print(\"shape of y_train: {}\".format(y_train.shape))\n",
551
+ "print(f'shape of x_test: {x_test.shape}')\n",
552
+ "print(f'shape of y_test: {y_test.shape}')"
553
+ ]
554
+ },
555
+ {
556
+ "cell_type": "code",
557
+ "execution_count": 15,
558
+ "id": "5a5c7fa9",
559
+ "metadata": {},
560
+ "outputs": [
561
+ {
562
+ "data": {
563
+ "text/html": [
564
+ "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=1000)</pre></div></div></div></div></div>"
565
+ ],
566
+ "text/plain": [
567
+ "RandomForestClassifier(n_estimators=1000)"
568
+ ]
569
+ },
570
+ "execution_count": 15,
571
+ "metadata": {},
572
+ "output_type": "execute_result"
573
+ }
574
+ ],
575
+ "source": [
576
+ "\"\"\"\n",
577
+ "Description: Instantiate the random forest classifier (1000 trees)\n",
578
+ "\"\"\"\n",
579
+ "model = RandomForestClassifier(n_estimators=1000)\n",
580
+ "model"
581
+ ]
582
+ },
583
+ {
584
+ "cell_type": "code",
585
+ "execution_count": 16,
586
+ "id": "900b39d0",
587
+ "metadata": {},
588
+ "outputs": [
589
+ {
590
+ "data": {
591
+ "text/html": [
592
+ "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=1000)</pre></div></div></div></div></div>"
593
+ ],
594
+ "text/plain": [
595
+ "RandomForestClassifier(n_estimators=1000)"
596
+ ]
597
+ },
598
+ "execution_count": 16,
599
+ "metadata": {},
600
+ "output_type": "execute_result"
601
+ }
602
+ ],
603
+ "source": [
604
+ "\"\"\"\n",
605
+ "Description: Train model\n",
606
+ "\"\"\"\n",
607
+ "model.fit(x_train, y_train)"
608
+ ]
609
+ },
610
+ {
611
+ "cell_type": "code",
612
+ "execution_count": 17,
613
+ "id": "2335722f",
614
+ "metadata": {
615
+ "scrolled": true
616
+ },
617
+ "outputs": [
618
+ {
619
+ "name": "stdout",
620
+ "output_type": "stream",
621
+ "text": [
622
+ "RandomForestClassifier(n_estimators=1000) : \n",
623
+ "Training Accuracy : 1.0\n",
624
+ "Validation Accuracy : 0.671875\n"
625
+ ]
626
+ }
627
+ ],
628
+ "source": [
629
+ "\"\"\"\n",
630
+ "Description: Get training and test accuracy\n",
631
+ "\"\"\"\n",
632
+ "print(f'{model} : ')\n",
633
+ "print('Training Accuracy : ', metrics.accuracy_score(y_train, model.predict(x_train)))\n",
634
+ "print('Validation Accuracy : ', metrics.accuracy_score(y_test, model.predict(x_test)))"
635
+ ]
636
+ },
637
+ {
638
+ "cell_type": "code",
639
+ "execution_count": 18,
640
+ "id": "bff46a0c",
641
+ "metadata": {},
642
+ "outputs": [],
643
+ "source": [
644
+ "pickle.dump(model, open(\"random_forest_model.pkl\", 'wb'))"
645
+ ]
646
+ },
647
+ {
648
+ "cell_type": "code",
649
+ "execution_count": 19,
650
+ "id": "76fe308f",
651
+ "metadata": {},
652
+ "outputs": [
653
+ {
654
+ "data": {
655
+ "text/plain": [
656
+ "fixed acidity 15.90000\n",
657
+ "volatile acidity 1.58000\n",
658
+ "citric acid 1.00000\n",
659
+ "residual sugar 15.50000\n",
660
+ "chlorides 0.61100\n",
661
+ "free sulfur dioxide 72.00000\n",
662
+ "total sulfur dioxide 289.00000\n",
663
+ "density 1.00369\n",
664
+ "pH 4.01000\n",
665
+ "sulphates 2.00000\n",
666
+ "alcohol 14.90000\n",
667
+ "quality 5.00000\n",
668
+ "dtype: float64"
669
+ ]
670
+ },
671
+ "execution_count": 19,
672
+ "metadata": {},
673
+ "output_type": "execute_result"
674
+ }
675
+ ],
676
+ "source": [
677
+ "\"\"\"\n",
678
+ "Description: Maximum value of each column\n",
679
+ "\"\"\"\n",
680
+ "df.max()"
681
+ ]
682
+ },
683
+ {
684
+ "cell_type": "code",
685
+ "execution_count": 20,
686
+ "id": "6509f0e4",
687
+ "metadata": {},
688
+ "outputs": [
689
+ {
690
+ "data": {
691
+ "text/plain": [
692
+ "fixed acidity 4.60000\n",
693
+ "volatile acidity 0.12000\n",
694
+ "citric acid 0.00000\n",
695
+ "residual sugar 0.90000\n",
696
+ "chlorides 0.01200\n",
697
+ "free sulfur dioxide 1.00000\n",
698
+ "total sulfur dioxide 6.00000\n",
699
+ "density 0.99007\n",
700
+ "pH 2.74000\n",
701
+ "sulphates 0.33000\n",
702
+ "alcohol 8.40000\n",
703
+ "quality 0.00000\n",
704
+ "dtype: float64"
705
+ ]
706
+ },
707
+ "execution_count": 20,
708
+ "metadata": {},
709
+ "output_type": "execute_result"
710
+ }
711
+ ],
712
+ "source": [
713
+ "\"\"\"\n",
714
+ "Description: Minimum value of each column\n",
715
+ "\"\"\"\n",
716
+ "df.min()"
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": 21,
722
+ "id": "b6c4e1ae",
723
+ "metadata": {},
724
+ "outputs": [
725
+ {
726
+ "data": {
727
+ "text/plain": [
728
+ "Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',\n",
729
+ " 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',\n",
730
+ " 'pH', 'sulphates', 'alcohol', 'quality'],\n",
731
+ " dtype='object')"
732
+ ]
733
+ },
734
+ "execution_count": 21,
735
+ "metadata": {},
736
+ "output_type": "execute_result"
737
+ }
738
+ ],
739
+ "source": [
740
+ "\"\"\"\n",
741
+ "Description: Check columns\n",
742
+ "\"\"\"\n",
743
+ "df.columns"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": null,
749
+ "id": "857e29c1",
750
+ "metadata": {},
751
+ "outputs": [],
752
+ "source": []
753
+ }
754
+ ],
755
+ "metadata": {
756
+ "kernelspec": {
757
+ "display_name": "Python 3 (ipykernel)",
758
+ "language": "python",
759
+ "name": "python3"
760
+ },
761
+ "language_info": {
762
+ "codemirror_mode": {
763
+ "name": "ipython",
764
+ "version": 3
765
+ },
766
+ "file_extension": ".py",
767
+ "mimetype": "text/x-python",
768
+ "name": "python",
769
+ "nbconvert_exporter": "python",
770
+ "pygments_lexer": "ipython3",
771
+ "version": "3.9.0"
772
+ }
773
+ },
774
+ "nbformat": 4,
775
+ "nbformat_minor": 5
776
+ }