Pankaj001 committed on
Commit
effd366
·
verified ·
1 Parent(s): 381f0f2

Delete wine-quality.ipynb

Browse files
Files changed (1) hide show
  1. wine-quality.ipynb +0 -776
wine-quality.ipynb DELETED
@@ -1,776 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "c616e307",
6
- "metadata": {},
7
- "source": [
8
- "# 1.0 Importing libraries"
9
- ]
10
- },
11
- {
12
- "cell_type": "code",
13
- "execution_count": 1,
14
- "id": "51c6d132",
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "\"\"\"\n",
19
- "Description: Import libraries\n",
20
- "\"\"\"\n",
21
- "import numpy as np\n",
22
- "from sklearn.model_selection import train_test_split\n",
23
- "from sklearn import metrics\n",
24
- "import pandas as pd\n",
25
- "import os\n",
26
- "import random\n",
27
- "from humanfriendly import format_timespan\n",
28
- "from sklearn.preprocessing import MinMaxScaler\n",
29
- "from sklearn.ensemble import RandomForestClassifier\n",
30
- "import pickle\n",
31
- "# from sklearn.svm import SVC\n",
32
- "# from sklearn.linear_model import LogisticRegression"
33
- ]
34
- },
35
- {
36
- "cell_type": "code",
37
- "execution_count": 2,
38
- "id": "da89b445",
39
- "metadata": {},
40
- "outputs": [],
41
- "source": [
42
- "\"\"\"\n",
43
- "Description: Specify data path\n",
44
- "\"\"\"\n",
45
- "data_path = r'data\\winequality_red_label_mapped.csv'"
46
- ]
47
- },
48
- {
49
- "cell_type": "code",
50
- "execution_count": 3,
51
- "id": "6840333a",
52
- "metadata": {},
53
- "outputs": [
54
- {
55
- "data": {
56
- "text/html": [
57
- "<div>\n",
58
- "<style scoped>\n",
59
- " .dataframe tbody tr th:only-of-type {\n",
60
- " vertical-align: middle;\n",
61
- " }\n",
62
- "\n",
63
- " .dataframe tbody tr th {\n",
64
- " vertical-align: top;\n",
65
- " }\n",
66
- "\n",
67
- " .dataframe thead th {\n",
68
- " text-align: right;\n",
69
- " }\n",
70
- "</style>\n",
71
- "<table border=\"1\" class=\"dataframe\">\n",
72
- " <thead>\n",
73
- " <tr style=\"text-align: right;\">\n",
74
- " <th></th>\n",
75
- " <th>fixed acidity</th>\n",
76
- " <th>volatile acidity</th>\n",
77
- " <th>citric acid</th>\n",
78
- " <th>residual sugar</th>\n",
79
- " <th>chlorides</th>\n",
80
- " <th>free sulfur dioxide</th>\n",
81
- " <th>total sulfur dioxide</th>\n",
82
- " <th>density</th>\n",
83
- " <th>pH</th>\n",
84
- " <th>sulphates</th>\n",
85
- " <th>alcohol</th>\n",
86
- " <th>quality</th>\n",
87
- " </tr>\n",
88
- " </thead>\n",
89
- " <tbody>\n",
90
- " <tr>\n",
91
- " <th>0</th>\n",
92
- " <td>7.4</td>\n",
93
- " <td>0.70</td>\n",
94
- " <td>0.00</td>\n",
95
- " <td>1.9</td>\n",
96
- " <td>0.076</td>\n",
97
- " <td>11.0</td>\n",
98
- " <td>34.0</td>\n",
99
- " <td>0.9978</td>\n",
100
- " <td>3.51</td>\n",
101
- " <td>0.56</td>\n",
102
- " <td>9.4</td>\n",
103
- " <td>5</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>1</th>\n",
107
- " <td>7.8</td>\n",
108
- " <td>0.88</td>\n",
109
- " <td>0.00</td>\n",
110
- " <td>2.6</td>\n",
111
- " <td>0.098</td>\n",
112
- " <td>25.0</td>\n",
113
- " <td>67.0</td>\n",
114
- " <td>0.9968</td>\n",
115
- " <td>3.20</td>\n",
116
- " <td>0.68</td>\n",
117
- " <td>9.8</td>\n",
118
- " <td>5</td>\n",
119
- " </tr>\n",
120
- " <tr>\n",
121
- " <th>2</th>\n",
122
- " <td>7.8</td>\n",
123
- " <td>0.76</td>\n",
124
- " <td>0.04</td>\n",
125
- " <td>2.3</td>\n",
126
- " <td>0.092</td>\n",
127
- " <td>15.0</td>\n",
128
- " <td>54.0</td>\n",
129
- " <td>0.9970</td>\n",
130
- " <td>3.26</td>\n",
131
- " <td>0.65</td>\n",
132
- " <td>9.8</td>\n",
133
- " <td>5</td>\n",
134
- " </tr>\n",
135
- " <tr>\n",
136
- " <th>3</th>\n",
137
- " <td>11.2</td>\n",
138
- " <td>0.28</td>\n",
139
- " <td>0.56</td>\n",
140
- " <td>1.9</td>\n",
141
- " <td>0.075</td>\n",
142
- " <td>17.0</td>\n",
143
- " <td>60.0</td>\n",
144
- " <td>0.9980</td>\n",
145
- " <td>3.16</td>\n",
146
- " <td>0.58</td>\n",
147
- " <td>9.8</td>\n",
148
- " <td>6</td>\n",
149
- " </tr>\n",
150
- " <tr>\n",
151
- " <th>4</th>\n",
152
- " <td>7.4</td>\n",
153
- " <td>0.70</td>\n",
154
- " <td>0.00</td>\n",
155
- " <td>1.9</td>\n",
156
- " <td>0.076</td>\n",
157
- " <td>11.0</td>\n",
158
- " <td>34.0</td>\n",
159
- " <td>0.9978</td>\n",
160
- " <td>3.51</td>\n",
161
- " <td>0.56</td>\n",
162
- " <td>9.4</td>\n",
163
- " <td>5</td>\n",
164
- " </tr>\n",
165
- " </tbody>\n",
166
- "</table>\n",
167
- "</div>"
168
- ],
169
- "text/plain": [
170
- " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
171
- "0 7.4 0.70 0.00 1.9 0.076 \n",
172
- "1 7.8 0.88 0.00 2.6 0.098 \n",
173
- "2 7.8 0.76 0.04 2.3 0.092 \n",
174
- "3 11.2 0.28 0.56 1.9 0.075 \n",
175
- "4 7.4 0.70 0.00 1.9 0.076 \n",
176
- "\n",
177
- " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
178
- "0 11.0 34.0 0.9978 3.51 0.56 \n",
179
- "1 25.0 67.0 0.9968 3.20 0.68 \n",
180
- "2 15.0 54.0 0.9970 3.26 0.65 \n",
181
- "3 17.0 60.0 0.9980 3.16 0.58 \n",
182
- "4 11.0 34.0 0.9978 3.51 0.56 \n",
183
- "\n",
184
- " alcohol quality \n",
185
- "0 9.4 5 \n",
186
- "1 9.8 5 \n",
187
- "2 9.8 5 \n",
188
- "3 9.8 6 \n",
189
- "4 9.4 5 "
190
- ]
191
- },
192
- "execution_count": 3,
193
- "metadata": {},
194
- "output_type": "execute_result"
195
- }
196
- ],
197
- "source": [
198
- "\"\"\"\n",
199
- "Description: Load data\n",
200
- "\"\"\"\n",
201
- "df = pd.read_csv(data_path)\n",
202
- "df.head()"
203
- ]
204
- },
205
- {
206
- "cell_type": "code",
207
- "execution_count": 5,
208
- "id": "e94cd1bf",
209
- "metadata": {},
210
- "outputs": [
211
- {
212
- "data": {
213
- "text/plain": [
214
- "array([3, 4, 5, 6, 7, 8], dtype=int64)"
215
- ]
216
- },
217
- "execution_count": 5,
218
- "metadata": {},
219
- "output_type": "execute_result"
220
- }
221
- ],
222
- "source": [
223
- "\"\"\"\n",
224
- "Description: Get classes\n",
225
- "\"\"\"\n",
226
- "np.unique(df['quality'])"
227
- ]
228
- },
229
- {
230
- "cell_type": "code",
231
- "execution_count": 6,
232
- "id": "cdb3c56f",
233
- "metadata": {},
234
- "outputs": [],
235
- "source": [
236
- "\"\"\"\n",
237
- "Description: Remap quality labels by subtracting 3 (3-8 -> 0-5); the statement below is currently commented out\n",
238
- "\"\"\"\n",
239
- "# df['quality'] = df['quality'].apply(lambda x: x-3)"
240
- ]
241
- },
242
- {
243
- "cell_type": "code",
244
- "execution_count": 7,
245
- "id": "d7c49345",
246
- "metadata": {},
247
- "outputs": [
248
- {
249
- "data": {
250
- "text/html": [
251
- "<div>\n",
252
- "<style scoped>\n",
253
- " .dataframe tbody tr th:only-of-type {\n",
254
- " vertical-align: middle;\n",
255
- " }\n",
256
- "\n",
257
- " .dataframe tbody tr th {\n",
258
- " vertical-align: top;\n",
259
- " }\n",
260
- "\n",
261
- " .dataframe thead th {\n",
262
- " text-align: right;\n",
263
- " }\n",
264
- "</style>\n",
265
- "<table border=\"1\" class=\"dataframe\">\n",
266
- " <thead>\n",
267
- " <tr style=\"text-align: right;\">\n",
268
- " <th></th>\n",
269
- " <th>fixed acidity</th>\n",
270
- " <th>volatile acidity</th>\n",
271
- " <th>citric acid</th>\n",
272
- " <th>residual sugar</th>\n",
273
- " <th>chlorides</th>\n",
274
- " <th>free sulfur dioxide</th>\n",
275
- " <th>total sulfur dioxide</th>\n",
276
- " <th>density</th>\n",
277
- " <th>pH</th>\n",
278
- " <th>sulphates</th>\n",
279
- " <th>alcohol</th>\n",
280
- " <th>quality</th>\n",
281
- " </tr>\n",
282
- " </thead>\n",
283
- " <tbody>\n",
284
- " <tr>\n",
285
- " <th>0</th>\n",
286
- " <td>7.4</td>\n",
287
- " <td>0.70</td>\n",
288
- " <td>0.00</td>\n",
289
- " <td>1.9</td>\n",
290
- " <td>0.076</td>\n",
291
- " <td>11.0</td>\n",
292
- " <td>34.0</td>\n",
293
- " <td>0.9978</td>\n",
294
- " <td>3.51</td>\n",
295
- " <td>0.56</td>\n",
296
- " <td>9.4</td>\n",
297
- " <td>2</td>\n",
298
- " </tr>\n",
299
- " <tr>\n",
300
- " <th>1</th>\n",
301
- " <td>7.8</td>\n",
302
- " <td>0.88</td>\n",
303
- " <td>0.00</td>\n",
304
- " <td>2.6</td>\n",
305
- " <td>0.098</td>\n",
306
- " <td>25.0</td>\n",
307
- " <td>67.0</td>\n",
308
- " <td>0.9968</td>\n",
309
- " <td>3.20</td>\n",
310
- " <td>0.68</td>\n",
311
- " <td>9.8</td>\n",
312
- " <td>2</td>\n",
313
- " </tr>\n",
314
- " <tr>\n",
315
- " <th>2</th>\n",
316
- " <td>7.8</td>\n",
317
- " <td>0.76</td>\n",
318
- " <td>0.04</td>\n",
319
- " <td>2.3</td>\n",
320
- " <td>0.092</td>\n",
321
- " <td>15.0</td>\n",
322
- " <td>54.0</td>\n",
323
- " <td>0.9970</td>\n",
324
- " <td>3.26</td>\n",
325
- " <td>0.65</td>\n",
326
- " <td>9.8</td>\n",
327
- " <td>2</td>\n",
328
- " </tr>\n",
329
- " <tr>\n",
330
- " <th>3</th>\n",
331
- " <td>11.2</td>\n",
332
- " <td>0.28</td>\n",
333
- " <td>0.56</td>\n",
334
- " <td>1.9</td>\n",
335
- " <td>0.075</td>\n",
336
- " <td>17.0</td>\n",
337
- " <td>60.0</td>\n",
338
- " <td>0.9980</td>\n",
339
- " <td>3.16</td>\n",
340
- " <td>0.58</td>\n",
341
- " <td>9.8</td>\n",
342
- " <td>3</td>\n",
343
- " </tr>\n",
344
- " <tr>\n",
345
- " <th>4</th>\n",
346
- " <td>7.4</td>\n",
347
- " <td>0.70</td>\n",
348
- " <td>0.00</td>\n",
349
- " <td>1.9</td>\n",
350
- " <td>0.076</td>\n",
351
- " <td>11.0</td>\n",
352
- " <td>34.0</td>\n",
353
- " <td>0.9978</td>\n",
354
- " <td>3.51</td>\n",
355
- " <td>0.56</td>\n",
356
- " <td>9.4</td>\n",
357
- " <td>2</td>\n",
358
- " </tr>\n",
359
- " </tbody>\n",
360
- "</table>\n",
361
- "</div>"
362
- ],
363
- "text/plain": [
364
- " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
365
- "0 7.4 0.70 0.00 1.9 0.076 \n",
366
- "1 7.8 0.88 0.00 2.6 0.098 \n",
367
- "2 7.8 0.76 0.04 2.3 0.092 \n",
368
- "3 11.2 0.28 0.56 1.9 0.075 \n",
369
- "4 7.4 0.70 0.00 1.9 0.076 \n",
370
- "\n",
371
- " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
372
- "0 11.0 34.0 0.9978 3.51 0.56 \n",
373
- "1 25.0 67.0 0.9968 3.20 0.68 \n",
374
- "2 15.0 54.0 0.9970 3.26 0.65 \n",
375
- "3 17.0 60.0 0.9980 3.16 0.58 \n",
376
- "4 11.0 34.0 0.9978 3.51 0.56 \n",
377
- "\n",
378
- " alcohol quality \n",
379
- "0 9.4 2 \n",
380
- "1 9.8 2 \n",
381
- "2 9.8 2 \n",
382
- "3 9.8 3 \n",
383
- "4 9.4 2 "
384
- ]
385
- },
386
- "execution_count": 7,
387
- "metadata": {},
388
- "output_type": "execute_result"
389
- }
390
- ],
391
- "source": [
392
- "df.head()"
393
- ]
394
- },
395
- {
396
- "cell_type": "code",
397
- "execution_count": 8,
398
- "id": "2f6f4d22",
399
- "metadata": {},
400
- "outputs": [
401
- {
402
- "data": {
403
- "text/plain": [
404
- "array([0, 1, 2, 3, 4, 5], dtype=int64)"
405
- ]
406
- },
407
- "execution_count": 8,
408
- "metadata": {},
409
- "output_type": "execute_result"
410
- }
411
- ],
412
- "source": [
413
- "\"\"\"\n",
414
- "Description: Get classes\n",
415
- "\"\"\"\n",
416
- "np.unique(df['quality'])"
417
- ]
418
- },
419
- {
420
- "cell_type": "code",
421
- "execution_count": 22,
422
- "id": "75bd7cab",
423
- "metadata": {},
424
- "outputs": [],
425
- "source": [
426
- "df.to_csv(\"winequality_red_label_remapped.csv\",index=False)"
427
- ]
428
- },
429
- {
430
- "cell_type": "code",
431
- "execution_count": 10,
432
- "id": "74986d6d",
433
- "metadata": {},
434
- "outputs": [
435
- {
436
- "data": {
437
- "text/plain": [
438
- "fixed acidity 0\n",
439
- "volatile acidity 0\n",
440
- "citric acid 0\n",
441
- "residual sugar 0\n",
442
- "chlorides 0\n",
443
- "free sulfur dioxide 0\n",
444
- "total sulfur dioxide 0\n",
445
- "density 0\n",
446
- "pH 0\n",
447
- "sulphates 0\n",
448
- "alcohol 0\n",
449
- "quality 0\n",
450
- "dtype: int64"
451
- ]
452
- },
453
- "execution_count": 10,
454
- "metadata": {},
455
- "output_type": "execute_result"
456
- }
457
- ],
458
- "source": [
459
- "\"\"\"\n",
460
- "Description: Check null value\n",
461
- "\"\"\"\n",
462
- "df.isnull().sum()"
463
- ]
464
- },
465
- {
466
- "cell_type": "code",
467
- "execution_count": 11,
468
- "id": "74d7e3e9",
469
- "metadata": {},
470
- "outputs": [
471
- {
472
- "data": {
473
- "text/plain": [
474
- "(1599, 11)"
475
- ]
476
- },
477
- "execution_count": 11,
478
- "metadata": {},
479
- "output_type": "execute_result"
480
- }
481
- ],
482
- "source": [
483
- "\"\"\"\n",
484
- "Description: Prepare data\n",
485
- "\"\"\"\n",
486
- "x=df.drop(['quality'], axis=1)\n",
487
- "x.shape"
488
- ]
489
- },
490
- {
491
- "cell_type": "code",
492
- "execution_count": 12,
493
- "id": "68302b25",
494
- "metadata": {},
495
- "outputs": [
496
- {
497
- "data": {
498
- "text/plain": [
499
- "(1599,)"
500
- ]
501
- },
502
- "execution_count": 12,
503
- "metadata": {},
504
- "output_type": "execute_result"
505
- }
506
- ],
507
- "source": [
508
- "\"\"\"\n",
509
- "Description: Get target label\n",
510
- "\"\"\"\n",
511
- "y = df['quality']\n",
512
- "y.shape"
513
- ]
514
- },
515
- {
516
- "cell_type": "code",
517
- "execution_count": 13,
518
- "id": "53e3c6d9",
519
- "metadata": {},
520
- "outputs": [],
521
- "source": [
522
- "\"\"\"\n",
523
- "Description: Split data\n",
524
- "\"\"\"\n",
525
- "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=40,stratify=y)"
526
- ]
527
- },
528
- {
529
- "cell_type": "code",
530
- "execution_count": 14,
531
- "id": "e0a5d87b",
532
- "metadata": {},
533
- "outputs": [
534
- {
535
- "name": "stdout",
536
- "output_type": "stream",
537
- "text": [
538
- "shape of x_train: (1279, 11)\n",
539
- "shape of y_train: (1279,)\n",
540
- "shape of x_test: (320, 11)\n",
541
- "shape of y_test: (320,)\n"
542
- ]
543
- }
544
- ],
545
- "source": [
546
- "'''\n",
547
- "Description : Check size of dataset\n",
548
- "'''\n",
549
- "print(\"shape of x_train: \",x_train.shape)\n",
550
- "print(\"shape of y_train: {}\".format(y_train.shape))\n",
551
- "print(f'shape of x_test: {x_test.shape}')\n",
552
- "print(f'shape of y_test: {y_test.shape}')"
553
- ]
554
- },
555
- {
556
- "cell_type": "code",
557
- "execution_count": 15,
558
- "id": "85ee67b4",
559
- "metadata": {},
560
- "outputs": [
561
- {
562
- "data": {
563
- "text/html": [
564
- "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=1000)</pre></div></div></div></div></div>"
565
- ],
566
- "text/plain": [
567
- "RandomForestClassifier(n_estimators=1000)"
568
- ]
569
- },
570
- "execution_count": 15,
571
- "metadata": {},
572
- "output_type": "execute_result"
573
- }
574
- ],
575
- "source": [
576
- "\"\"\"\n",
577
- "Description: Instantiate the random forest classifier (1000 trees)\n",
578
- "\"\"\"\n",
579
- "model = RandomForestClassifier(n_estimators=1000)\n",
580
- "model"
581
- ]
582
- },
583
- {
584
- "cell_type": "code",
585
- "execution_count": 16,
586
- "id": "455b3f11",
587
- "metadata": {},
588
- "outputs": [
589
- {
590
- "data": {
591
- "text/html": [
592
- "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=1000)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=1000)</pre></div></div></div></div></div>"
593
- ],
594
- "text/plain": [
595
- "RandomForestClassifier(n_estimators=1000)"
596
- ]
597
- },
598
- "execution_count": 16,
599
- "metadata": {},
600
- "output_type": "execute_result"
601
- }
602
- ],
603
- "source": [
604
- "\"\"\"\n",
605
- "Description: Train model\n",
606
- "\"\"\"\n",
607
- "model.fit(x_train, y_train)"
608
- ]
609
- },
610
- {
611
- "cell_type": "code",
612
- "execution_count": 17,
613
- "id": "7df61cdd",
614
- "metadata": {
615
- "scrolled": true
616
- },
617
- "outputs": [
618
- {
619
- "name": "stdout",
620
- "output_type": "stream",
621
- "text": [
622
- "RandomForestClassifier(n_estimators=1000) : \n",
623
- "Training Accuracy : 1.0\n",
624
- "Validation Accuracy : 0.671875\n"
625
- ]
626
- }
627
- ],
628
- "source": [
629
- "\"\"\"\n",
630
- "Description: Get training and test accuracy\n",
631
- "\"\"\"\n",
632
- "print(f'{model} : ')\n",
633
- "print('Training Accuracy : ', metrics.accuracy_score(y_train, model.predict(x_train)))\n",
634
- "print('Validation Accuracy : ', metrics.accuracy_score(y_test, model.predict(x_test)))"
635
- ]
636
- },
637
- {
638
- "cell_type": "code",
639
- "execution_count": 18,
640
- "id": "a843b76d",
641
- "metadata": {},
642
- "outputs": [],
643
- "source": [
644
- "pickle.dump(model, open(\"random_forest_model.pkl\", 'wb'))"
645
- ]
646
- },
647
- {
648
- "cell_type": "code",
649
- "execution_count": 19,
650
- "id": "ca4970af",
651
- "metadata": {},
652
- "outputs": [
653
- {
654
- "data": {
655
- "text/plain": [
656
- "fixed acidity 15.90000\n",
657
- "volatile acidity 1.58000\n",
658
- "citric acid 1.00000\n",
659
- "residual sugar 15.50000\n",
660
- "chlorides 0.61100\n",
661
- "free sulfur dioxide 72.00000\n",
662
- "total sulfur dioxide 289.00000\n",
663
- "density 1.00369\n",
664
- "pH 4.01000\n",
665
- "sulphates 2.00000\n",
666
- "alcohol 14.90000\n",
667
- "quality 5.00000\n",
668
- "dtype: float64"
669
- ]
670
- },
671
- "execution_count": 19,
672
- "metadata": {},
673
- "output_type": "execute_result"
674
- }
675
- ],
676
- "source": [
677
- "\"\"\"\n",
678
- "Description: Maximum value of each column\n",
679
- "\"\"\"\n",
680
- "df.max()"
681
- ]
682
- },
683
- {
684
- "cell_type": "code",
685
- "execution_count": 20,
686
- "id": "8b0bbc0b",
687
- "metadata": {},
688
- "outputs": [
689
- {
690
- "data": {
691
- "text/plain": [
692
- "fixed acidity 4.60000\n",
693
- "volatile acidity 0.12000\n",
694
- "citric acid 0.00000\n",
695
- "residual sugar 0.90000\n",
696
- "chlorides 0.01200\n",
697
- "free sulfur dioxide 1.00000\n",
698
- "total sulfur dioxide 6.00000\n",
699
- "density 0.99007\n",
700
- "pH 2.74000\n",
701
- "sulphates 0.33000\n",
702
- "alcohol 8.40000\n",
703
- "quality 0.00000\n",
704
- "dtype: float64"
705
- ]
706
- },
707
- "execution_count": 20,
708
- "metadata": {},
709
- "output_type": "execute_result"
710
- }
711
- ],
712
- "source": [
713
- "\"\"\"\n",
714
- "Description: Minimum value of each column\n",
715
- "\"\"\"\n",
716
- "df.min()"
717
- ]
718
- },
719
- {
720
- "cell_type": "code",
721
- "execution_count": 21,
722
- "id": "19942bd8",
723
- "metadata": {},
724
- "outputs": [
725
- {
726
- "data": {
727
- "text/plain": [
728
- "Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',\n",
729
- " 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',\n",
730
- " 'pH', 'sulphates', 'alcohol', 'quality'],\n",
731
- " dtype='object')"
732
- ]
733
- },
734
- "execution_count": 21,
735
- "metadata": {},
736
- "output_type": "execute_result"
737
- }
738
- ],
739
- "source": [
740
- "\"\"\"\n",
741
- "Description: Check columns\n",
742
- "\"\"\"\n",
743
- "df.columns"
744
- ]
745
- },
746
- {
747
- "cell_type": "code",
748
- "execution_count": null,
749
- "id": "d191f49f",
750
- "metadata": {},
751
- "outputs": [],
752
- "source": []
753
- }
754
- ],
755
- "metadata": {
756
- "kernelspec": {
757
- "display_name": "Python 3 (ipykernel)",
758
- "language": "python",
759
- "name": "python3"
760
- },
761
- "language_info": {
762
- "codemirror_mode": {
763
- "name": "ipython",
764
- "version": 3
765
- },
766
- "file_extension": ".py",
767
- "mimetype": "text/x-python",
768
- "name": "python",
769
- "nbconvert_exporter": "python",
770
- "pygments_lexer": "ipython3",
771
- "version": "3.9.0"
772
- }
773
- },
774
- "nbformat": 4,
775
- "nbformat_minor": 5
776
- }