HudsonArauj committed on
Commit
d45489d
·
1 Parent(s): 6a234aa
Files changed (3) hide show
  1. app/language_detection.ipynb +83 -136
  2. app/main.py +2 -1
  3. app/model/model.py +3 -1
app/language_detection.ipynb CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "cell_type": "code",
17
- "execution_count": 11,
18
  "metadata": {},
19
  "outputs": [],
20
  "source": [
@@ -29,7 +29,7 @@
29
  },
30
  {
31
  "cell_type": "code",
32
- "execution_count": 1,
33
  "metadata": {},
34
  "outputs": [
35
  {
@@ -38,7 +38,7 @@
38
  "'1.3.0'"
39
  ]
40
  },
41
- "execution_count": 1,
42
  "metadata": {},
43
  "output_type": "execute_result"
44
  }
@@ -51,124 +51,16 @@
51
  },
52
  {
53
  "cell_type": "code",
54
- "execution_count": 7,
55
  "metadata": {},
56
- "outputs": [
57
- {
58
- "data": {
59
- "text/html": [
60
- "<div>\n",
61
- "<style scoped>\n",
62
- " .dataframe tbody tr th:only-of-type {\n",
63
- " vertical-align: middle;\n",
64
- " }\n",
65
- "\n",
66
- " .dataframe tbody tr th {\n",
67
- " vertical-align: top;\n",
68
- " }\n",
69
- "\n",
70
- " .dataframe thead th {\n",
71
- " text-align: right;\n",
72
- " }\n",
73
- "</style>\n",
74
- "<table border=\"1\" class=\"dataframe\">\n",
75
- " <thead>\n",
76
- " <tr style=\"text-align: right;\">\n",
77
- " <th></th>\n",
78
- " <th>Text</th>\n",
79
- " <th>Language</th>\n",
80
- " </tr>\n",
81
- " </thead>\n",
82
- " <tbody>\n",
83
- " <tr>\n",
84
- " <th>0</th>\n",
85
- " <td>Nature, in the broadest sense, is the natural...</td>\n",
86
- " <td>English</td>\n",
87
- " </tr>\n",
88
- " <tr>\n",
89
- " <th>1</th>\n",
90
- " <td>\"Nature\" can refer to the phenomena of the phy...</td>\n",
91
- " <td>English</td>\n",
92
- " </tr>\n",
93
- " <tr>\n",
94
- " <th>2</th>\n",
95
- " <td>The study of nature is a large, if not the onl...</td>\n",
96
- " <td>English</td>\n",
97
- " </tr>\n",
98
- " <tr>\n",
99
- " <th>3</th>\n",
100
- " <td>Although humans are part of nature, human acti...</td>\n",
101
- " <td>English</td>\n",
102
- " </tr>\n",
103
- " <tr>\n",
104
- " <th>4</th>\n",
105
- " <td>[1] The word nature is borrowed from the Old F...</td>\n",
106
- " <td>English</td>\n",
107
- " </tr>\n",
108
- " <tr>\n",
109
- " <th>...</th>\n",
110
- " <td>...</td>\n",
111
- " <td>...</td>\n",
112
- " </tr>\n",
113
- " <tr>\n",
114
- " <th>10332</th>\n",
115
- " <td>ನಿಮ್ಮ ತಪ್ಪು ಏನು ಬಂದಿದೆಯೆಂದರೆ ಆ ದಿನದಿಂದ ನಿಮಗೆ ಒ...</td>\n",
116
- " <td>Kannada</td>\n",
117
- " </tr>\n",
118
- " <tr>\n",
119
- " <th>10333</th>\n",
120
- " <td>ನಾರ್ಸಿಸಾ ತಾನು ಮೊದಲಿಗೆ ಹೆಣಗಾಡುತ್ತಿದ್ದ ಮಾರ್ಗಗಳನ್...</td>\n",
121
- " <td>Kannada</td>\n",
122
- " </tr>\n",
123
- " <tr>\n",
124
- " <th>10334</th>\n",
125
- " <td>ಹೇಗೆ ' ನಾರ್ಸಿಸಿಸಮ್ ಈಗ ಮರಿಯನ್ ಅವರಿಗೆ ಸಂಭವಿಸಿದ ಎ...</td>\n",
126
- " <td>Kannada</td>\n",
127
- " </tr>\n",
128
- " <tr>\n",
129
- " <th>10335</th>\n",
130
- " <td>ಅವಳು ಈಗ ಹೆಚ್ಚು ಚಿನ್ನದ ಬ್ರೆಡ್ ಬಯಸುವುದಿಲ್ಲ ಎಂದು ...</td>\n",
131
- " <td>Kannada</td>\n",
132
- " </tr>\n",
133
- " <tr>\n",
134
- " <th>10336</th>\n",
135
- " <td>ಟೆರ್ರಿ ನೀವು ನಿಜವಾಗಿಯೂ ಆ ದೇವದೂತನಂತೆ ಸ್ವಲ್ಪ ಕಾಣು...</td>\n",
136
- " <td>Kannada</td>\n",
137
- " </tr>\n",
138
- " </tbody>\n",
139
- "</table>\n",
140
- "<p>10337 rows × 2 columns</p>\n",
141
- "</div>"
142
- ],
143
- "text/plain": [
144
- " Text Language\n",
145
- "0 Nature, in the broadest sense, is the natural... English\n",
146
- "1 \"Nature\" can refer to the phenomena of the phy... English\n",
147
- "2 The study of nature is a large, if not the onl... English\n",
148
- "3 Although humans are part of nature, human acti... English\n",
149
- "4 [1] The word nature is borrowed from the Old F... English\n",
150
- "... ... ...\n",
151
- "10332 ನಿಮ್ಮ ತಪ್ಪು ಏನು ಬಂದಿದೆಯೆಂದರೆ ಆ ದಿನದಿಂದ ನಿಮಗೆ ಒ... Kannada\n",
152
- "10333 ನಾರ್ಸಿಸಾ ತಾನು ಮೊದಲಿಗೆ ಹೆಣಗಾಡುತ್ತಿದ್ದ ಮಾರ್ಗಗಳನ್... Kannada\n",
153
- "10334 ಹೇಗೆ ' ನಾರ್ಸಿಸಿಸಮ್ ಈಗ ಮರಿಯನ್ ಅವರಿಗೆ ಸಂಭವಿಸಿದ ಎ... Kannada\n",
154
- "10335 ಅವಳು ಈಗ ಹೆಚ್ಚು ಚಿನ್ನದ ಬ್ರೆಡ್ ಬಯಸುವುದಿಲ್ಲ ಎಂದು ... Kannada\n",
155
- "10336 ಟೆರ್ರಿ ನೀವು ನಿಜವಾಗಿಯೂ ಆ ದೇವದೂತನಂತೆ ಸ್ವಲ್ಪ ಕಾಣು... Kannada\n",
156
- "\n",
157
- "[10337 rows x 2 columns]"
158
- ]
159
- },
160
- "execution_count": 7,
161
- "metadata": {},
162
- "output_type": "execute_result"
163
- }
164
- ],
165
  "source": [
166
  "data = pd.read_csv('language_detection.csv')"
167
  ]
168
  },
169
  {
170
  "cell_type": "code",
171
- "execution_count": 8,
172
  "metadata": {},
173
  "outputs": [],
174
  "source": [
@@ -178,7 +70,7 @@
178
  },
179
  {
180
  "cell_type": "code",
181
- "execution_count": 9,
182
  "metadata": {},
183
  "outputs": [],
184
  "source": [
@@ -189,7 +81,7 @@
189
  },
190
  {
191
  "cell_type": "code",
192
- "execution_count": 10,
193
  "metadata": {},
194
  "outputs": [
195
  {
@@ -200,7 +92,7 @@
200
  " 'Russian', 'Spanish', 'Sweedish', 'Tamil', 'Turkish'], dtype=object)"
201
  ]
202
  },
203
- "execution_count": 10,
204
  "metadata": {},
205
  "output_type": "execute_result"
206
  }
@@ -211,7 +103,7 @@
211
  },
212
  {
213
  "cell_type": "code",
214
- "execution_count": 12,
215
  "metadata": {},
216
  "outputs": [],
217
  "source": [
@@ -225,7 +117,7 @@
225
  },
226
  {
227
  "cell_type": "code",
228
- "execution_count": 26,
229
  "metadata": {},
230
  "outputs": [],
231
  "source": [
@@ -236,7 +128,7 @@
236
  },
237
  {
238
  "cell_type": "code",
239
- "execution_count": 27,
240
  "metadata": {},
241
  "outputs": [],
242
  "source": [
@@ -248,19 +140,19 @@
248
  },
249
  {
250
  "cell_type": "code",
251
- "execution_count": 28,
252
  "metadata": {},
253
  "outputs": [
254
  {
255
  "data": {
256
  "text/html": [
257
- "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MultinomialNB</label><div class=\"sk-toggleable__content\"><pre>MultinomialNB()</pre></div></div></div></div></div>"
258
  ],
259
  "text/plain": [
260
  "MultinomialNB()"
261
  ]
262
  },
263
- "execution_count": 28,
264
  "metadata": {},
265
  "output_type": "execute_result"
266
  }
@@ -274,7 +166,7 @@
274
  },
275
  {
276
  "cell_type": "code",
277
- "execution_count": 29,
278
  "metadata": {},
279
  "outputs": [],
280
  "source": [
@@ -283,7 +175,7 @@
283
  },
284
  {
285
  "cell_type": "code",
286
- "execution_count": 30,
287
  "metadata": {},
288
  "outputs": [
289
  {
@@ -370,20 +262,20 @@
370
  },
371
  {
372
  "cell_type": "code",
373
- "execution_count": 34,
374
  "metadata": {},
375
  "outputs": [],
376
  "source": [
377
- "# Save the model\n",
378
- "import pickle\n",
379
  "\n",
380
- "with open('trained-01.pkl', 'wb') as file:\n",
381
- " pickle.dump(model, file)"
382
  ]
383
  },
384
  {
385
  "cell_type": "code",
386
- "execution_count": 45,
387
  "metadata": {},
388
  "outputs": [
389
  {
@@ -392,7 +284,7 @@
392
  "array(['Portugeese', 'English'], dtype=object)"
393
  ]
394
  },
395
- "execution_count": 45,
396
  "metadata": {},
397
  "output_type": "execute_result"
398
  }
@@ -409,19 +301,74 @@
409
  },
410
  {
411
  "cell_type": "code",
412
- "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  "metadata": {},
414
  "outputs": [],
415
  "source": [
 
 
 
 
 
 
 
416
  "\n"
417
  ]
418
  },
419
  {
420
  "cell_type": "code",
421
- "execution_count": null,
422
  "metadata": {},
423
- "outputs": [],
424
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  },
426
  {
427
  "cell_type": "code",
 
14
  },
15
  {
16
  "cell_type": "code",
17
+ "execution_count": 33,
18
  "metadata": {},
19
  "outputs": [],
20
  "source": [
 
29
  },
30
  {
31
  "cell_type": "code",
32
+ "execution_count": 34,
33
  "metadata": {},
34
  "outputs": [
35
  {
 
38
  "'1.3.0'"
39
  ]
40
  },
41
+ "execution_count": 34,
42
  "metadata": {},
43
  "output_type": "execute_result"
44
  }
 
51
  },
52
  {
53
  "cell_type": "code",
54
+ "execution_count": 35,
55
  "metadata": {},
56
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  "source": [
58
  "data = pd.read_csv('language_detection.csv')"
59
  ]
60
  },
61
  {
62
  "cell_type": "code",
63
+ "execution_count": 36,
64
  "metadata": {},
65
  "outputs": [],
66
  "source": [
 
70
  },
71
  {
72
  "cell_type": "code",
73
+ "execution_count": 37,
74
  "metadata": {},
75
  "outputs": [],
76
  "source": [
 
81
  },
82
  {
83
  "cell_type": "code",
84
+ "execution_count": 38,
85
  "metadata": {},
86
  "outputs": [
87
  {
 
92
  " 'Russian', 'Spanish', 'Sweedish', 'Tamil', 'Turkish'], dtype=object)"
93
  ]
94
  },
95
+ "execution_count": 38,
96
  "metadata": {},
97
  "output_type": "execute_result"
98
  }
 
103
  },
104
  {
105
  "cell_type": "code",
106
+ "execution_count": 39,
107
  "metadata": {},
108
  "outputs": [],
109
  "source": [
 
117
  },
118
  {
119
  "cell_type": "code",
120
+ "execution_count": 40,
121
  "metadata": {},
122
  "outputs": [],
123
  "source": [
 
128
  },
129
  {
130
  "cell_type": "code",
131
+ "execution_count": 42,
132
  "metadata": {},
133
  "outputs": [],
134
  "source": [
 
140
  },
141
  {
142
  "cell_type": "code",
143
+ "execution_count": 43,
144
  "metadata": {},
145
  "outputs": [
146
  {
147
  "data": {
148
  "text/html": [
149
+ "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MultinomialNB()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MultinomialNB</label><div class=\"sk-toggleable__content\"><pre>MultinomialNB()</pre></div></div></div></div></div>"
150
  ],
151
  "text/plain": [
152
  "MultinomialNB()"
153
  ]
154
  },
155
+ "execution_count": 43,
156
  "metadata": {},
157
  "output_type": "execute_result"
158
  }
 
166
  },
167
  {
168
  "cell_type": "code",
169
+ "execution_count": 44,
170
  "metadata": {},
171
  "outputs": [],
172
  "source": [
 
175
  },
176
  {
177
  "cell_type": "code",
178
+ "execution_count": 45,
179
  "metadata": {},
180
  "outputs": [
181
  {
 
262
  },
263
  {
264
  "cell_type": "code",
265
+ "execution_count": 48,
266
  "metadata": {},
267
  "outputs": [],
268
  "source": [
269
+ "# # Save the model\n",
270
+ "# import pickle\n",
271
  "\n",
272
+ "# with open('trained-01.pkl', 'wb') as file:\n",
273
+ "# pickle.dump(model, file)\n"
274
  ]
275
  },
276
  {
277
  "cell_type": "code",
278
+ "execution_count": 46,
279
  "metadata": {},
280
  "outputs": [
281
  {
 
284
  "array(['Portugeese', 'English'], dtype=object)"
285
  ]
286
  },
287
+ "execution_count": 46,
288
  "metadata": {},
289
  "output_type": "execute_result"
290
  }
 
301
  },
302
  {
303
  "cell_type": "code",
304
+ "execution_count": 50,
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "data": {
309
+ "text/plain": [
310
+ "'Portugeese'"
311
+ ]
312
+ },
313
+ "execution_count": 50,
314
+ "metadata": {},
315
+ "output_type": "execute_result"
316
+ }
317
+ ],
318
+ "source": [
319
+ "classes = ['Arabic', 'Danish', 'Dutch', 'English', 'French', 'German',\n",
320
+ " 'Greek', 'Hindi', 'Italian', 'Kannada', 'Malayalam', 'Portugeese',\n",
321
+ " 'Russian', 'Spanish', 'Sweedish', 'Tamil', 'Turkish']\n",
322
+ "\n",
323
+ "\n",
324
+ "\n",
325
+ "def predict_language(text):\n",
326
+ " text = re.sub(r'[!@#$(),\\n\"%^*?\\:;~`0-9]', ' ', text)\n",
327
+ " text = re.sub(r'[\\[\\]]', ' ', text)\n",
328
+ " text = text.lower()\n",
329
+ " text = cv.transform([text]).toarray()\n",
330
+ " pred = model.predict(text)\n",
331
+ " return classes[pred[0]]\n",
332
+ "\n",
333
+ "\n",
334
+ "predict_language('Oi tudo bem?')"
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": 30,
340
  "metadata": {},
341
  "outputs": [],
342
  "source": [
343
+ "import requests\n",
344
+ "# teste api local \n",
345
+ "url = 'http://localhost:8080/predict'\n",
346
+ "\n",
347
+ "text1 = 'Oi tudo bem, como voce vai?'\n",
348
+ "\n",
349
+ "response = requests.post(url, json={'text': text1})\n",
350
  "\n"
351
  ]
352
  },
353
  {
354
  "cell_type": "code",
355
+ "execution_count": 31,
356
  "metadata": {},
357
+ "outputs": [
358
+ {
359
+ "data": {
360
+ "text/plain": [
361
+ "<Response [500]>"
362
+ ]
363
+ },
364
+ "execution_count": 31,
365
+ "metadata": {},
366
+ "output_type": "execute_result"
367
+ }
368
+ ],
369
+ "source": [
370
+ "response"
371
+ ]
372
  },
373
  {
374
  "cell_type": "code",
app/main.py CHANGED
@@ -18,4 +18,5 @@ def predict(payload: TextIn):
18
  language = predict_language(payload.text)
19
  return {"language": language}
20
  except Exception as e:
21
- raise HTTPException(status_code=500, detail="Internal Server Error")
 
 
18
  language = predict_language(payload.text)
19
  return {"language": language}
20
  except Exception as e:
21
+ raise HTTPException(status_code=500, detail="Internal Server Error")
22
+
app/model/model.py CHANGED
@@ -1,7 +1,8 @@
1
  import pickle
2
  import re
3
  from pathlib import Path
4
-
 
5
  __version__ = '01'
6
 
7
 
@@ -21,5 +22,6 @@ def predict_language(text):
21
  text = re.sub(r'[!@#$(),\n"%^*?\:;~`0-9]', ' ', text)
22
  text = re.sub(r'[\[\]]', ' ', text)
23
  text = text.lower()
 
24
  pred = model.predict([text])
25
  return classes[pred[0]]
 
1
  import pickle
2
  import re
3
  from pathlib import Path
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+ cv = CountVectorizer(max_features = 1500)
6
  __version__ = '01'
7
 
8
 
 
22
  text = re.sub(r'[!@#$(),\n"%^*?\:;~`0-9]', ' ', text)
23
  text = re.sub(r'[\[\]]', ' ', text)
24
  text = text.lower()
25
+ text = cv.transform([text]).toarray()
26
  pred = model.predict([text])
27
  return classes[pred[0]]