kalyan889 committed on
Commit
1e7faf2
·
1 Parent(s): 192cc02

Upload TextClassification_Patient_Symptoms_and_Diseases.ipynb

Browse files
TextClassification_Patient_Symptoms_and_Diseases.ipynb ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "markdown",
19
+ "source": [
20
+ "AIMERS"
21
+ ],
22
+ "metadata": {
23
+ "id": "D1-ngpe5C5_X"
24
+ }
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {
30
+ "colab": {
31
+ "base_uri": "https://localhost:8080/"
32
+ },
33
+ "id": "FU57l9-06L5O",
34
+ "outputId": "4953ae4a-a382-458a-870d-5d4b1ca1f711"
35
+ },
36
+ "outputs": [
37
+ {
38
+ "output_type": "stream",
39
+ "name": "stdout",
40
+ "text": [
41
+ "Accuracy: 0.0\n",
42
+ "Classification Report:\n",
43
+ " precision recall f1-score support\n",
44
+ "\n",
45
+ " Acanthosis nigricans 0.00 0.00 0.00 0.0\n",
46
+ " Acariasis 0.00 0.00 0.00 0.0\n",
47
+ " Acne 0.00 0.00 0.00 0.0\n",
48
+ " Acute bronchitis 0.00 0.00 0.00 1.0\n",
49
+ " Acute bronchospasm 0.00 0.00 0.00 1.0\n",
50
+ " Acute glaucoma 0.00 0.00 0.00 1.0\n",
51
+ " Acute pancreatitis 0.00 0.00 0.00 0.0\n",
52
+ " Acute stress reaction 0.00 0.00 0.00 1.0\n",
53
+ " Adjustment reaction 0.00 0.00 0.00 1.0\n",
54
+ " Alcohol intoxication 0.00 0.00 0.00 0.0\n",
55
+ " Alcohol withdrawal 0.00 0.00 0.00 1.0\n",
56
+ " Alcoholic liver disease 0.00 0.00 0.00 0.0\n",
57
+ " Allergy 0.00 0.00 0.00 0.0\n",
58
+ " Allergy to animals 0.00 0.00 0.00 1.0\n",
59
+ " Anemia due to chronic kidney disease 0.00 0.00 0.00 1.0\n",
60
+ " Anemia of chronic disease 0.00 0.00 0.00 1.0\n",
61
+ " Angina 0.00 0.00 0.00 0.0\n",
62
+ " Ankylosing spondylitis 0.00 0.00 0.00 0.0\n",
63
+ " Aphakia 0.00 0.00 0.00 0.0\n",
64
+ " Aphthous ulcer 0.00 0.00 0.00 1.0\n",
65
+ " Arthritis of the hip 0.00 0.00 0.00 1.0\n",
66
+ " Asthma 0.00 0.00 0.00 0.0\n",
67
+ " Atelectasis 0.00 0.00 0.00 0.0\n",
68
+ " Athlete's foot 0.00 0.00 0.00 1.0\n",
69
+ " Atonic bladder 0.00 0.00 0.00 0.0\n",
70
+ " Atrial fibrillation 0.00 0.00 0.00 0.0\n",
71
+ " Benign vaginal discharge (leukorrhea) 0.00 0.00 0.00 0.0\n",
72
+ " Bipolar disorder 0.00 0.00 0.00 1.0\n",
73
+ " Birth trauma 0.00 0.00 0.00 0.0\n",
74
+ " Bladder cancer 0.00 0.00 0.00 0.0\n",
75
+ " Breast cancer 0.00 0.00 0.00 1.0\n",
76
+ " Breast cyst 0.00 0.00 0.00 0.0\n",
77
+ " Bursitis 0.00 0.00 0.00 1.0\n",
78
+ " Carbon monoxide poisoning 0.00 0.00 0.00 0.0\n",
79
+ " Cellulitis or abscess of mouth 0.00 0.00 0.00 1.0\n",
80
+ " Cervicitis 0.00 0.00 0.00 0.0\n",
81
+ " Chalazion 0.00 0.00 0.00 0.0\n",
82
+ " Cholecystitis 0.00 0.00 0.00 0.0\n",
83
+ " Choledocholithiasis 0.00 0.00 0.00 0.0\n",
84
+ " Cholesteatoma 0.00 0.00 0.00 0.0\n",
85
+ " Chondromalacia of the patella 0.00 0.00 0.00 0.0\n",
86
+ " Chronic back pain 0.00 0.00 0.00 0.0\n",
87
+ " Chronic glaucoma 0.00 0.00 0.00 1.0\n",
88
+ " Chronic kidney disease 0.00 0.00 0.00 0.0\n",
89
+ " Chronic obstructive pulmonary disease (COPD) 0.00 0.00 0.00 0.0\n",
90
+ " Chronic otitis media 0.00 0.00 0.00 1.0\n",
91
+ " Chronic pain disorder 0.00 0.00 0.00 1.0\n",
92
+ " Chronic pancreatitis 0.00 0.00 0.00 1.0\n",
93
+ " Chronic rheumatic fever 0.00 0.00 0.00 0.0\n",
94
+ " Chronic ulcer 0.00 0.00 0.00 0.0\n",
95
+ " Cirrhosis 0.00 0.00 0.00 1.0\n",
96
+ " Cold sore 0.00 0.00 0.00 0.0\n",
97
+ " Colorectal cancer 0.00 0.00 0.00 0.0\n",
98
+ " Congenital rubella 0.00 0.00 0.00 1.0\n",
99
+ " Conjunctivitis due to allergy 0.00 0.00 0.00 1.0\n",
100
+ " Coronary atherosclerosis 0.00 0.00 0.00 1.0\n",
101
+ " Croup 0.00 0.00 0.00 0.0\n",
102
+ " Crushing injury 0.00 0.00 0.00 1.0\n",
103
+ " Cyst of the eyelid 0.00 0.00 0.00 1.0\n",
104
+ " Cystic Fibrosis 0.00 0.00 0.00 1.0\n",
105
+ " Cytomegalovirus infection 0.00 0.00 0.00 1.0\n",
106
+ " De Quervain disease 0.00 0.00 0.00 1.0\n",
107
+ " Degenerative disc disease 0.00 0.00 0.00 1.0\n",
108
+ " Dengue fever 0.00 0.00 0.00 0.0\n",
109
+ " Depression 0.00 0.00 0.00 0.0\n",
110
+ " Diabetes insipidus 0.00 0.00 0.00 1.0\n",
111
+ " Diaper rash 0.00 0.00 0.00 0.0\n",
112
+ " Dislocation of the ankle 0.00 0.00 0.00 0.0\n",
113
+ " Dislocation of the finger 0.00 0.00 0.00 1.0\n",
114
+ " Dislocation of the foot 0.00 0.00 0.00 1.0\n",
115
+ " Dislocation of the hip 0.00 0.00 0.00 1.0\n",
116
+ " Dislocation of the shoulder 0.00 0.00 0.00 0.0\n",
117
+ " Dissociative disorder 0.00 0.00 0.00 1.0\n",
118
+ " Down syndrome 0.00 0.00 0.00 1.0\n",
119
+ " Drug abuse (cocaine) 0.00 0.00 0.00 0.0\n",
120
+ " Drug reaction 0.00 0.00 0.00 1.0\n",
121
+ " Dry eye of unknown cause 0.00 0.00 0.00 0.0\n",
122
+ " Dyshidrosis 0.00 0.00 0.00 1.0\n",
123
+ " Ear drum damage 0.00 0.00 0.00 0.0\n",
124
+ " Ear wax impaction 0.00 0.00 0.00 1.0\n",
125
+ " Emphysema 0.00 0.00 0.00 0.0\n",
126
+ " Empyema 0.00 0.00 0.00 1.0\n",
127
+ " Encephalitis 0.00 0.00 0.00 0.0\n",
128
+ " Endocarditis 0.00 0.00 0.00 1.0\n",
129
+ " Endometrial hyperplasia 0.00 0.00 0.00 1.0\n",
130
+ " Esophageal cancer 0.00 0.00 0.00 0.0\n",
131
+ " Essential tremor 0.00 0.00 0.00 1.0\n",
132
+ " Factitious disorder 0.00 0.00 0.00 1.0\n",
133
+ " Fat embolism 0.00 0.00 0.00 1.0\n",
134
+ " Female genitalia infection 0.00 0.00 0.00 1.0\n",
135
+ " Fibroadenoma 0.00 0.00 0.00 1.0\n",
136
+ " Fibromyalgia 0.00 0.00 0.00 0.0\n",
137
+ " Floaters 0.00 0.00 0.00 0.0\n",
138
+ " Fluid overload 0.00 0.00 0.00 1.0\n",
139
+ " Foreign body in the eye 0.00 0.00 0.00 0.0\n",
140
+ " Foreign body in the throat 0.00 0.00 0.00 0.0\n",
141
+ " Foreign body in the vagina 0.00 0.00 0.00 0.0\n",
142
+ " Fracture of the ankle 0.00 0.00 0.00 1.0\n",
143
+ " Fracture of the arm 0.00 0.00 0.00 1.0\n",
144
+ " Fracture of the finger 0.00 0.00 0.00 0.0\n",
145
+ " Fracture of the hand 0.00 0.00 0.00 0.0\n",
146
+ " Fracture of the jaw 0.00 0.00 0.00 1.0\n",
147
+ " Fracture of the leg 0.00 0.00 0.00 0.0\n",
148
+ " Fracture of the patella 0.00 0.00 0.00 1.0\n",
149
+ " G6PD enzyme deficiency 0.00 0.00 0.00 0.0\n",
150
+ " Galactorrhea of unknown cause 0.00 0.00 0.00 0.0\n",
151
+ " Gallstone 0.00 0.00 0.00 0.0\n",
152
+ " Gastritis 0.00 0.00 0.00 0.0\n",
153
+ " Gastroduodenal ulcer 0.00 0.00 0.00 1.0\n",
154
+ " Gout 0.00 0.00 0.00 0.0\n",
155
+ " Granuloma inguinale 0.00 0.00 0.00 0.0\n",
156
+ " Gynecomastia 0.00 0.00 0.00 0.0\n",
157
+ " Hashimoto thyroiditis 0.00 0.00 0.00 1.0\n",
158
+ " Head and neck cancer 0.00 0.00 0.00 1.0\n",
159
+ " Heart attack 0.00 0.00 0.00 1.0\n",
160
+ " Heart contusion 0.00 0.00 0.00 0.0\n",
161
+ " Heart failure 0.00 0.00 0.00 1.0\n",
162
+ " Hemarthrosis 0.00 0.00 0.00 1.0\n",
163
+ " Hematoma 0.00 0.00 0.00 1.0\n",
164
+ " Hemolytic anemia 0.00 0.00 0.00 1.0\n",
165
+ " High blood pressure 0.00 0.00 0.00 0.0\n",
166
+ " Hirsutism 0.00 0.00 0.00 1.0\n",
167
+ " Human immunodeficiency virus infection (HIV) 0.00 0.00 0.00 1.0\n",
168
+ " Hydatidiform mole 0.00 0.00 0.00 1.0\n",
169
+ " Hydrocele of the testicle 0.00 0.00 0.00 0.0\n",
170
+ " Hydronephrosis 0.00 0.00 0.00 1.0\n",
171
+ " Hyperemesis gravidarum 0.00 0.00 0.00 0.0\n",
172
+ " Hypergammaglobulinemia 0.00 0.00 0.00 1.0\n",
173
+ " Hyperkalemia 0.00 0.00 0.00 0.0\n",
174
+ " Hypernatremia 0.00 0.00 0.00 1.0\n",
175
+ "Hypertrophic obstructive cardiomyopathy (HOCM) 0.00 0.00 0.00 1.0\n",
176
+ " Hyponatremia 0.00 0.00 0.00 0.0\n",
177
+ " Impetigo 0.00 0.00 0.00 1.0\n",
178
+ " Indigestion 0.00 0.00 0.00 1.0\n",
179
+ " Infectious gastroenteritis 0.00 0.00 0.00 1.0\n",
180
+ " Ingrown toe nail 0.00 0.00 0.00 1.0\n",
181
+ " Inguinal hernia 0.00 0.00 0.00 0.0\n",
182
+ " Injury of the ankle 0.00 0.00 0.00 0.0\n",
183
+ " Injury to the abdomen 0.00 0.00 0.00 1.0\n",
184
+ " Injury to the finger 0.00 0.00 0.00 1.0\n",
185
+ " Injury to the hip 0.00 0.00 0.00 1.0\n",
186
+ " Injury to the knee 0.00 0.00 0.00 0.0\n",
187
+ " Insect bite 0.00 0.00 0.00 0.0\n",
188
+ " Intestinal cancer 0.00 0.00 0.00 1.0\n",
189
+ " Intestinal malabsorption 0.00 0.00 0.00 1.0\n",
190
+ " Intestinal obstruction 0.00 0.00 0.00 0.0\n",
191
+ " Intracranial abscess 0.00 0.00 0.00 1.0\n",
192
+ " Irritable bowel syndrome 0.00 0.00 0.00 0.0\n",
193
+ " Kaposi sarcoma 0.00 0.00 0.00 1.0\n",
194
+ " Kidney cancer 0.00 0.00 0.00 1.0\n",
195
+ " Kidney stone 0.00 0.00 0.00 1.0\n",
196
+ " Knee ligament or meniscus tear 0.00 0.00 0.00 1.0\n",
197
+ " Lactose intolerance 0.00 0.00 0.00 1.0\n",
198
+ " Leishmaniasis 0.00 0.00 0.00 1.0\n",
199
+ " Lichen planus 0.00 0.00 0.00 1.0\n",
200
+ " Lipoma 0.00 0.00 0.00 1.0\n",
201
+ " Lung cancer 0.00 0.00 0.00 1.0\n",
202
+ " Lymphadenitis 0.00 0.00 0.00 0.0\n",
203
+ " Lymphangitis 0.00 0.00 0.00 1.0\n",
204
+ " Lymphogranuloma venereum 0.00 0.00 0.00 1.0\n",
205
+ " Magnesium deficiency 0.00 0.00 0.00 1.0\n",
206
+ " Malignant hypertension 0.00 0.00 0.00 1.0\n",
207
+ " Marijuana abuse 0.00 0.00 0.00 0.0\n",
208
+ " Mastoiditis 0.00 0.00 0.00 1.0\n",
209
+ " Meckel diverticulum 0.00 0.00 0.00 0.0\n",
210
+ " Migraine 0.00 0.00 0.00 1.0\n",
211
+ " Mitral valve disease 0.00 0.00 0.00 1.0\n",
212
+ " Molluscum contagiosum 0.00 0.00 0.00 1.0\n",
213
+ " Mononucleosis 0.00 0.00 0.00 0.0\n",
214
+ " Moyamoya disease 0.00 0.00 0.00 0.0\n",
215
+ " Mucositis 0.00 0.00 0.00 0.0\n",
216
+ " Mumps 0.00 0.00 0.00 1.0\n",
217
+ " Muscle spasm 0.00 0.00 0.00 1.0\n",
218
+ " Narcolepsy 0.00 0.00 0.00 0.0\n",
219
+ " Neonatal jaundice 0.00 0.00 0.00 1.0\n",
220
+ " Neurosis 0.00 0.00 0.00 0.0\n",
221
+ " Noninfectious gastroenteritis 0.00 0.00 0.00 0.0\n",
222
+ " Obstructive sleep apnea (OSA) 0.00 0.00 0.00 1.0\n",
223
+ " Onychomycosis 0.00 0.00 0.00 0.0\n",
224
+ " Open wound of the cheek 0.00 0.00 0.00 1.0\n",
225
+ " Open wound of the finger 0.00 0.00 0.00 0.0\n",
226
+ " Open wound of the hand 0.00 0.00 0.00 1.0\n",
227
+ " Open wound of the head 0.00 0.00 0.00 1.0\n",
228
+ " Open wound of the hip 0.00 0.00 0.00 0.0\n",
229
+ " Open wound of the mouth 0.00 0.00 0.00 1.0\n",
230
+ " Open wound of the neck 0.00 0.00 0.00 1.0\n",
231
+ " Open wound of the shoulder 0.00 0.00 0.00 0.0\n",
232
+ " Oral leukoplakia 0.00 0.00 0.00 0.0\n",
233
+ " Oral mucosal lesion 0.00 0.00 0.00 0.0\n",
234
+ " Oral thrush (yeast infection) 0.00 0.00 0.00 1.0\n",
235
+ " Osteoarthritis 0.00 0.00 0.00 0.0\n",
236
+ " Otitis externa (swimmer's ear) 0.00 0.00 0.00 0.0\n",
237
+ " Pancreatic cancer 0.00 0.00 0.00 1.0\n",
238
+ " Panic disorder 0.00 0.00 0.00 0.0\n",
239
+ " Parkinson disease 0.00 0.00 0.00 0.0\n",
240
+ " Paronychia 0.00 0.00 0.00 0.0\n",
241
+ " Patau syndrome 0.00 0.00 0.00 0.0\n",
242
+ " Pelvic fistula 0.00 0.00 0.00 1.0\n",
243
+ " Pelvic organ prolapse 0.00 0.00 0.00 0.0\n",
244
+ " Pemphigus 0.00 0.00 0.00 0.0\n",
245
+ " Pericarditis 0.00 0.00 0.00 1.0\n",
246
+ " Perirectal infection 0.00 0.00 0.00 1.0\n",
247
+ " Peritonsillar abscess 0.00 0.00 0.00 1.0\n",
248
+ " Personality disorder 0.00 0.00 0.00 0.0\n",
249
+ " Phimosis 0.00 0.00 0.00 1.0\n",
250
+ " Pilonidal cyst 0.00 0.00 0.00 1.0\n",
251
+ " Placental abruption 0.00 0.00 0.00 1.0\n",
252
+ " Pleural effusion 0.00 0.00 0.00 1.0\n",
253
+ " Pneumonia 0.00 0.00 0.00 0.0\n",
254
+ " Pneumothorax 0.00 0.00 0.00 1.0\n",
255
+ " Poisoning due to analgesics 0.00 0.00 0.00 1.0\n",
256
+ " Poisoning due to antidepressants 0.00 0.00 0.00 0.0\n",
257
+ " Polycystic ovarian syndrome (PCOS) 0.00 0.00 0.00 0.0\n",
258
+ " Premature ovarian failure 0.00 0.00 0.00 1.0\n",
259
+ " Premenstrual tension syndrome 0.00 0.00 0.00 0.0\n",
260
+ " Problem during pregnancy 0.00 0.00 0.00 0.0\n",
261
+ " Protein deficiency 0.00 0.00 0.00 0.0\n",
262
+ " Pseudohypoparathyroidism 0.00 0.00 0.00 1.0\n",
263
+ " Psoriasis 0.00 0.00 0.00 0.0\n",
264
+ " Psychotic disorder 0.00 0.00 0.00 1.0\n",
265
+ " Pulmonary embolism 0.00 0.00 0.00 0.0\n",
266
+ " Pulmonary eosinophilia 0.00 0.00 0.00 1.0\n",
267
+ " Pulmonary fibrosis 0.00 0.00 0.00 0.0\n",
268
+ " Pyelonephritis 0.00 0.00 0.00 0.0\n",
269
+ " Pyloric stenosis 0.00 0.00 0.00 1.0\n",
270
+ " Rabies 0.00 0.00 0.00 0.0\n",
271
+ " Reactive arthritis 0.00 0.00 0.00 1.0\n",
272
+ " Sarcoidosis 0.00 0.00 0.00 1.0\n",
273
+ " Scarlet fever 0.00 0.00 0.00 1.0\n",
274
+ " Sciatica 0.00 0.00 0.00 0.0\n",
275
+ " Scoliosis 0.00 0.00 0.00 1.0\n",
276
+ " Scurvy 0.00 0.00 0.00 1.0\n",
277
+ " Sebaceous cyst 0.00 0.00 0.00 0.0\n",
278
+ " Sepsis 0.00 0.00 0.00 1.0\n",
279
+ " Septic arthritis 0.00 0.00 0.00 1.0\n",
280
+ " Shingles (herpes zoster) 0.00 0.00 0.00 0.0\n",
281
+ " Sickle cell crisis 0.00 0.00 0.00 1.0\n",
282
+ " Sjogren syndrome 0.00 0.00 0.00 1.0\n",
283
+ " Skin pigmentation disorder 0.00 0.00 0.00 1.0\n",
284
+ " Smoking or tobacco addiction 0.00 0.00 0.00 1.0\n",
285
+ " Spermatocele 0.00 0.00 0.00 1.0\n",
286
+ " Spondylitis 0.00 0.00 0.00 0.0\n",
287
+ " Spondylolisthesis 0.00 0.00 0.00 1.0\n",
288
+ " Spondylosis 0.00 0.00 0.00 0.0\n",
289
+ " Sporotrichosis 0.00 0.00 0.00 1.0\n",
290
+ " Sprain or strain 0.00 0.00 0.00 0.0\n",
291
+ " Stenosis of the tear duct 0.00 0.00 0.00 1.0\n",
292
+ " Strep throat 0.00 0.00 0.00 1.0\n",
293
+ " Stress incontinence 0.00 0.00 0.00 1.0\n",
294
+ " Stroke 0.00 0.00 0.00 1.0\n",
295
+ " Subarachnoid hemorrhage 0.00 0.00 0.00 1.0\n",
296
+ " Subconjunctival hemorrhage 0.00 0.00 0.00 1.0\n",
297
+ " Tendinitis 0.00 0.00 0.00 1.0\n",
298
+ " Testicular torsion 0.00 0.00 0.00 1.0\n",
299
+ " Thoracic aortic aneurysm 0.00 0.00 0.00 1.0\n",
300
+ " Tietze syndrome 0.00 0.00 0.00 0.0\n",
301
+ " Tonsillar hypertrophy 0.00 0.00 0.00 1.0\n",
302
+ " Tonsillitis 0.00 0.00 0.00 0.0\n",
303
+ " Tooth abscess 0.00 0.00 0.00 0.0\n",
304
+ " Tooth disorder 0.00 0.00 0.00 0.0\n",
305
+ " Torticollis 0.00 0.00 0.00 1.0\n",
306
+ " Tourette syndrome 0.00 0.00 0.00 1.0\n",
307
+ " Toxoplasmosis 0.00 0.00 0.00 1.0\n",
308
+ " Tracheitis 0.00 0.00 0.00 1.0\n",
309
+ " Transient ischemic attack 0.00 0.00 0.00 0.0\n",
310
+ " Trichinosis 0.00 0.00 0.00 1.0\n",
311
+ " Trichomonas infection 0.00 0.00 0.00 1.0\n",
312
+ " Tricuspid valve disease 0.00 0.00 0.00 1.0\n",
313
+ " Turner syndrome 0.00 0.00 0.00 1.0\n",
314
+ " Urethral stricture 0.00 0.00 0.00 0.0\n",
315
+ " Urge incontinence 0.00 0.00 0.00 1.0\n",
316
+ " Urinary tract obstruction 0.00 0.00 0.00 0.0\n",
317
+ " Vaginal yeast infection 0.00 0.00 0.00 0.0\n",
318
+ " Vaginitis 0.00 0.00 0.00 0.0\n",
319
+ " Varicocele of the testicles 0.00 0.00 0.00 1.0\n",
320
+ " Viral exanthem 0.00 0.00 0.00 1.0\n",
321
+ " Viral warts 0.00 0.00 0.00 0.0\n",
322
+ " Vitamin A deficiency 0.00 0.00 0.00 1.0\n",
323
+ " Vitreous degeneration 0.00 0.00 0.00 0.0\n",
324
+ " Vulvar cancer 0.00 0.00 0.00 1.0\n",
325
+ " Vulvar disorder 0.00 0.00 0.00 1.0\n",
326
+ " Vulvodynia 0.00 0.00 0.00 1.0\n",
327
+ " West Nile virus 0.00 0.00 0.00 1.0\n",
328
+ " Whooping cough 0.00 0.00 0.00 0.0\n",
329
+ " Wilson disease 0.00 0.00 0.00 0.0\n",
330
+ "\n",
331
+ " accuracy 0.00 160.0\n",
332
+ " macro avg 0.00 0.00 0.00 160.0\n",
333
+ " weighted avg 0.00 0.00 0.00 160.0\n",
334
+ "\n"
335
+ ]
336
+ },
337
+ {
338
+ "output_type": "stream",
339
+ "name": "stderr",
340
+ "text": [
341
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
342
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
343
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
344
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
345
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
346
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
347
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
348
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
349
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
350
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
351
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
352
+ " _warn_prf(average, modifier, msg_start, len(result))\n"
353
+ ]
354
+ }
355
+ ],
356
+ "source": [
357
+ "import pandas as pd\n",
358
+ "import re\n",
359
+ "import spacy\n",
360
+ "from sklearn.model_selection import train_test_split\n",
361
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
362
+ "from sklearn.pipeline import Pipeline\n",
363
+ "from sklearn.metrics import accuracy_score, classification_report\n",
364
+ "from sklearn.linear_model import LogisticRegression\n",
365
+ "\n",
366
+ "# Load the data\n",
367
+ "data = pd.read_csv('symptomssingle.csv')\n",
368
+ "\n",
369
+ "# Check for any missing values and remove them\n",
370
+ "data = data.dropna()\n",
371
+ "\n",
372
+ "# Define a function to separate symptoms and diseases from the text\n",
373
+ "def separate_symptoms_and_diseases(text):\n",
374
+ " symptoms = re.findall(r'{\"symptoms\":\"(.*?)\"}', text)\n",
375
+ " disease = re.sub(r'(?:{\"symptoms\":\".*?\"},?)+', '', text).strip()\n",
376
+ " disease = disease.replace('],', '').strip() # Remove '],' from the disease name\n",
377
+ " return symptoms, disease\n",
378
+ "\n",
379
+ "# Apply the function to the data\n",
380
+ "data['symptoms_and_diseases'] = data['data'].apply(separate_symptoms_and_diseases)\n",
381
+ "data[['symptoms', 'disease']] = pd.DataFrame(data['symptoms_and_diseases'].tolist(), index=data.index)\n",
382
+ "data = data.drop(columns=['data', 'symptoms_and_diseases'])\n",
383
+ "\n",
384
+ "# Load the spaCy model\n",
385
+ "nlp = spacy.load('en_core_web_sm')\n",
386
+ "\n",
387
+ "# Preprocessing function\n",
388
+ "def preprocess(symptoms):\n",
389
+ " processed_symptoms = []\n",
390
+ " for symptom in symptoms:\n",
391
+ " doc = nlp(symptom)\n",
392
+ " processed_symptom = ' '.join(token.lemma_.lower() for token in doc if not token.is_stop and token.is_alpha)\n",
393
+ " processed_symptoms.append(processed_symptom)\n",
394
+ " return ' '.join(processed_symptoms)\n",
395
+ "\n",
396
+ "# Preprocess the symptoms column\n",
397
+ "data['symptoms_preprocessed'] = data['symptoms'].apply(preprocess)\n",
398
+ "\n",
399
+ "\n",
400
+ "# Split the data into train and test sets\n",
401
+ "X_train, X_test, y_train, y_test = train_test_split(data['symptoms_preprocessed'], data['disease'], test_size=0.2, random_state=42)\n",
402
+ "\n",
403
+ "# Create a pipeline for text classification\n",
404
+ "pipeline = Pipeline([\n",
405
+ " ('tfidf', TfidfVectorizer(ngram_range=(1, 2))),\n",
406
+ " ('classifier', LogisticRegression(solver='liblinear', C=10))\n",
407
+ "])\n",
408
+ "\n",
409
+ "# Train the model\n",
410
+ "pipeline.fit(X_train, y_train)\n",
411
+ "\n",
412
+ "# Make predictions\n",
413
+ "y_pred = pipeline.predict(X_test)\n",
414
+ "\n",
415
+ "# Evaluate the model\n",
416
+ "print(\"Accuracy: \", accuracy_score(y_test, y_pred))\n",
417
+ "print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n"
418
+ ]
419
+ },
420
+ {
421
+ "cell_type": "code",
422
+ "source": [
423
+ "!pip install joblib\n",
424
+ "import joblib\n",
425
+ "\n",
426
+ "# Save the trained model\n",
427
+ "joblib.dump(pipeline, 'DiseasePredictionBasedonSymptoms.joblib')\n"
428
+ ],
429
+ "metadata": {
430
+ "colab": {
431
+ "base_uri": "https://localhost:8080/"
432
+ },
433
+ "id": "emwnJJVwAupA",
434
+ "outputId": "5ecf5d36-cb69-43df-9a16-ccee7b17cfdf"
435
+ },
436
+ "execution_count": null,
437
+ "outputs": [
438
+ {
439
+ "output_type": "stream",
440
+ "name": "stdout",
441
+ "text": [
442
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
443
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (1.1.1)\n"
444
+ ]
445
+ },
446
+ {
447
+ "output_type": "execute_result",
448
+ "data": {
449
+ "text/plain": [
450
+ "['DiseasePredictionBasedonSymptoms.joblib']"
451
+ ]
452
+ },
453
+ "metadata": {},
454
+ "execution_count": 5
455
+ }
456
+ ]
457
+ },
458
+ {
459
+ "cell_type": "code",
460
+ "source": [
461
+ "import joblib\n",
462
+ "\n",
463
+ "# Load the saved model\n",
464
+ "loaded_pipeline = joblib.load('DiseasePredictionBasedonSymptoms.joblib')\n",
465
+ "\n",
466
+ "# Make predictions using the loaded model (example)\n",
467
+ "sample_symptom = \"Skin Rash\"\n",
468
+ "processed_symptom = preprocess([sample_symptom])\n",
469
+ "prediction = loaded_pipeline.predict([processed_symptom])\n",
470
+ "\n",
471
+ "print(\"Predicted disease:\", prediction[0])\n"
472
+ ],
473
+ "metadata": {
474
+ "colab": {
475
+ "base_uri": "https://localhost:8080/"
476
+ },
477
+ "id": "Tu4fmj1bBYNw",
478
+ "outputId": "a1a33056-3a0d-49ad-8cb8-b356fba6dd73"
479
+ },
480
+ "execution_count": null,
481
+ "outputs": [
482
+ {
483
+ "output_type": "stream",
484
+ "name": "stdout",
485
+ "text": [
486
+ "Predicted disease: Contact dermatitis\n"
487
+ ]
488
+ }
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "code",
493
+ "source": [],
494
+ "metadata": {
495
+ "id": "CY5qrRCkBGuJ"
496
+ },
497
+ "execution_count": null,
498
+ "outputs": []
499
+ }
500
+ ]
501
+ }