iliyan14 committed on
Commit
6f1e936
·
verified ·
1 Parent(s): 5cea9e7

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.47.1",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.47.1"
6
+ }
medicine predictor/medical_recomendation.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Medical Recommendation
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/#fileId=https%3A//storage.googleapis.com/kaggle-colab-exported-notebooks/medical-recomendation-bdc9e8f5-dc04-4b0d-899b-76151393f750.ipynb%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com/20250105/auto/storage/goog4_request%26X-Goog-Date%3D20250105T163440Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D5f5edfc6179d91a128626c6c369d548e8371ff02ec6ab300764df5e5763928d98eb94faa3f3754b73d8cbfbca8f734df4b630c9f8a6520c811a366bf273ce121a32bc10bd6e0e61fb1169f3394260d7a21669fde4e46d4733e405b9df204bbc22ed3e86c3868e249f5f36197a99430999180fc7d6cda561cfeba0c1a874175426fca7775518c5c46275376390d75492c2deaa164a4fa096cd33ce0bbdc7b95c952846e3a4bea9230588bf1481c24bef89559057f4974519bd4d1b6933423c7ffe2201c7fec3dccb0c3b3983d5d34b0f74c799f6cf1f5de6bbc61812e36acaee18ed0b43de1a92f62a1ce36d5225e8b96e6b05440463d9f9914322a7ab6d7e8a0
8
+ """
9
+
10
+ # IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
11
+ # THEN FEEL FREE TO DELETE THIS CELL.
12
+ # NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
13
+ # ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
14
+ # NOTEBOOK.
15
+ import kagglehub
16
+ noorsaeed_medicine_recommendation_system_dataset_path = kagglehub.dataset_download('noorsaeed/medicine-recommendation-system-dataset')
17
+
18
+ print('Data source import complete.')
19
+
20
+ # This Python 3 environment comes with many helpful analytics libraries installed
21
+ # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
22
+ # For example, here's several helpful packages to load
23
+
24
+ import numpy as np # linear algebra
25
+ import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
26
+
27
+ # Input data files are available in the read-only "../input/" directory
28
+ # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
29
+
30
+ import os
31
+ for dirname, _, filenames in os.walk('/kaggle/input'):
32
+ for filename in filenames:
33
+ print(os.path.join(dirname, filename))
34
+
35
+ # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
36
+ # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
37
+
38
+ import pandas as pd
39
+
40
+ dataset = pd.read_csv('/kaggle/input/Training.csv')
41
+
42
+ dataset
43
+
44
+ dataset.shape
45
+
46
+ from sklearn.model_selection import train_test_split
47
+ from sklearn.preprocessing import LabelEncoder
48
+
49
# Separate the target column from the feature columns.
y = dataset['prognosis']
X = dataset.drop(columns='prognosis')

# Encode the prognosis labels as integer class ids.
le = LabelEncoder()
Y = le.fit_transform(y)

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=20,
)
58
+
59
+ from sklearn.datasets import make_classification
60
+ from sklearn.model_selection import train_test_split
61
+ from sklearn.svm import SVC
62
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
63
+ from sklearn.neighbors import KNeighborsClassifier
64
+ from sklearn.naive_bayes import MultinomialNB
65
+ from sklearn.metrics import accuracy_score, confusion_matrix
66
+ import numpy as np
67
+
68
+
69
+ # Create a dictionary to store models
70
# Candidate classifiers to compare, keyed by the name used in the report.
models = {
    'SVC': SVC(kernel='linear'),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'KNeighbors': KNeighborsClassifier(n_neighbors=5),
    'MultinomialNB': MultinomialNB()
}

# Visual divider printed after each model's report.
report_divider = "\n" + "=" * 40 + "\n"

for name, clf in models.items():
    # Fit on the training split, then predict the held-out split.
    y_hat = clf.fit(X_train, y_train).predict(X_test)

    print(f"{name} Accuracy: {accuracy_score(y_test, y_hat)}")

    print(f"{name} Confusion Matrix:")
    print(np.array2string(confusion_matrix(y_test, y_hat), separator=', '))

    print(report_divider)
95
+
96
+ # selecting svc
97
# Train the chosen linear SVC and score it on both splits.
svc = SVC(kernel='linear').fit(X_train, y_train)

ypred = svc.predict(X_test)
train_pred = svc.predict(X_train)

test_accuracy = accuracy_score(y_test, ypred)
train_accuracy = accuracy_score(y_train, train_pred)

# A training score more than 10 percentage points above the test score is
# treated as a sign of overfitting.
accuracy_gap = train_accuracy - test_accuracy
verdict = (
    "The model is likely overfitting."
    if accuracy_gap > 0.1
    else "The model generalizes well."
)
print(verdict)
109
+
110
+ # Plotting
111
+ import matplotlib.pyplot as plt
112
+
113
+ # Plotting as a line chart
114
# Plot the two accuracies as a line chart for a quick visual overfitting check.
labels = ['Training Accuracy', 'Test Accuracy']
accuracies = [train_accuracy, test_accuracy]

plt.figure(figsize=(8, 5))
plt.plot(labels, accuracies, marker='o', color='blue', label="Accuracy")

# Axis cosmetics: accuracy is a fraction, so the y-axis is pinned to [0, 1].
plt.ylim(0, 1)
plt.ylabel('Accuracy', fontsize=12)
plt.title('Overfitting Check: Training vs Test Accuracy', fontsize=14)
plt.grid(alpha=0.4)
plt.legend(fontsize=12)

# Annotate each point with its value, slightly above the marker.
for position, value in enumerate(accuracies):
    plt.text(position, value + 0.02, format(value, ".2f"), ha='center', fontsize=12)

plt.show()
132
+
133
+ # save svc
134
import pickle

# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed before it is reopened for reading below.
with open('svc.pkl', 'wb') as model_file:
    pickle.dump(svc, model_file)

# Reload the classifier from disk to confirm the round trip works.
# NOTE: unpickling can execute arbitrary code; only load files you created.
with open('svc.pkl', 'rb') as model_file:
    svc = pickle.load(model_file)
139
+
140
+ # test 1:
141
# Spot-check the reloaded model on two held-out rows: print the predicted
# class id next to the true class id for the same row.
for row in (0, 100):
    sample = X_test.iloc[row].values.reshape(1, -1)
    print("predicted disease :", svc.predict(sample))
    print("Actual Disease :", y_test[row])
147
+
148
+ sym_des = pd.read_csv('/kaggle/input/symtoms_df.csv')
149
+ precautions = pd.read_csv('/kaggle/input/precautions_df.csv')
150
+ workout = pd.read_csv('/kaggle/input/workout_df.csv')
151
+ description = pd.read_csv('/kaggle/input/description.csv')
152
+ medications = pd.read_csv('/kaggle/input/medications.csv')
153
+ diets = pd.read_csv('/kaggle/input/diets.csv')
154
+
155
def helper(dis):
    """Look up the reference information stored for one disease.

    Reads the module-level ``description``, ``precautions``, ``medications``,
    ``diets`` and ``workout`` tables.

    Args:
        dis: Disease name, compared for exact equality against each table's
            disease column.

    Returns:
        A 5-tuple ``(desc, pre, med, die, wrkout)``:

        * ``desc`` - matching descriptions joined by single spaces
          (``""`` when nothing matches).
        * ``pre`` - list with one array of the four precaution values per
          matching row.
        * ``med`` - list of matching ``Medication`` values.
        * ``die`` - list of matching ``Diet`` values.
        * ``wrkout`` - pandas Series of matching ``workout`` values.
    """
    # Filter on the argument, not on the global ``predicted_disease``: the
    # function must work for any disease and must not depend on a variable
    # that is only assigned later in the script.
    desc = " ".join(description.loc[description['Disease'] == dis, 'Description'])

    precaution_columns = ['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']
    pre = list(precautions.loc[precautions['Disease'] == dis, precaution_columns].values)

    med = list(medications.loc[medications['Disease'] == dis, 'Medication'].values)

    die = list(diets.loc[diets['Disease'] == dis, 'Diet'].values)

    # The workout table names its key column in lower case, unlike the others.
    wrkout = workout.loc[workout['disease'] == dis, 'workout']

    return desc, pre, med, die, wrkout
172
+
173
+ symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 
'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}
174
+ diseases_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}
175
def get_predicted_value(patient_symptoms):
    """Predict a disease name from a collection of symptom names.

    Builds a 0/1 feature vector with one slot per entry of the module-level
    ``symptoms_dict``, sets the slot of every given symptom to 1, asks the
    module-level ``svc`` model for a class id and maps that id to a name
    through ``diseases_list``.

    Args:
        patient_symptoms: Iterable of symptom names; each must be a key of
            ``symptoms_dict`` (an unknown name raises ``KeyError``).

    Returns:
        The ``diseases_list`` entry for the predicted class id.
    """
    features = np.zeros(len(symptoms_dict))
    for symptom in patient_symptoms:
        column = symptoms_dict[symptom]
        features[column] = 1

    # The model expects a 2-D input, so wrap the single sample in a list.
    predicted_label = svc.predict([features])[0]
    return diseases_list[predicted_label]
180
+
181
# Read a comma-separated list of symptom names, e.g. "itching, skin_rash".
symptoms = input("Enter your symptoms.......")

# Strip whitespace plus stray brackets/quotes (so a pasted Python list also
# works) and drop the empty entries left behind by trailing commas, which
# would otherwise fail the symptom lookup.
user_symptoms = [s.strip().strip("[]' ") for s in symptoms.split(',')]
user_symptoms = [s for s in user_symptoms if s]

predicted_disease = get_predicted_value(user_symptoms)

desc, pre, med, die, wrkout = helper(predicted_disease)

print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(desc)

# Every section below is numbered from 1 on its own; a single shared counter
# would make the numbering run on from one section into the next.
print("=================precautions==================")
# ``pre`` holds one row of precautions per matching table row; it is empty
# when the disease has no entry, so guard the ``pre[0]`` access.
for i, p_i in enumerate(pre[0] if pre else [], start=1):
    print(i, ": ", p_i)

print("=================medications==================")
for i, m_i in enumerate(med, start=1):
    print(i, ": ", m_i)

print("=================workout==================")
for i, w_i in enumerate(wrkout, start=1):
    print(i, ": ", w_i)

print("=================diets==================")
for i, d_i in enumerate(die, start=1):
    print(i, ": ", d_i)
212
+
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d00560d8910fbed77ffad4065dee5011c41ba401b1064e749c498ba9e20373
3
+ size 497774208
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2937d5ad2e6a4434c87e5560f7e335c52695ce9d8ca7f7c49e96bc58d565728f
3
+ size 5304