Spaces:
Build error
Build error
File size: 2,443 Bytes
f3b5806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import xgboost as xgb
import pandas as pd
class DiseaseModel:
    """XGBoost-based disease classifier with description/precaution lookups.

    The list of known diseases and the symptom column names are read from a
    tab-separated dataset when the object is constructed. Descriptions and
    precautions are looked up in separate CSV files on demand.

    Attributes:
        all_symptoms: Column labels of every dataset column except the last
            one (set by ``disease_list``).
        symptoms: The feature input most recently passed to ``predict``.
        pred_disease: The disease label most recently predicted, or ``None``
            if ``predict`` has not been called yet.
        model: The underlying ``xgb.XGBClassifier``.
        diseases: Category labels taken from the last dataset column.
    """

    # Data locations, relative to the working directory. They are class
    # attributes so a subclass or an instance can point at other files.
    DATASET_PATH = 'data/clean_dataset.tsv'
    DESCRIPTION_PATH = 'data/symptom_Description.csv'
    PRECAUTION_PATH = 'data/symptom_precaution.csv'

    _UNKNOWN_DISEASE_MSG = "That disease is not contemplated in this model"
    _NO_PREDICTION_MSG = "No predicted disease yet"

    def __init__(self):
        """Create an untrained classifier and load the disease/symptom lists."""
        self.all_symptoms = None
        self.symptoms = None
        self.pred_disease = None
        self.model = xgb.XGBClassifier()
        self.diseases = self.disease_list('data/dataset.csv')

    def load_xgboost(self, model_path):
        """Load model state from ``model_path`` into the wrapped classifier."""
        self.model.load_model(model_path)

    def save_xgboost(self, model_path):
        """Save the wrapped classifier's state to ``model_path``."""
        self.model.save_model(model_path)

    def predict(self, X):
        """Predict the disease for the first sample in ``X``.

        Args:
            X: Feature input accepted by the wrapped classifier's ``predict``
                and ``predict_proba``. Only the first row's result is used.

        Returns:
            A ``(disease_label, probability)`` tuple, where ``probability`` is
            the classifier's probability for the predicted class.
        """
        self.symptoms = X
        disease_pred_idx = self.model.predict(self.symptoms)
        # NOTE(review): assumes the class indices produced by the model line
        # up with the category order of ``self.diseases`` -- confirm against
        # the training code.
        self.pred_disease = self.diseases[disease_pred_idx].values[0]
        disease_probability_array = self.model.predict_proba(self.symptoms)
        disease_probability = disease_probability_array[0, disease_pred_idx[0]]
        return self.pred_disease, disease_probability

    @staticmethod
    def _read_stripped_csv(path):
        """Read a CSV and strip surrounding whitespace from its text columns.

        Columns whose dtype is not string-like are returned unchanged, so a
        numeric column no longer makes the ``.str`` accessor raise.
        """
        frame = pd.read_csv(path)
        return frame.apply(
            lambda col: col.str.strip()
            if pd.api.types.is_string_dtype(col.dtype)
            else col
        )

    def describe_disease(self, disease_name):
        """Return the textual description of ``disease_name``.

        Returns:
            The description string; a fixed message if the disease is not one
            of ``self.diseases``; or a fixed message if the description file
            has no row for it (previously an ``IndexError``).
        """
        if disease_name not in self.diseases:
            return self._UNKNOWN_DISEASE_MSG

        desc_df = self._read_stripped_csv(self.DESCRIPTION_PATH)
        matches = desc_df.loc[desc_df['Disease'] == disease_name, 'Description']
        if matches.empty:
            return "No description available for that disease"
        return matches.values[0]

    def describe_predicted_disease(self):
        """Describe the last predicted disease, or report that none exists."""
        if self.pred_disease is None:
            return self._NO_PREDICTION_MSG
        return self.describe_disease(self.pred_disease)

    def disease_precautions(self, disease_name):
        """Return the precautions listed for ``disease_name``.

        Returns:
            A list with the values of every column whose name matches
            ``Precaution`` for the first matching row; a fixed message string
            if the disease is not one of ``self.diseases``; or an empty list
            if the precaution file has no row for it (previously an
            ``IndexError``).
        """
        if disease_name not in self.diseases:
            return self._UNKNOWN_DISEASE_MSG

        prec_df = self._read_stripped_csv(self.PRECAUTION_PATH)
        rows = prec_df[prec_df['Disease'] == disease_name].filter(regex='Precaution')
        if rows.empty:
            return []
        return rows.values.tolist()[0]

    def predicted_disease_precautions(self):
        """Return precautions for the last predicted disease, if any."""
        if self.pred_disease is None:
            return self._NO_PREDICTION_MSG
        return self.disease_precautions(self.pred_disease)

    def disease_list(self, kaggle_dataset):
        """Load the dataset and return the disease category labels.

        Side effect: sets ``self.all_symptoms`` to the labels of every column
        except the last.

        Args:
            kaggle_dataset: Accepted for backward compatibility but ignored;
                the data is always read from ``DATASET_PATH``.
                NOTE(review): the constructor passes 'data/dataset.csv' here
                -- confirm whether the argument was ever meant to be used.

        Returns:
            The categories of the last dataset column.
        """
        df = pd.read_csv(self.DATASET_PATH, sep='\t')

        # The last column holds the label; everything before it is a feature.
        y_data = df.iloc[:, -1]
        X_data = df.iloc[:, :-1]
        self.all_symptoms = X_data.columns

        # Converting to categorical yields the distinct labels.
        y_data = y_data.astype('category')
        return y_data.cat.categories