Spaces:
Build error
Build error
| import xgboost as xgb | |
| import pandas as pd | |
class DiseaseModel:
    """Wrap an XGBoost classifier that predicts a disease from symptom features.

    Besides prediction, the class looks up a textual description and a list
    of precautions for a disease from CSV files under ``data/``. All data
    paths are relative to the current working directory.
    """

    def __init__(self):
        """Create an untrained classifier and load the known disease names."""
        # Feature (symptom) column names; filled in by disease_list().
        self.all_symptoms = None
        # Last input passed to predict().
        self.symptoms = None
        # Name of the last predicted disease, or None before any prediction.
        self.pred_disease = None
        self.model = xgb.XGBClassifier()
        self.diseases = self.disease_list('data/dataset.csv')

    def load_xgboost(self, model_path):
        """Load classifier weights from ``model_path`` into ``self.model``."""
        self.model.load_model(model_path)

    def save_xgboost(self, model_path):
        """Save the current classifier to ``model_path``."""
        self.model.save_model(model_path)

    def predict(self, X):
        """Predict the disease for ``X`` and return ``(name, probability)``.

        ``X`` is handed unchanged to the classifier's ``predict`` and
        ``predict_proba``. Only the first prediction is reported, so ``X`` is
        presumably a single row laid out like ``self.all_symptoms``
        (NOTE(review): confirm against the caller).

        The input and the predicted name are remembered on the instance so
        that ``describe_predicted_disease`` and
        ``predicted_disease_precautions`` can be called afterwards.
        """
        self.symptoms = X

        class_idx = self.model.predict(self.symptoms)
        # Predicted class indices are positions into the category index
        # built by disease_list().
        self.pred_disease = self.diseases[class_idx].values[0]

        probabilities = self.model.predict_proba(self.symptoms)
        disease_probability = probabilities[0, class_idx[0]]

        return self.pred_disease, disease_probability

    @staticmethod
    def _read_stripped_csv(path):
        """Read a CSV file and strip surrounding whitespace from text cells."""

        def _strip_text(col):
            try:
                return col.str.strip()
            except AttributeError:
                # Non-text column (e.g. numeric or entirely empty): pandas
                # refuses the .str accessor, so leave the column untouched.
                return col

        return pd.read_csv(path).apply(_strip_text)

    def describe_disease(self, disease_name):
        """Return the description of ``disease_name``.

        A message string is returned instead when the disease is not one of
        the model's classes, or when the description file has no row for it.
        """
        if disease_name not in self.diseases:
            return "That disease is not contemplated in this model"

        desc_df = self._read_stripped_csv('data/symptom_Description.csv')
        matches = desc_df.loc[desc_df['Disease'] == disease_name, 'Description']

        # The disease can be a model class yet be absent from the file (for
        # instance because of a spelling difference); avoid an IndexError.
        if len(matches) == 0:
            return "No description available for that disease"

        return matches.values[0]

    def describe_predicted_disease(self):
        """Return the description of the last predicted disease."""
        if self.pred_disease is None:
            return "No predicted disease yet"

        return self.describe_disease(self.pred_disease)

    def disease_precautions(self, disease_name):
        """Return the precautions for ``disease_name`` as a list.

        The list holds the values of every column whose name contains
        ``Precaution`` for the first matching row. A message string is
        returned instead when the disease is not one of the model's classes,
        or when the precautions file has no row for it.
        """
        if disease_name not in self.diseases:
            return "That disease is not contemplated in this model"

        prec_df = self._read_stripped_csv('data/symptom_precaution.csv')
        rows = prec_df[prec_df['Disease'] == disease_name].filter(regex='Precaution')

        # Same guard as describe_disease(): no matching row means there is
        # nothing to index into.
        if len(rows.index) == 0:
            return "No precautions available for that disease"

        return rows.values.tolist()[0]

    def predicted_disease_precautions(self):
        """Return the precautions for the last predicted disease."""
        if self.pred_disease is None:
            return "No predicted disease yet"

        return self.disease_precautions(self.pred_disease)

    def disease_list(self, kaggle_dataset):
        """Return the disease names the model can predict.

        The names are the categories of the last column of
        ``data/clean_dataset.tsv``. As a side effect, ``self.all_symptoms`` is
        set to the names of all other columns.

        NOTE: ``kaggle_dataset`` is accepted for backward compatibility but is
        not used; the tab-separated clean dataset is always read.
        """
        df = pd.read_csv('data/clean_dataset.tsv', sep='\t')

        # Last column is the label; everything before it is a feature.
        labels = df.iloc[:, -1]
        features = df.iloc[:, :-1]

        self.all_symptoms = features.columns

        # Categorical conversion yields the distinct label values as an index.
        return labels.astype('category').cat.categories