Spaces:
Build error
Build error
| import pandas as pd | |
| import numpy as np | |
| # def preprocess_kaggle(dataset_path): | |
| # # import the dataset | |
| # dataset_df = pd.read_csv(dataset_path) | |
| # # Preprocess | |
| # dataset_df = dataset_df.apply(lambda col: col.str.strip()) | |
| # test = pd.get_dummies(dataset_df.filter(regex='Symptom'), prefix='', prefix_sep='') | |
| # test = test.groupby(test.columns, axis=1).agg(np.max) | |
| # clean_df = pd.merge(test,dataset_df['Disease'], left_index=True, right_index=True) | |
| # return clean_df | |
def prepare_symptoms_array(symptoms, dataset_path='data/clean_dataset.tsv',
                           n_features=133):
    """Build the model input row that flags the given symptoms.

    Each symptom name is looked up among the column names of the
    tab-separated dataset at ``dataset_path``; the element at that column
    position in the output row is set to 1, every other element stays 0.

    Args:
        symptoms: Iterable of symptom names. Each must match a column name
            of the dataset exactly.
        dataset_path: Path to the tab-separated file whose header defines
            the column order. Defaults to the path this function has
            always read.
        n_features: Width of the returned row. Defaults to 133, the value
            previously hard-coded here.
            NOTE(review): the earlier docstring mentioned 131 dimensions
            while the code allocated 133 -- confirm the width against the
            trained model.

    Returns:
        np.ndarray of shape ``(1, n_features)`` and float dtype, with 1.0
        at the column position of every requested symptom.

    Raises:
        KeyError: If a symptom is not a column of the dataset.
        IndexError: If a symptom's column position is >= ``n_features``.
    """
    symptoms_array = np.zeros((1, n_features))

    # Only the header is needed to resolve column positions, so skip
    # parsing the data rows (nrows=0 yields an empty frame with columns).
    columns = pd.read_csv(dataset_path, sep='\t', nrows=0).columns

    for symptom in symptoms:
        try:
            symptom_idx = columns.get_loc(symptom)
        except KeyError:
            # Same exception type as before, with a message that names
            # the offending symptom and the file it was looked up in.
            raise KeyError(
                f"Unknown symptom {symptom!r}: not a column of {dataset_path}"
            ) from None
        symptoms_array[0, symptom_idx] = 1

    return symptoms_array