| import streamlit as st | |
| import numpy as np | |
| from sklearn.pipeline import make_pipeline | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.svm import SVC | |
def process(data):
    """Train an SVC on the first dataset and predict labels for the second.

    Parameters
    ----------
    data : sequence of two pandas DataFrames (or None while not yet uploaded)
        data[0] is the training set whose LAST column is the target;
        data[1] is the testing set (feature columns; extra trailing
        columns beyond the training feature count are ignored).

    Returns
    -------
    pandas.DataFrame or None
        The test features with a prediction column appended (named after
        the training set's target column), or None when input is missing
        or invalid (a message is shown via st.info in that case).
    """
    train_df, test_df = data[0], data[1]
    # Use `is None`: `df == None` on a DataFrame returns an elementwise
    # comparison whose truth value is ambiguous and raises ValueError.
    if train_df is None or test_df is None:  # either dataset still missing
        st.info('Please Upload Data')
        return None
    # Reject non-numeric data: any 'object' dtype column cannot be scaled/fit.
    if 'object' in list(train_df.dtypes) or 'object' in list(test_df.dtypes):
        st.info('Please Upload Numeric Data.')  # fixed typo: "Numerica"
        return None
    x_train = train_df.iloc[:, :-1]   # all columns except the target
    y_train = train_df.iloc[:, -1]    # target = last column
    # Keep only as many leading feature columns as the training set has.
    # .copy() so the prediction column added below does not mutate the
    # caller's DataFrame (avoids pandas SettingWithCopyWarning).
    x_test = test_df.iloc[:, :x_train.shape[1]].copy()
    # After slicing, a mismatch means the test set has FEWER columns.
    if len(x_train.columns) != len(x_test.columns):
        st.info('Training and testing datasets have different column number, cannot perform classification.')
        return None
    # Standardize features, then fit an RBF-kernel SVC.
    clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    clf.fit(x_train, y_train)
    # Append predictions under the training target's column name.
    x_test[train_df.columns[-1]] = clf.predict(x_test)
    return x_test