import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
import xgboost
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, r2_score
import streamlit as st
import shap
import matplotlib as mt


# Classification estimators selectable from the UI, keyed by their display name.
_CLASSIFIERS = {
    "GradientBoosting": GradientBoostingClassifier,
    "LogisticRegression": LogisticRegression,
}


def _split_feature_types(features):
    """Return ``(numeric_columns, categorical_columns)`` for *features*.

    Columns with a numeric dtype are treated as numeric; everything else
    (object/string columns coming out of ``read_csv``) is treated as
    categorical.
    """
    num_features = []
    cat_features = []
    for name in features.columns:
        if pd.api.types.is_numeric_dtype(features[name]):
            num_features.append(name)
        else:
            cat_features.append(name)
    return num_features, cat_features


def _build_transformer(num_features, cat_features, *, dense):
    """Build the preprocessing ``ColumnTransformer``.

    Numeric columns are standard-scaled. Categorical columns, when there are
    any, are one-hot encoded; ``dense=True`` requests a dense encoder output.
    """
    steps = [("num_trf", StandardScaler(), num_features)]
    if cat_features:
        # handle_unknown="ignore": a category that only appears in the test
        # split must not make predict() raise.
        if dense:
            encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        else:
            encoder = OneHotEncoder(handle_unknown="ignore")
        steps.append(("cat_trf", encoder, cat_features))
    return ColumnTransformer(steps)


def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Fit a preprocessing + model pipeline on a CSV dataset.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.
        problem: ``"Regression"`` or ``"Classification"``.
        model: For classification, ``"GradientBoosting"`` or
            ``"LogisticRegression"``. Ignored for regression, which always
            fits ``LinearRegression``.
        label: Name of the target column in the CSV.

    Returns:
        ``(pipeline, X_train, X_test, y_train, y_test)``. For classification
        the two target arrays are label-encoded integers.

    Raises:
        ValueError: If ``problem`` or (for classification) ``model`` is not
            one of the supported values.
    """
    if problem not in ("Regression", "Classification"):
        raise ValueError(f"Unsupported problem type: {problem!r}")
    if problem == "Classification" and model not in _CLASSIFIERS:
        raise ValueError(f"Unsupported classification model: {model!r}")

    df = pd.read_csv(data)
    target = df[label].copy()
    features = df.drop(label, axis=1)

    # Stratifying only makes sense for class labels; on a continuous
    # regression target it raises because most values occur once.
    stratify = target if problem == "Classification" else None
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.20,
        random_state=42,
        shuffle=True,
        stratify=stratify,
    )

    num_features, cat_features = _split_feature_types(features)

    if problem == "Regression":
        trf = _build_transformer(num_features, cat_features, dense=True)
        final_pipe = Pipeline([("transformers", trf), ("reg_model", LinearRegression())])
        final_pipe.fit(X_train, y_train)
        return final_pipe, X_train, X_test, y_train, y_test

    trf = _build_transformer(num_features, cat_features, dense=False)
    # Fit the encoder on the full target so both splits share one mapping,
    # then *transform* each split (fit() returns the encoder, not labels).
    lbl_encd = LabelEncoder().fit(target)
    y_train_trf = lbl_encd.transform(y_train)
    y_test_trf = lbl_encd.transform(y_test)
    estimator = _CLASSIFIERS[model](random_state=42)
    final_pipe = Pipeline([("transformers", trf), ("clf_model", estimator)])
    final_pipe.fit(X_train, y_train_trf)
    return final_pipe, X_train, X_test, y_train_trf, y_test_trf


def predict(model=None, x=None):
    """Return ``model.predict(x)`` for a fitted model/pipeline."""
    y_hat = model.predict(x)
    return y_hat


def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions.

    Returns the R^2 score when ``problem == "Regression"``; otherwise returns
    the classification report as UTF-8 encoded CSV bytes.
    """
    if problem == "Regression":
        return r2_score(y_true, y_pred)
    metric = classification_report(y_true, y_pred, output_dict=True)
    met_df = pd.DataFrame(metric).transpose()
    return met_df.to_csv().encode('utf-8')


def explain(model="LinearRegression", train_data=None, test_data=None):
    """Draw a SHAP violin summary plot for a linear model on the current figure.

    NOTE(review): ``feature_dependence`` is passed through unchanged from the
    original code; confirm it is still accepted by the installed shap version.
    """
    # ``import matplotlib`` alone does not guarantee the pyplot submodule is
    # loaded; import it explicitly so ``mt.pyplot`` resolves.
    import matplotlib.pyplot  # noqa: F401

    explainer = shap.LinearExplainer(model, train_data, feature_dependence=False)
    shap_values = explainer.shap_values(test_data)
    shap.summary_plot(shap_values, test_data, plot_type="violin", show=False)
    mt.pyplot.gcf().axes[-1].set_box_aspect(10)


# ----------------------------- Streamlit UI -----------------------------

st.title("No Code Machine Learning Studio: ")
st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
st.write("Drag & Drop Portal for Machine Learing")

prob_type = st.selectbox(label="Please select your ML problem type: ", options=("Regression", "Classification"))
train_data = st.file_uploader(label="Please upload your training dataset", type=["csv"])

if prob_type == "Classification":
    model = st.selectbox(label="Plase Select your classification model: ",
                         options=("GradientBoosting", "LogisticRegression"))
else:
    model = "LinearRegression"

y = st.text_input("Please write your target column name: ")

if st.button("Train"):
    if train_data is None:
        # Without this guard read_csv(None) raises a raw traceback in the app.
        st.error("Please upload a training dataset before training.")
    elif not y.strip():
        st.error("Please enter the target column name before training.")
    else:
        model_, X_train, X_test, y_train, y_test = train(
            data=train_data, problem=prob_type, model=model, label=y
        )
        y_hat_train = predict(model_, X_train)
        y_hat_test = predict(model_, X_test)

        if prob_type == "Classification":
            st.write("Classification report of training set: ")
            report = evaluate(y_train, y_hat_train, prob_type)
            st.download_button(label="Click here to download the report", data=report, mime="text/csv")
            st.write("Classification report of testing dataset: ")
            # The test report must be computed from the held-out split.
            report_test = evaluate(y_test, y_hat_test, prob_type)
            st.download_button(key="test", label="Click here to download the report",
                               data=report_test, mime="text/csv")
        else:
            st.write("r2 score on training set: ")
            st.write(evaluate(y_train, y_hat_train))
            st.write("r2 score on test set: ")
            st.write(evaluate(y_test, y_hat_test, prob_type))
            # NOTE: SHAP explanation is currently disabled.
            # explain(model_.named_steps["reg_model"], X_train, X_test)