"""No-code machine learning studio.

A Streamlit app that lets a user upload a CSV file, pick a problem type
(regression or classification) and a model, train a scikit-learn pipeline on
an 80/20 split of the data, and view or download the evaluation metrics.
"""
import time

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import (
    GradientBoostingClassifier,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import classification_report, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    LabelEncoder,
    OneHotEncoder,
    OrdinalEncoder,
    StandardScaler,
)

# Optional dependencies that are currently disabled:
# import xgboost
# import pickle
# import shap
# import matplotlib as mt


def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Train a preprocessing + model pipeline on a CSV dataset.

    The CSV is split 80/20 (shuffled, ``random_state=42``). Numeric feature
    columns are standardised and every other feature column is one-hot
    encoded before being passed to the selected estimator.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.
        problem: ``"Regression"`` or ``"Classification"``.
        model: Estimator name. For regression: ``"LinearRegression"``,
            ``"RandomForestRegressor"``; any other value selects
            ``HistGradientBoostingRegressor``. For classification:
            ``"GradientBoosting"`` or ``"LogisticRegression"``.
        label: Name of the target column in the CSV.

    Returns:
        A tuple ``(pipeline, X_train, X_test, y_train, y_test)``. For
        classification the two target arrays are integer-encoded with a
        ``LabelEncoder``; for regression they are the raw target values.

    Raises:
        ValueError: If ``data`` is ``None``, or if ``problem`` or a
            classification ``model`` name is not supported.
        KeyError: If ``label`` is not a column of the dataset.
    """
    if data is None:
        raise ValueError("No training data was provided.")

    df = pd.read_csv(data)
    if label not in df.columns:
        raise KeyError(f"Target column {label!r} was not found in the dataset.")

    target = df[label].copy()
    features = df.drop(label, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.20, random_state=42, shuffle=True
    )

    # Route each column by dtype: numeric columns are scaled, everything else
    # (object, category, string, ...) is treated as categorical.
    num_features = []
    cat_features = []
    for column in features.columns:
        if pd.api.types.is_numeric_dtype(features[column]):
            num_features.append(column)
        else:
            cat_features.append(column)

    if problem == "Regression":
        # Dense output is required because HistGradientBoostingRegressor does
        # not accept sparse matrices. handle_unknown="ignore" keeps prediction
        # from failing on categories that only occur in the test split.
        trf = ColumnTransformer([
            ("num_trf", StandardScaler(), num_features),
            ("cat_trf",
             OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
             cat_features),
        ])
        if model == "LinearRegression":
            estimator_step = ("reg_model", LinearRegression())
        elif model == "RandomForestRegressor":
            estimator_step = (
                "rf_reg_model", RandomForestRegressor(random_state=42))
        else:
            estimator_step = (
                "reg_model", HistGradientBoostingRegressor(random_state=42))

        final_pipe = Pipeline([("transformers", trf), estimator_step])
        final_pipe.fit(X_train, y_train)
        # model = pickle.dump(final_pipe,open("regression_model","wb"))
        return final_pipe, X_train, X_test, y_train, y_test

    if problem == "Classification":
        if model == "GradientBoosting":
            classifier = GradientBoostingClassifier(random_state=42)
        elif model == "LogisticRegression":
            classifier = LogisticRegression(random_state=42)
        else:
            raise ValueError(f"Unsupported classification model: {model!r}")

        trf = ColumnTransformer([
            ("num_trf", StandardScaler(), num_features),
            ("cat_trf", OneHotEncoder(handle_unknown="ignore"), cat_features),
        ])

        # Fit the encoder on the full label vocabulary so that a class which
        # happens to land only in the test split can still be transformed,
        # then *transform* (not re-fit) each split.
        lbl_encd = LabelEncoder()
        lbl_encd.fit(target)
        y_train_trf = lbl_encd.transform(y_train)
        y_test_trf = lbl_encd.transform(y_test)

        final_pipe = Pipeline(
            [("transformers", trf), ("clf_model", classifier)])
        final_pipe.fit(X_train, y_train_trf)
        # model = pickle.dump(final_pipe,("","wb"))
        return final_pipe, X_train, X_test, y_train_trf, y_test_trf

    raise ValueError(f"Unsupported problem type: {problem!r}")


def predict(model=None, x=None):
    """Return the predictions of a fitted ``model`` for the samples ``x``."""
    # m = pickle.load(open(model,"rb"))
    return model.predict(x)


def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions against the ground truth.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values.
        problem: ``"Regression"`` selects the R² score; any other value
            produces a classification report.

    Returns:
        The R² score for regression; otherwise the classification report as
        UTF-8 encoded CSV bytes (suitable for a download button).
    """
    if problem == "Regression":
        return r2_score(y_true, y_pred)

    report = classification_report(y_true, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    return report_df.to_csv().encode("utf-8")


# --------------------------------------------------------------------------
# Streamlit user interface
# --------------------------------------------------------------------------
st.title("No Code Machine Learning Studio :six_pointed_star:")
st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
st.subheader("Plug & Play Portal for Machine Learning")

prob_type = st.selectbox(
    label="Please select your ML problem type: ",
    options=("Regression", "Classification"),
)
train_data = st.file_uploader(
    label="Please upload your training dataset", type=["csv"])

if prob_type == "Classification":
    model = st.selectbox(
        label="Please select your classification model: ",
        options=("GradientBoosting", "LogisticRegression"),
    )
else:
    model = st.selectbox(
        label="Please select your regression model: ",
        options=("LinearRegression", "RandomForestRegressor",
                 "HistGradientBoostingRegressor"),
    )

# Disabled SHAP explanation sketch:
# def explain(model="LinearRegression",train_data=None,test_data=None):
#     explainer = shap.LinearExplainer(model,train_data,feature_dependence=False)
#     shap_values = explainer.shap_values(test_data)
#     shap.summary_plot(shap_values,test_data,plot_type="violin",show=False)
#     mt.pyplot.gcf().axes[-1].set_box_aspect(10)

y = st.text_input("Please write your target column name: ")
# num_f = st.text_input("Please write your numerical feature names(separted by ","): ").split(",")
# cat_f = st.text_input("Please write your categorical feature names(separted by ","): ").split(",")

if st.button("Train"):
    target_column = y.strip()

    # Fail early with a readable message instead of a traceback.
    if train_data is None:
        st.error("Please upload a training dataset before training.")
        st.stop()
    if not target_column:
        st.error("Please provide the name of the target column.")
        st.stop()

    is_classification = prob_type == "Classification"
    time.sleep(1)

    # A single progress bar is created once and then updated in place.
    progress_bar = st.progress(10, text="Discovering the dataset...")
    time.sleep(0.5)
    progress_bar.progress(20, text="Applying the preprocessing steps...")
    time.sleep(1)
    progress_bar.progress(25, text="Training engine has started...")
    progress_bar.progress(50, text="Training the model...")
    model_, X_train, X_test, y_train, y_test = train(
        data=train_data, problem=prob_type, model=model, label=target_column)
    time.sleep(2)
    progress_bar.progress(75, text="Training complete...")
    progress_bar.progress(85, text="Evaluating model performance...")
    if is_classification:
        progress_bar.progress(90, text="Generating Classification report...")
    else:
        progress_bar.progress(90, text="Generating Regression metrics...")
    time.sleep(1)
    progress_bar.progress(100, text="Complete! :100:")

    y_hat_train = predict(model_, X_train)
    y_hat_test = predict(model_, X_test)

    if is_classification:
        report = evaluate(y_train, y_hat_train, prob_type)
        st.download_button(
            label="Click here to download the report",
            data=report, mime="text/csv")
        time.sleep(2)
        st.write("Classification report of testing dataset: ")
        # The test report must be built from the held-out split.
        report_test = evaluate(y_test, y_hat_test, prob_type)
        st.download_button(
            key="test", label="Click here to download the report",
            data=report_test, mime="text/csv")
        st.success("Report generated successfully! :beers:")
    else:
        st.write("r2 score on training set: ")
        st.write(evaluate(y_train, y_hat_train, prob_type))
        st.write("r2 score on test set: ")
        time.sleep(0.5)
        st.write(evaluate(y_test, y_hat_test, prob_type))
        st.success("Metrics generated successfully! :beers:")