Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder | |
| from sklearn.linear_model import LinearRegression, LogisticRegression | |
| from sklearn.ensemble import GradientBoostingClassifier, RandomForestRegressor,HistGradientBoostingRegressor | |
| #import xgboost | |
| from sklearn.compose import ColumnTransformer | |
| #import pickle | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import classification_report, r2_score | |
| import streamlit as st | |
| import time | |
| #import shap | |
| #import matplotlib as mt | |
def _build_preprocessor(num_features, cat_features, dense_output):
    """Return the ColumnTransformer shared by every pipeline.

    Numeric columns are standardised and categorical columns are one-hot
    encoded. ``handle_unknown="ignore"`` keeps prediction from failing when
    the held-out split contains a category that never appeared in training.
    """
    if dense_output:
        encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    else:
        encoder = OneHotEncoder(handle_unknown="ignore")
    return ColumnTransformer([
        ("num_trf", StandardScaler(), num_features),
        ("cat_trf", encoder, cat_features),
    ])


def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Fit a preprocessing + model pipeline on a CSV dataset.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.
        problem: ``"Regression"`` or ``"Classification"``.
        model: For regression, ``"LinearRegression"`` or
            ``"RandomForestRegressor"``; any other value selects
            ``HistGradientBoostingRegressor``. For classification,
            ``"GradientBoosting"`` or ``"LogisticRegression"``.
        label: Name of the target column in the CSV.

    Returns:
        ``(pipeline, X_train, X_test, y_train, y_test)``. For classification
        both ``y_train`` and ``y_test`` are integer-encoded label arrays.

    Raises:
        KeyError: If ``label`` is not a column of the dataset.
        ValueError: If ``problem`` or a classification ``model`` is unknown.
    """
    df = pd.read_csv(data)
    target = df[label].copy()
    features = df.drop(label, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.20, random_state=42, shuffle=True
    )

    # Anything that is not numeric is treated as categorical; this also
    # covers pandas' dedicated string dtype, not only ``object`` columns.
    is_numeric = pd.api.types.is_numeric_dtype
    num_features = [c for c in features.columns if is_numeric(features[c])]
    cat_features = [c for c in features.columns if not is_numeric(features[c])]

    if problem == "Regression":
        # Dense one-hot output: HistGradientBoostingRegressor needs dense input.
        trf = _build_preprocessor(num_features, cat_features, dense_output=True)
        if model == "LinearRegression":
            estimator_step = ("reg_model", LinearRegression())
        elif model == "RandomForestRegressor":
            estimator_step = ("rf_reg_model", RandomForestRegressor(random_state=42))
        else:
            estimator_step = ("reg_model", HistGradientBoostingRegressor(random_state=42))
        final_pipe = Pipeline([("transformers", trf), estimator_step])
        final_pipe.fit(X_train, y_train)
        return final_pipe, X_train, X_test, y_train, y_test

    if problem == "Classification":
        if model == "GradientBoosting":
            classifier = GradientBoostingClassifier(random_state=42)
        elif model == "LogisticRegression":
            classifier = LogisticRegression(random_state=42)
        else:
            raise ValueError(f"Unknown classification model: {model!r}")
        trf = _build_preprocessor(num_features, cat_features, dense_output=False)
        # Fit the encoder on the full target so a class that only occurs in
        # the held-out split can still be encoded, then transform both splits
        # with the same mapping (``fit`` returns the encoder, not labels).
        lbl_encd = LabelEncoder()
        lbl_encd.fit(target)
        y_train_trf = lbl_encd.transform(y_train)
        y_test_trf = lbl_encd.transform(y_test)
        final_pipe = Pipeline([("transformers", trf), ("clf_model", classifier)])
        final_pipe.fit(X_train, y_train_trf)
        return final_pipe, X_train, X_test, y_train_trf, y_test_trf

    raise ValueError(f"Unknown problem type: {problem!r}")


def predict(model=None, x=None):
    """Return ``model.predict(x)`` for a fitted estimator or pipeline."""
    return model.predict(x)


def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions against the ground truth.

    Returns the R^2 score when ``problem`` is ``"Regression"``. For any other
    value it returns the classification report as UTF-8 encoded CSV bytes,
    ready to be handed to a download button.
    """
    if problem == "Regression":
        return r2_score(y_true, y_pred)
    report = classification_report(y_true, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    return report_df.to_csv().encode('utf-8')


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("No Code Machine Learning Studio :six_pointed_star:")
st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
st.subheader("Plug & Play Portal for Machine Learning")
prob_type = st.selectbox(
    label="Please select your ML problem type: ",
    options=("Regression", "Classification"),
)
train_data = st.file_uploader(label="Please upload your training dataset", type=["csv"])
if prob_type == "Classification":
    model = st.selectbox(
        label="Please select your classification model: ",
        options=("GradientBoosting", "LogisticRegression"),
    )
else:
    model = st.selectbox(
        label="Please select your regression model: ",
        options=("LinearRegression", "RandomForestRegressor", "HistGradientBoostingRegressor"),
    )
y = st.text_input("Please write your target column name: ")

if st.button("Train"):
    # Without an uploaded file or a target name, training cannot start.
    if train_data is None or not y:
        st.error("Please upload a training dataset and enter the target column name.")
        st.stop()

    is_classification = prob_type == "Classification"
    time.sleep(1)
    # A single progress bar is created once and then updated in place;
    # calling st.progress() repeatedly would stack a new bar for every step.
    bar = st.progress(10, text="Discovering the dataset...")
    time.sleep(0.5)
    bar.progress(20, text="Applying the preprocessing steps...")
    time.sleep(1)
    bar.progress(25, text="Training engine has started...")
    bar.progress(50, text="Training the model...")
    try:
        model_, X_train, X_test, y_train, y_test = train(
            data=train_data, problem=prob_type, model=model, label=y
        )
    except (KeyError, ValueError) as err:
        st.error(f"Training failed: {err}")
        st.stop()
    time.sleep(2)
    bar.progress(75, text="Training complete...")
    bar.progress(85, text="Evaluating model performance...")
    if is_classification:
        bar.progress(90, text="Generating Classification report...")
    else:
        bar.progress(90, text="Generating Regression metrics...")
    time.sleep(1)
    bar.progress(100, text="Complete! :100:")

    y_hat_train = predict(model_, X_train)
    y_hat_test = predict(model_, X_test)

    if is_classification:
        st.write("Classification report of training dataset: ")
        report = evaluate(y_train, y_hat_train, prob_type)
        st.download_button(
            key="train",
            label="Click here to download the report",
            data=report,
            file_name="train_classification_report.csv",
            mime="text/csv",
        )
        st.write("Classification report of testing dataset: ")
        # The test report must be built from the held-out split.
        report_test = evaluate(y_test, y_hat_test, prob_type)
        st.download_button(
            key="test",
            label="Click here to download the report",
            data=report_test,
            file_name="test_classification_report.csv",
            mime="text/csv",
        )
        st.success("Report generated successfully! :beers:")
    else:
        st.write("r2 score on training set: ")
        st.write(evaluate(y_train, y_hat_train, prob_type))
        st.write("r2 score on test set: ")
        time.sleep(0.5)
        st.write(evaluate(y_test, y_hat_test, prob_type))
        st.success("Metrics generated successfully! :beers:")