import matplotlib as mt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
import streamlit as st
import xgboost
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import classification_report, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import (
    LabelEncoder,
    OneHotEncoder,
    OrdinalEncoder,
    StandardScaler,
)


def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Fit a preprocessing + model pipeline on a CSV dataset.

    The CSV is split 80/20 (``random_state=42``). Numeric columns are
    standardised and all other columns are one-hot encoded before the
    estimator is fitted on the training part.

    Args:
        data: Path or file-like object accepted by ``pandas.read_csv``.
        problem: ``"Regression"`` or ``"Classification"``.
        model: For classification, ``"GradientBoosting"`` or
            ``"LogisticRegression"``. Ignored for regression, which always
            fits ``LinearRegression``.
        label: Name of the target column in the CSV.

    Returns:
        ``(pipeline, X_train, X_test, y_train, y_test)``. For classification
        both ``y`` values are integer-encoded with a ``LabelEncoder`` fitted
        on the training labels.

    Raises:
        ValueError: If ``data`` is missing, ``problem``/``model`` is not
            supported, or the dataset has no feature columns.
        KeyError: If ``label`` is not a column of the dataset.
    """
    if data is None:
        raise ValueError("No training data was provided.")
    if problem not in ("Regression", "Classification"):
        raise ValueError(f"Unsupported problem type: {problem!r}")
    is_classification = problem == "Classification"
    if is_classification and model not in _CLASSIFIERS:
        raise ValueError(f"Unsupported classification model: {model!r}")

    # Rewind file-like inputs so a buffer that was already consumed by an
    # earlier read is parsed from the beginning again.
    if hasattr(data, "seek"):
        data.seek(0)
    df = pd.read_csv(data)
    if label not in df.columns:
        raise KeyError(f"Target column {label!r} was not found in the dataset.")

    target = df[label].copy()
    features = df.drop(label, axis=1)
    if features.shape[1] == 0:
        raise ValueError("The dataset has no feature columns besides the target.")

    # Stratifying only makes sense for class labels; on a continuous
    # regression target almost every value is a singleton "class" and
    # train_test_split refuses to split.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=0.20,
        random_state=42,
        shuffle=True,
        stratify=target if is_classification else None,
    )

    num_features, cat_features = _split_feature_columns(features)

    if not is_classification:
        trf = _build_preprocessor(num_features, cat_features, dense_output=True)
        final_pipe = Pipeline(
            [("transformers", trf), ("reg_model", LinearRegression())]
        )
        final_pipe.fit(X_train, y_train)
        return final_pipe, X_train, X_test, y_train, y_test

    trf = _build_preprocessor(num_features, cat_features, dense_output=False)

    # Fit the encoder on the training labels only and reuse it for the test
    # labels, so both splits share one label -> integer mapping.
    lbl_encd = LabelEncoder()
    lbl_encd.fit(y_train)
    y_train_trf = lbl_encd.transform(y_train)
    y_test_trf = lbl_encd.transform(y_test)

    final_pipe = Pipeline(
        [("transformers", trf), ("clf_model", _CLASSIFIERS[model](random_state=42))]
    )
    final_pipe.fit(X_train, y_train_trf)
    return final_pipe, X_train, X_test, y_train_trf, y_test_trf


# Classification estimators selectable through ``train(model=...)``.
_CLASSIFIERS = {
    "GradientBoosting": GradientBoostingClassifier,
    "LogisticRegression": LogisticRegression,
}


def _split_feature_columns(features):
    """Return ``(numeric, categorical)`` column-name lists for ``features``."""
    num_features = []
    cat_features = []
    for column in features.columns:
        # Anything that is not numeric (object, string, category, ...) is
        # one-hot encoded instead of being passed to StandardScaler.
        if pd.api.types.is_numeric_dtype(features[column]):
            num_features.append(column)
        else:
            cat_features.append(column)
    return num_features, cat_features


def _build_preprocessor(num_features, cat_features, dense_output):
    """Build the ColumnTransformer, skipping column groups that are empty."""
    transformers = []
    if num_features:
        transformers.append(("num_trf", StandardScaler(), num_features))
    if cat_features:
        # Ignore categories that only occur outside the training split
        # instead of failing at predict time.
        encoder_options = {"handle_unknown": "ignore"}
        if dense_output:
            encoder_options["sparse_output"] = False
        transformers.append(
            ("cat_trf", OneHotEncoder(**encoder_options), cat_features)
        )
    return ColumnTransformer(transformers)


def predict(model=None, x=None):
    """Return ``model.predict(x)`` for a fitted model or pipeline."""
    y_hat = model.predict(x)
    return y_hat


def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions against the true values.

    Args:
        y_true: Ground-truth targets.
        y_pred: Predicted targets.
        problem: ``"Regression"`` selects the R2 score; any other value is
            treated as classification.

    Returns:
        The R2 score for regression; otherwise the classification report
        rendered as UTF-8 encoded CSV bytes.
    """
    if problem == "Regression":
        return r2_score(y_true, y_pred)

    report = classification_report(y_true, y_pred, output_dict=True)
    # One row per class/average, ready to hand to a download button.
    report_df = pd.DataFrame(report).transpose()
    return report_df.to_csv().encode("utf-8")


def explain(model="LinearRegression", train_data=None, test_data=None):
    """Draw a SHAP violin summary plot for ``model`` on the current figure.

    Args:
        model: Model handed to ``shap.LinearExplainer``.
        train_data: Background data handed to the explainer.
        test_data: Samples whose SHAP values are computed and plotted.
    """
    # NOTE(review): later shap releases renamed ``feature_dependence`` to
    # ``feature_perturbation`` - confirm against the installed version.
    explainer = shap.LinearExplainer(model, train_data, feature_dependence=False)
    shap_values = explainer.shap_values(test_data)

    # show=False keeps the figure open so it can still be adjusted below.
    shap.summary_plot(shap_values, test_data, plot_type="violin", show=False)
    # Adjust the last axes of the figure (presumably the colour bar - confirm).
    plt.gcf().axes[-1].set_box_aspect(10)


# --- Streamlit page -------------------------------------------------------
st.title("No Code Machine Learning Studio: ")

st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
st.write("Drag & Drop Portal for Machine Learing")
prob_type = st.selectbox(
    label="Please select your ML problem type: ",
    options=("Regression", "Classification"),
)

train_data = st.file_uploader(label="Please upload your training dataset", type=["csv"])

if prob_type == "Classification":
    model = st.selectbox(
        label="Plase Select your classification model: ",
        options=("GradientBoosting", "LogisticRegression"),
    )
else:
    model = "LinearRegression"

y = st.text_input("Please write your target column name: ")

if st.button("Train"):
    if train_data is None:
        st.error("Please upload a training dataset before training.")
    elif not y:
        st.error("Please enter the target column name before training.")
    else:
        model_, X_train, X_test, y_train, y_test = train(
            data=train_data, problem=prob_type, model=model, label=y
        )

        y_hat_train = predict(model_, X_train)
        y_hat_test = predict(model_, X_test)

        if prob_type == "Classification":
            st.write("Classification report of training set: ")
            report = evaluate(y_train, y_hat_train, prob_type)
            st.download_button(
                label="Click here to download the report",
                data=report,
                mime="text/csv",
            )

            st.write("Classification report of testing dataset: ")
            # Score the held-out split, not the training split again.
            report_test = evaluate(y_test, y_hat_test, prob_type)
            st.download_button(
                key="test",
                label="Click here to download the report",
                data=report_test,
                mime="text/csv",
            )
        else:
            st.write("r2 score on training set: ")
            st.write(evaluate(y_train, y_hat_train, prob_type))
            st.write("r2 score on test set: ")
            st.write(evaluate(y_test, y_hat_test, prob_type))