Spaces:

Photon08
/

ml_studio_no_code

Runtime error

App Files Files Community

Photon08 committed on Mar 2, 2023

Commit

68fbecb

1 Parent(s): 4935f60

Create app.py

Browse files

Files changed (1) hide show

app.py +198 -0

app.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestRegressor,HistGradientBoostingRegressor
+#import xgboost
+from sklearn.compose import ColumnTransformer
+#import pickle
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, r2_score
+import streamlit as st
+import time
+#import shap
+#import matplotlib as mt
+def train(data=None,problem="Regression",model="LinearRegression",label=None):
+    df = pd.read_csv(data)
+    target = df[label].copy()
+    features = df.drop(label, axis=1)
+    X_train,X_test,y_train,y_test = train_test_split(features,target,test_size=0.20,random_state=42,shuffle=True)
+    num_features = []
+    cat_features = []
+    cols = list(features.columns)
+    for i in cols:
+        if df[i].dtypes == "object":
+            cat_features.append(i)
+        else:
+            num_features.append(i)
+    if problem == "Regression":
+        trf = ColumnTransformer([("num_trf",StandardScaler(),num_features),
+                                 ("cat_trf",OneHotEncoder(sparse_output=False),cat_features)])
+        if model == "LinearRegression":
+            final_pipe = Pipeline([("transformers",trf),("reg_model",LinearRegression())])
+        elif model == "RandomForestRegressor":
+            final_pipe = Pipeline([("transformers",trf),("rf_reg_model",RandomForestRegressor(random_state=42))])
+        else:
+            final_pipe = Pipeline([("transformers",trf),("reg_model",HistGradientBoostingRegressor(random_state=42))])
+        final_pipe.fit(X_train,y_train)
+        final_pipe.fit(X_train,y_train)
+        #model = pickle.dump(final_pipe,open("regression_model","wb"))
+        #y_hat = model.predict(X_train)
+        return final_pipe, X_train,X_test,y_train,y_test
+    if problem == "Classification":
+        if model == "GradientBoosting":
+            trf = ColumnTransformer([("num_trf",StandardScaler(),num_features),
+                                    ("cat_trf",OneHotEncoder(),cat_features)])
+            lbl_encd = LabelEncoder()
+            lbl_encd.fit(y_train)
+            y_train_trf = lbl_encd.transform(y_train)
+            y_test_trf = lbl_encd.fit(y_test)
+            final_pipe = Pipeline([("transformers",trf),("clf_model",GradientBoostingClassifier(random_state=42))])
+            final_pipe.fit(X_train,y_train_trf)
+            #file = open("model")
+            #model = pickle.dump(final_pipe,("","wb"))
+            return final_pipe, X_train,X_test,y_train_trf,y_test_trf
+        elif model == "LogisticRegression":
+            trf = ColumnTransformer([("num_trf",StandardScaler(),num_features),
+                                    ("cat_trf",OneHotEncoder(),cat_features)])
+            lbl_encd = LabelEncoder()
+            lbl_encd.fit(y_train)
+            y_train_trf = lbl_encd.transform(y_train)
+            y_test_trf = lbl_encd.fit(y_test)
+            final_pipe = Pipeline([("transformers",trf),("clf_model",LogisticRegression(random_state=42))])
+            final_pipe.fit(X_train,y_train_trf)
+            #file = open("model")
+            #model = pickle.dump(final_pipe,("","wb"))
+            return final_pipe, X_train,X_test,y_train_trf,y_test_trf
+def predict(model=None,x=None):
+    #m = pickle.load(open(model,"rb"))
+    y_hat = model.predict(x)
+    return y_hat
+def evaluate(y_true,y_pred, problem="Regression"):
+    if problem == "Regression":
+        metric = r2_score(y_true,y_pred)
+        return metric
+    else:
+        metric = classification_report(y_true,y_pred,output_dict=True)
+        met_df = pd.DataFrame(metric).transpose()
+        file = met_df.to_csv().encode('utf-8')
+        return file
+st.title("No Code Machine Learning Studio :six_pointed_star:")
+st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
+st.subheader("Plug & Play Portal for Machine Learing")
+prob_type = st.selectbox(label="Please select your ML problem type: ",options=("Regression","Classification"))
+train_data = st.file_uploader(label="Please upload your training dataset",type=["csv"])
+if prob_type == "Classification":
+    model = st.selectbox(label="Plase Select your classification model: ", options=("GradientBoosting","LogisticRegression"))
+else:
+    model = st.selectbox(label="Plase Select your classification model: ", options=("LinearRegression","RandomForestRegressor","HistGradientBoostingRegressor"))
+#def explain(model="LinearRegression",train_data=None,test_data=None):
+#explainer = shap.LinearExplainer(model,train_data,feature_dependence=False)
+#    shap_values = explainer.shap_values(test_data)
+#    shap.summary_plot(shap_values,test_data,plot_type="violin",show=False)
+#    mt.pyplot.gcf().axes[-1].set_box_aspect(10)
+y = st.text_input("Please write your target column name:  ")
+#num_f = st.text_input("Please write your numerical feature names(separted by ","): ").split(",")
+#cat_f = st.text_input("Please write your categorical feature names(separted by ","): ").split(",")
+if st.button("Train"):
+    time.sleep(1)
+    if prob_type=="Classification":
+        with st.progress(10,"Discovering the dataset..."):
+            time.sleep(0.5)
+            st.progress(20, "Applying the preprocessing steps...")
+            time.sleep(1)
+            st.progress(25,"Training engine has started...")
+            st.progress(50, "Training the model...")
+            model_, X_train,X_test,y_train,y_test = train(data=train_data,problem=prob_type,model=model, label=y)
+            time.sleep(2)
+            st.progress(75, "Training complete...")
+            st.progress(85, "Evaluating model performance...")
+            st.progress(90, "Generating Classification report...")
+            time.sleep(1)
+            st.progress(100, "Complete! :100:")
+        y_hat_train = predict(model_,X_train)
+        y_hat_test = predict(model_,X_test)
+        report = evaluate(y_train,y_hat_train,prob_type)
+        st.download_button(label="Click here to download the report",data=report, mime="text/csv")
+        time.sleep(2)
+        st.write("Classification report of testing dataset: ")
+        report_test = evaluate(y_train,y_hat_train,prob_type)
+        st.download_button(key="test",label="Click here to download the report",data=report_test, mime="text/csv")
+        st.success("Report generated successfully! :beers:")
+        time.sleep(20)
+    else:
+        with st.progress(10,"Discovering the dataset..."):
+            time.sleep(0.5)
+            st.progress(20, "Applying the preprocessing steps...")
+            time.sleep(1)
+            st.progress(25,"Training engine has started...")
+            st.progress(50, "Training the model...")
+            model_, X_train,X_test,y_train,y_test = train(data=train_data,problem=prob_type,model=model, label=y)
+            time.sleep(2)
+            st.progress(75, "Training complete...")
+            st.progress(85, "Evaluating model performance...")
+            st.progress(90, "Generating Regression metrics...")
+            time.sleep(1)
+            st.progress(100, "Complete! :100:")
+        y_hat_train = predict(model_,X_train)
+        y_hat_test = predict(model_,X_test)
+        st.write("r2 score on training set: ")
+        st.write(evaluate(y_train,y_hat_train))
+        st.write("r2 score on test set: ")
+        time.sleep(0.5)
+        st.write(evaluate(y_test,y_hat_test,prob_type))
+        st.success("Metrics generated successfully! :beers:")