Photon08 committed on
Commit
68fbecb
·
1 Parent(s): 4935f60

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -0
app.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
4
+ from sklearn.linear_model import LinearRegression, LogisticRegression
5
+ from sklearn.ensemble import GradientBoostingClassifier, RandomForestRegressor,HistGradientBoostingRegressor
6
+ #import xgboost
7
+ from sklearn.compose import ColumnTransformer
8
+ #import pickle
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.metrics import classification_report, r2_score
12
+ import streamlit as st
13
+ import time
14
+ #import shap
15
+ #import matplotlib as mt
16
+
17
def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Train an ML pipeline on a CSV dataset and return it with the split data.

    Parameters
    ----------
    data : file-like or path
        Anything accepted by ``pd.read_csv`` (the Streamlit uploader object).
    problem : str
        Either "Regression" or "Classification".
    model : str
        Regression: "LinearRegression", "RandomForestRegressor", anything else
        falls back to HistGradientBoostingRegressor.
        Classification: "GradientBoosting", anything else falls back to
        LogisticRegression (previously an unknown name returned None).
    label : str
        Name of the target column in the CSV.

    Returns
    -------
    tuple
        (fitted_pipeline, X_train, X_test, y_train, y_test). For
        classification problems the returned targets are label-encoded.
    """
    df = pd.read_csv(data)

    target = df[label].copy()
    features = df.drop(label, axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.20, random_state=42, shuffle=True
    )

    # Split columns by dtype: object columns are treated as categorical,
    # everything else as numeric.
    cat_features = [c for c in features.columns if df[c].dtypes == "object"]
    num_features = [c for c in features.columns if df[c].dtypes != "object"]

    if problem == "Regression":
        trf = ColumnTransformer(
            [("num_trf", StandardScaler(), num_features),
             ("cat_trf", OneHotEncoder(sparse_output=False), cat_features)]
        )

        if model == "LinearRegression":
            estimator = LinearRegression()
        elif model == "RandomForestRegressor":
            estimator = RandomForestRegressor(random_state=42)
        else:
            estimator = HistGradientBoostingRegressor(random_state=42)

        final_pipe = Pipeline([("transformers", trf), ("reg_model", estimator)])
        # BUG FIX: the pipeline was previously fit twice in a row.
        final_pipe.fit(X_train, y_train)

        return final_pipe, X_train, X_test, y_train, y_test

    # Classification path (the two model branches were duplicated; merged).
    trf = ColumnTransformer(
        [("num_trf", StandardScaler(), num_features),
         ("cat_trf", OneHotEncoder(), cat_features)]
    )

    lbl_encd = LabelEncoder()
    lbl_encd.fit(y_train)
    y_train_trf = lbl_encd.transform(y_train)
    # BUG FIX: was ``lbl_encd.fit(y_test)``, which returned the encoder
    # object itself instead of the encoded labels.
    # NOTE(review): assumes y_test contains no labels unseen in y_train —
    # LabelEncoder.transform raises on unseen labels; verify upstream data.
    y_test_trf = lbl_encd.transform(y_test)

    if model == "GradientBoosting":
        estimator = GradientBoostingClassifier(random_state=42)
    else:
        estimator = LogisticRegression(random_state=42)

    final_pipe = Pipeline([("transformers", trf), ("clf_model", estimator)])
    final_pipe.fit(X_train, y_train_trf)

    return final_pipe, X_train, X_test, y_train_trf, y_test_trf
98
+
99
+
100
def predict(model=None, x=None):
    """Return *model*'s predictions for the feature matrix *x*."""
    return model.predict(x)
106
+
107
def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions against ground truth.

    For "Regression" the r2 score (float) is returned; for any other
    problem type a UTF-8 encoded CSV of the full classification report
    (bytes, suitable for a download button) is returned.
    """
    if problem == "Regression":
        return r2_score(y_true, y_pred)

    report = classification_report(y_true, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    return report_df.to_csv().encode("utf-8")
118
# ---------------------------------------------------------------------------
# Streamlit UI: collect the problem type, model, dataset and target column,
# then train, evaluate and offer the resulting reports for download.
# ---------------------------------------------------------------------------
st.title("No Code Machine Learning Studio :six_pointed_star:")

st.image(image="https://www.silvertouchtech.co.uk/wp-content/uploads/2020/05/ai-banner.jpg")
st.subheader("Plug & Play Portal for Machine Learning")

prob_type = st.selectbox(label="Please select your ML problem type: ",
                         options=("Regression", "Classification"))

train_data = st.file_uploader(label="Please upload your training dataset", type=["csv"])

if prob_type == "Classification":
    model = st.selectbox(label="Please select your classification model: ",
                         options=("GradientBoosting", "LogisticRegression"))
else:
    # BUG FIX: this branch previously said "classification model" while
    # offering regression models.
    model = st.selectbox(label="Please select your regression model: ",
                         options=("LinearRegression", "RandomForestRegressor",
                                  "HistGradientBoostingRegressor"))

y = st.text_input("Please write your target column name: ")

if st.button("Train"):
    time.sleep(1)

    # BUG FIX: ``st.progress`` returns a progress-bar widget, not a context
    # manager, so the original ``with st.progress(...)`` raised at runtime.
    # The shared training flow is also hoisted out of the two branches.
    st.progress(10, "Discovering the dataset...")
    time.sleep(0.5)
    st.progress(20, "Applying the preprocessing steps...")
    time.sleep(1)
    st.progress(25, "Training engine has started...")
    st.progress(50, "Training the model...")
    model_, X_train, X_test, y_train, y_test = train(
        data=train_data, problem=prob_type, model=model, label=y
    )
    time.sleep(2)
    st.progress(75, "Training complete...")
    st.progress(85, "Evaluating model performance...")

    if prob_type == "Classification":
        st.progress(90, "Generating Classification report...")
        time.sleep(1)
        st.progress(100, "Complete! :100:")
        y_hat_train = predict(model_, X_train)
        y_hat_test = predict(model_, X_test)

        st.write("Classification report of training dataset: ")
        report = evaluate(y_train, y_hat_train, prob_type)
        st.download_button(label="Click here to download the report",
                           data=report, mime="text/csv")
        time.sleep(2)

        st.write("Classification report of testing dataset: ")
        # BUG FIX: the "testing dataset" report was previously computed from
        # the training data (evaluate(y_train, y_hat_train, ...)).
        report_test = evaluate(y_test, y_hat_test, prob_type)
        st.download_button(key="test", label="Click here to download the report",
                           data=report_test, mime="text/csv")
        st.success("Report generated successfully! :beers:")
    else:
        st.progress(90, "Generating Regression metrics...")
        time.sleep(1)
        st.progress(100, "Complete! :100:")
        y_hat_train = predict(model_, X_train)
        y_hat_test = predict(model_, X_test)

        st.write("r2 score on training set: ")
        st.write(evaluate(y_train, y_hat_train, prob_type))
        st.write("r2 score on test set: ")
        time.sleep(0.5)
        st.write(evaluate(y_test, y_hat_test, prob_type))
        st.success("Metrics generated successfully! :beers:")