Photon08 committed on
Commit
b335a3a
·
1 Parent(s): 468dd27

Create app.py

Browse files

Initial test: First push

Files changed (1) hide show
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder
4
+ from sklearn.linear_model import LinearRegression, LogisticRegression
5
+ from sklearn.ensemble import GradientBoostingClassifier
6
+ import xgboost
7
+ from sklearn.compose import ColumnTransformer
8
+ import pickle
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.metrics import classification_report, r2_score
12
+ import streamlit as st
13
+ import shap
14
+ import matplotlib as mt
15
+
16
def train(data=None, problem="Regression", model="LinearRegression", label=None):
    """Train a preprocessing + model pipeline on an uploaded CSV dataset.

    Parameters
    ----------
    data : file-like / path accepted by ``pandas.read_csv`` (e.g. a Streamlit upload).
    problem : "Regression" or "Classification".
    model : for classification, "GradientBoosting" or "LogisticRegression";
        ignored for regression (LinearRegression is always used).
    label : name of the target column in the CSV.

    Returns
    -------
    (fitted_pipeline, X_train, X_test, y_train, y_test); for classification the
    returned targets are label-encoded integer arrays.
    """
    df = pd.read_csv(data)

    target = df[label].copy()
    features = df.drop(label, axis=1)

    # BUG FIX: stratified splitting only makes sense for discrete class labels;
    # the original always passed stratify=target, which raises a ValueError for
    # continuous regression targets.
    strat = target if problem == "Classification" else None
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.20, random_state=42, shuffle=True,
        stratify=strat,
    )

    # Split columns by dtype: object columns are treated as categorical.
    cat_features = [c for c in features.columns if df[c].dtypes == "object"]
    num_features = [c for c in features.columns if df[c].dtypes != "object"]

    if problem == "Regression":
        # BUG FIX: the original tested cat_features[0] != "", which raises
        # IndexError when the dataset has no categorical columns at all.
        if cat_features:
            trf = ColumnTransformer([
                ("num_trf", StandardScaler(), num_features),
                ("cat_trf", OneHotEncoder(sparse_output=False), cat_features),
            ])
        else:
            trf = ColumnTransformer([("num_trf", StandardScaler(), num_features)])

        final_pipe = Pipeline([("transformers", trf),
                               ("reg_model", LinearRegression())])
        final_pipe.fit(X_train, y_train)
        return final_pipe, X_train, X_test, y_train, y_test

    if problem == "Classification":
        trf = ColumnTransformer([
            ("num_trf", StandardScaler(), num_features),
            ("cat_trf", OneHotEncoder(), cat_features),
        ])

        # Encode class labels as integers.  Fit on the training labels only
        # and reuse that fitted encoder for the test labels.
        # BUG FIX: the original did y_test_trf = lbl_encd.fit(y_test), which
        # returns the encoder object itself, not the transformed labels.
        lbl_encd = LabelEncoder()
        lbl_encd.fit(y_train)
        y_train_trf = lbl_encd.transform(y_train)
        y_test_trf = lbl_encd.transform(y_test)

        # Both classification branches shared identical preprocessing; only
        # the estimator differs, so select it here instead of duplicating code.
        if model == "GradientBoosting":
            clf = GradientBoostingClassifier(random_state=42)
        else:  # "LogisticRegression" (and any other value, backward-compatible)
            clf = LogisticRegression(random_state=42)

        final_pipe = Pipeline([("transformers", trf), ("clf_model", clf)])
        final_pipe.fit(X_train, y_train_trf)
        return final_pipe, X_train, X_test, y_train_trf, y_test_trf
95
def predict(model=None, x=None):
    """Run inference with *model* on the feature matrix *x*.

    *model* is any fitted estimator exposing ``predict``; returns whatever
    that estimator's ``predict`` returns.
    """
    return model.predict(x)
102
def evaluate(y_true, y_pred, problem="Regression"):
    """Score predictions against ground truth.

    Returns the r2 score (float) for regression; otherwise a UTF-8 encoded
    CSV (bytes) of the full classification report, suitable for download.
    """
    if problem == "Regression":
        return r2_score(y_true, y_pred)

    report = classification_report(y_true, y_pred, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    return report_df.to_csv().encode('utf-8')
114
# --- Streamlit UI: problem type, training-data upload, model choice ---
prob_type = st.selectbox(label="Please select your ML problem type: ",
                         options=("Regression", "Classification"))

train_data = st.file_uploader(label="Please upload your training dataset",
                              type=["csv"])

if prob_type == "Classification":
    # BUG FIX: corrected the user-facing typo "Plase Select".
    model = st.selectbox(label="Please select your classification model: ",
                         options=("GradientBoosting", "LogisticRegression"))
else:
    # Regression always uses LinearRegression (see train()).
    model = "LinearRegression"
125
def explain(model="LinearRegression", train_data=None, test_data=None):
    """Render a SHAP summary (violin) plot for *model* in the Streamlit app.

    Parameters
    ----------
    model : a fitted linear estimator (not the whole Pipeline) —
        shap.LinearExplainer only supports linear models.
    train_data : background data used by the explainer.
    test_data : samples to explain and plot.
    """
    # NOTE(review): `feature_dependence` was removed from recent shap releases;
    # the replacement keyword is `feature_perturbation`. "interventional"
    # matches the original intent of ignoring feature correlations — confirm
    # against the installed shap version.
    explainer = shap.LinearExplainer(model, train_data,
                                     feature_perturbation="interventional")
    shap_values = explainer.shap_values(test_data)

    shap.summary_plot(shap_values, test_data, plot_type="violin", show=False)
    # Shrink the colour-bar axis of the current figure.
    mt.pyplot.gcf().axes[-1].set_box_aspect(10)
    # BUG FIX: the original built the figure but never displayed it; hand the
    # current matplotlib figure to Streamlit so it actually appears in the app.
    st.pyplot(mt.pyplot.gcf())
133
y = st.text_input("Please write your target column name: ")

if st.button("Train"):
    # Train the pipeline on the uploaded CSV with the options chosen above.
    model_, X_train, X_test, y_train, y_test = train(
        data=train_data, problem=prob_type, model=model, label=y
    )

    y_hat_train = predict(model_, X_train)
    y_hat_test = predict(model_, X_test)

    if prob_type == "Classification":
        st.write("Classification report of training set: ")
        report = evaluate(y_train, y_hat_train, prob_type)
        st.download_button(label="Click here to download the report",
                           data=report, mime="text/csv")

        st.write("Classification report of testing dataset: ")
        # BUG FIX: the original evaluated the TRAINING predictions again here
        # (evaluate(y_train, y_hat_train, ...)), so the "test" report was just
        # a duplicate of the training report.  NOTE(review): this relies on
        # train() returning encoded test labels — verify alongside train().
        report_test = evaluate(y_test, y_hat_test, prob_type)
        st.download_button(key="test",
                           label="Click here to download the report",
                           data=report_test, mime="text/csv")
    else:
        st.write("r2 score on training set: ")
        st.write(evaluate(y_train, y_hat_train))
        st.write("r2 score on test set: ")
        st.write(evaluate(y_test, y_hat_test, prob_type))
        # TODO: enable SHAP explanations once explain() is validated:
        # explain(model_.named_steps["reg_model"], X_train, X_test)