teamnassim committed on
Commit
164bb77
·
1 Parent(s): 0194ec7

added app along with data

Browse files
Files changed (3) hide show
  1. Room-Occupancy-app +1 -0
  2. app.py +239 -0
  3. dataset/Occupancy.csv +0 -0
Room-Occupancy-app ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 98683d86f6b92a390a2fb13abf18652cad23a67c
app.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+
4
+ # EDA Pkgs
5
+ import pandas as pd
6
+ import numpy as np
7
+ import pickle
8
+
9
+ # Viz Pkgs
10
+ import matplotlib.pyplot as plt
11
+ import matplotlib
12
+ matplotlib.use('Agg')
13
+ import seaborn as sns
14
+ from sklearn.linear_model import LinearRegression
15
+ from sklearn.linear_model import LogisticRegression
16
+ from sklearn.model_selection import train_test_split
17
+ from sklearn.preprocessing import MinMaxScaler
18
+ from sklearn.impute import SimpleImputer
19
+ from sklearn.preprocessing import LabelEncoder
20
+ from sklearn.preprocessing import OneHotEncoder
21
+ from sklearn.naive_bayes import GaussianNB
22
+ import tensorflow as tf
23
+ from tensorflow import keras
24
+ from tensorflow.keras import layers
25
+ from sklearn.metrics import accuracy_score
26
+ import keras
27
+ import altair as alt
28
+ from keras.models import Sequential
29
+ from keras.layers import Dense
30
+ from keras.callbacks import History
31
+ import random
32
+ import string
33
+ from pandas.errors import ParserError
34
+ import matplotlib.cm as cm
35
+
36
# Module-level LogisticRegression instance. main() builds its own estimator
# and never reads this name in the visible code.
regressor = LogisticRegression()
37
+
38
def _select_dataset_file(folder_path='./dataset'):
    """Let the user pick one CSV file from *folder_path*.

    Returns the path of the chosen file (folder joined with file name), or
    None when the folder cannot be read or contains no CSV files. In the
    None case an error message has already been shown to the user.
    """
    try:
        entries = sorted(os.listdir(folder_path))
    except OSError as err:
        st.error("Cannot read dataset folder {}: {}".format(folder_path, err))
        return None

    # Only offer regular CSV files; sub-folders or stray files would make
    # pd.read_csv fail later on.
    filenames = [
        name for name in entries
        if name.lower().endswith('.csv')
        and os.path.isfile(os.path.join(folder_path, name))
    ]
    if not filenames:
        st.error("No CSV files found in {}".format(folder_path))
        return None

    selected_filename = st.selectbox("Select A file", filenames)
    return os.path.join(folder_path, selected_filename)


def _render_figure(fig):
    """Draw *fig* in the app and close it so figures do not pile up across reruns."""
    # Passing the figure explicitly avoids Streamlit's deprecated reliance on
    # the global pyplot state.
    st.pyplot(fig)
    plt.close(fig)


def _show_dataset_overview(df):
    """Render the optional widgets that describe the raw dataset."""
    # Show Dataset
    if st.checkbox("Show Dataset"):
        st.write(df.astype(str))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
        else:
            st.write(df.shape)

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect("Select", df.columns.tolist())
        st.dataframe(df[selected_columns])

    # Show Values (the last column is treated as the target/class)
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Datatypes
    if st.button("Data Types"):
        st.text(df.dtypes)

    # Show Summary
    if st.checkbox("Summary"):
        st.write(df.describe().T)


def _show_visualizations(df):
    """Render the correlation, pie, value-count and customizable plots."""
    st.subheader("Data Visualization")
    all_columns_names = df.columns.tolist()

    # Correlation heatmap (Seaborn). Correlation is only defined for numeric
    # columns, so text columns such as timestamps are left out.
    if st.checkbox("Correlation Plot[Seaborn]"):
        numeric_df = df.select_dtypes(include='number')
        if numeric_df.shape[1] == 0:
            st.warning("The dataset has no numeric columns to correlate.")
        else:
            fig, ax = plt.subplots()
            sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
            _render_figure(fig)

    # Pie Chart of the target (last column)
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
            _render_figure(fig)

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        primary_col = st.selectbox("Primary Columm to GroupBy", all_columns_names)
        selected_columns_names = st.multiselect("Select Columns", all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(primary_col)[selected_columns_names].count()
            else:
                # Nothing selected: fall back to the target's value counts.
                vc_plot = df.iloc[:, -1].value_counts()
            fig, ax = plt.subplots()
            vc_plot.plot(kind="bar", ax=ax)
            _render_figure(fig)

    # Customizable Plot
    st.subheader("Customizable Plot")
    type_of_plot = st.selectbox(
        "Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect("Select Columns To Plot", all_columns_names)

    if st.button("Generate Plot"):
        if not selected_columns_names:
            st.warning("Please select at least one column to plot.")
            return
        st.success("Generating Customizable Plot of {} for {}".format(
            type_of_plot, selected_columns_names))
        cust_data = df[selected_columns_names]

        # Plot By Streamlit
        if type_of_plot == 'area':
            st.area_chart(cust_data)
        elif type_of_plot == 'bar':
            st.bar_chart(cust_data)
        elif type_of_plot == 'line':
            st.line_chart(cust_data)
        # Custom Plot (pandas/matplotlib)
        else:
            fig, ax = plt.subplots()
            try:
                cust_data.plot(kind=type_of_plot, ax=ax)
            except (TypeError, ValueError) as err:
                # pandas raises when the selection holds no plottable data.
                plt.close(fig)
                st.error("Cannot draw a {} plot for {}: {}".format(
                    type_of_plot, selected_columns_names, err))
            else:
                _render_figure(fig)


def _impute_and_encode(df):
    """Return a copy of *df* with missing values filled and text columns encoded.

    Numeric columns get their NaNs replaced by the column mean. Every other
    column gets its most frequent value and is then label-encoded to integer
    codes. The input frame is left untouched.
    """
    prepared = df.copy()
    num_cols = prepared.select_dtypes(include='number').columns.tolist()
    cat_cols = [col for col in prepared.columns if col not in num_cols]

    if cat_cols:
        cat_imp = SimpleImputer(strategy="most_frequent")
        prepared[cat_cols] = cat_imp.fit_transform(prepared[cat_cols].astype(object))
        for col in cat_cols:
            # A fresh encoder per column; str() keeps mixed-type columns sortable.
            prepared[col] = LabelEncoder().fit_transform(prepared[col].astype(str))

    if num_cols:
        imp = SimpleImputer(missing_values=np.nan, strategy="mean")
        prepared[num_cols] = imp.fit_transform(prepared[num_cols])

    return prepared


def _train_and_report(df):
    """Train the classifier chosen in the sidebar and display train/test accuracy.

    The feature multiselect restricts the model inputs; when it is left empty
    every column except the target is used. Problems such as an empty dataset
    or a target that cannot be classified are reported in the app instead of
    raising.
    """
    st.subheader("Model, Deployment, and Evaluation")
    features = st.multiselect('select features and target variable', df.columns.tolist())

    if df.empty:
        st.warning("The dataset is empty; nothing to train on.")
        return

    # Impute nans with mean for numerics and most frequent for categoricals,
    # then label-encode the categoricals.
    prepared = _impute_and_encode(df)
    columns = prepared.columns.tolist()

    # Default to the last column, which the rest of the app treats as the target.
    chosen_target = st.sidebar.selectbox(
        "Please choose target column", columns, index=len(columns) - 1)
    algorithm_type = st.sidebar.selectbox("Algorithm type", ("Classification",))
    chosen_classifier = st.sidebar.selectbox(
        "Please choose a classifier", ('Logistic Regression', 'Naive Bayes'))
    if chosen_classifier == 'Logistic Regression':
        max_iter = st.sidebar.slider('max iterations', 1, 100, 10)
        # Hand the slider value to the solver so the control has an effect.
        alg = LogisticRegression(max_iter=max_iter)
    else:
        alg = GaussianNB()

    feature_cols = [col for col in (features or columns) if col != chosen_target]
    if not feature_cols:
        st.warning("Please select at least one feature other than the target column.")
        return

    X = prepared[feature_cols]
    y = prepared[chosen_target]

    try:
        # Train test
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Fit the scaler on the training split only so that no information
        # from the test split leaks into the model.
        scaler = MinMaxScaler(feature_range=(0, 1))
        X_train = pd.DataFrame(
            scaler.fit_transform(X_train), columns=feature_cols, index=X_train.index)
        X_test = pd.DataFrame(
            scaler.transform(X_test), columns=feature_cols, index=X_test.index)

        alg.fit(X_train, y_train)
        predictions = alg.predict(X_test)
        predictions_train = alg.predict(X_train)
    except ValueError as err:
        # Typical causes: too few rows to split, or a continuous target.
        st.error("Could not train {} on target '{}': {}".format(
            chosen_classifier, chosen_target, err))
        return

    error_metrics = {}
    if algorithm_type == 'Classification':
        error_metrics['Accuracy_test'] = accuracy_score(y_test, predictions)
        error_metrics['Accuracy_train'] = accuracy_score(y_train, predictions_train)
        st.write('### Accuracy Train: ' + str(round(error_metrics['Accuracy_train'], 3)) +
                 ' -- Accuracy Test: ' + str(round(error_metrics['Accuracy_test'], 3)))


def _show_sidebar_about():
    """Render the static "about" and "developer" sections of the sidebar."""
    st.sidebar.header("About App")
    st.sidebar.info("A Simple ML App for predicting Room Occupancy")

    st.sidebar.header("Developer")
    st.sidebar.info("Nasim Obeid")
    st.sidebar.text("Built with Streamlit")


def main():
    """ Common ML Dataset Explorer

    Streamlit entry point: lets the user pick a CSV from ./dataset, explore
    and plot it, then train a classifier and view its train/test accuracy.
    """
    st.title("Web App by Nasim Obeid")
    html_temp = """
    <div style="background-color:teal;"><p style="color:white;font-size:50px;padding:10px">Room Occupancy Predictor</p></div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    filename = _select_dataset_file()
    if filename is not None:
        st.info("You Selected {}".format(filename))

        # Read Data
        try:
            df = pd.read_csv(filename)
        except (OSError, ValueError) as err:
            # pandas' ParserError / EmptyDataError and decoding errors are
            # all ValueError subclasses.
            st.error("Could not read {}: {}".format(filename, err))
        else:
            _show_dataset_overview(df)
            _show_visualizations(df)
            _train_and_report(df)

    if st.button("Thanks"):
        st.balloons()

    _show_sidebar_about()
236
+
237
+
238
# Launch the Streamlit app only when this file is executed as a script.
if __name__ == "__main__":
    main()
dataset/Occupancy.csv ADDED
The diff for this file is too large to render. See raw diff