Spaces:

teamnassim
/

Room-Occupancy-App

Runtime error

App Files Files Community

Room-Occupancy-App / app.py

teamnassim

removed extra thank you button

f6f285e almost 3 years ago

raw

history blame contribute delete

6.75 kB

	import os
	import streamlit as st

	# EDA Pkgs
	import pandas as pd
	import numpy as np

	# Viz Pkgs
	import matplotlib
	matplotlib.use('Agg')
	import seaborn as sns
	from sklearn.linear_model import LogisticRegression
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import MinMaxScaler
	from sklearn.impute import SimpleImputer
	from sklearn.preprocessing import LabelEncoder
	from sklearn.preprocessing import OneHotEncoder
	from sklearn.naive_bayes import GaussianNB
	from sklearn.metrics import accuracy_score

	# Module-level LogisticRegression instance created at import time.
	# NOTE(review): nothing in the visible file reads `regressor` (main() builds
	# its own estimator) -- confirm no external importer relies on it before removing.
	regressor = LogisticRegression()

	def _select_dataset_file(folder_path='./dataset'):
	    """Let the user pick one entry of *folder_path* from a select box.

	    Returns the chosen entry joined onto *folder_path*, or ``None`` when the
	    folder is empty (a select box without options yields ``None``, which
	    ``os.path.join`` cannot handle).
	    """
	    # os.listdir() order is arbitrary; sort so the option list is stable.
	    filenames = sorted(os.listdir(folder_path))
	    if not filenames:
	        return None
	    selected_filename = st.selectbox("Select A file", filenames)
	    return os.path.join(folder_path, selected_filename)


	def _new_axes():
	    """Return a fresh ``(figure, axes)`` pair that is not registered with pyplot.

	    Every chart is drawn on its own figure and handed to ``st.pyplot(fig)``;
	    calling ``st.pyplot()`` without a figure relies on pyplot's global state,
	    which Streamlit deprecates.
	    """
	    from matplotlib.figure import Figure

	    fig = Figure()
	    return fig, fig.subplots()


	def _render_overview(df):
	    """Render the dataset inspection widgets (rows, columns, shape, dtypes, summary)."""
	    if st.checkbox("Show Dataset"):
	        st.write(df.astype(str))

	    if st.button("Column Names"):
	        st.write(df.columns)

	    if st.checkbox("Shape of Dataset"):
	        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
	        if data_dim == 'Rows':
	            st.text("Number of Rows")
	            st.write(df.shape[0])
	        elif data_dim == 'Columns':
	            st.text("Number of Columns")
	            st.write(df.shape[1])
	        else:
	            st.write(df.shape)

	    if st.checkbox("Select Columns To Show"):
	        selected_columns = st.multiselect("Select", df.columns.tolist())
	        st.dataframe(df[selected_columns])

	    # The last column is treated as the target/class column.
	    if st.button("Value Counts"):
	        st.text("Value Counts By Target/Class")
	        st.write(df.iloc[:, -1].value_counts())

	    if st.button("Data Types"):
	        st.text(df.dtypes)

	    if st.checkbox("Summary"):
	        st.write(df.describe().T)


	def _render_plots(df):
	    """Render the fixed plots (correlation, pie, value counts) and the custom plot."""
	    st.subheader("Data Visualization")

	    if st.checkbox("Correlation Plot[Seaborn]"):
	        fig, ax = _new_axes()
	        # Restrict to numeric columns: pandas >= 2.0 raises when corr() meets
	        # string columns instead of silently dropping them.
	        sns.heatmap(df.select_dtypes(include="number").corr(), annot=True, ax=ax)
	        st.pyplot(fig)

	    if st.checkbox("Pie Plot"):
	        if st.button("Generate Pie Plot"):
	            st.success("Generating A Pie Plot")
	            fig, ax = _new_axes()
	            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
	            st.pyplot(fig)

	    if st.checkbox("Plot of Value Counts"):
	        st.text("Value Counts By Target")
	        all_columns_names = df.columns.tolist()
	        primary_col = st.selectbox("Primary Columm to GroupBy", all_columns_names)
	        selected_columns_names = st.multiselect("Select Columns", all_columns_names)
	        if st.button("Plot"):
	            st.text("Generate Plot")
	            if selected_columns_names:
	                vc_plot = df.groupby(primary_col)[selected_columns_names].count()
	            else:
	                # Nothing selected: fall back to the class counts of the last column.
	                vc_plot = df.iloc[:, -1].value_counts()
	            fig, ax = _new_axes()
	            vc_plot.plot(kind="bar", ax=ax)
	            st.pyplot(fig)

	    st.subheader("Customizable Plot")
	    all_columns_names = df.columns.tolist()
	    type_of_plot = st.selectbox("Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"])
	    selected_columns_names = st.multiselect("Select Columns To Plot", all_columns_names)

	    if st.button("Generate Plot"):
	        if not selected_columns_names:
	            # pandas plotting raises on an empty column selection.
	            st.warning("Select at least one column to plot")
	        else:
	            st.success("Generating Customizable Plot of {} for {}".format(type_of_plot, selected_columns_names))
	            cust_data = df[selected_columns_names]
	            # Chart kinds Streamlit draws natively; the rest go through pandas/matplotlib.
	            native_charts = {
	                'area': st.area_chart,
	                'bar': st.bar_chart,
	                'line': st.line_chart,
	            }
	            if type_of_plot in native_charts:
	                native_charts[type_of_plot](cust_data)
	            else:
	                fig, ax = _new_axes()
	                cust_data.plot(kind=type_of_plot, ax=ax)
	                st.pyplot(fig)


	def _impute_and_encode(df):
	    """Return a copy of *df* with missing values filled and object columns label-encoded.

	    Object columns are imputed with their most frequent value and then
	    label-encoded; all other columns are imputed with their mean. Either step
	    is skipped when the frame has no column of that kind, because the
	    imputers reject empty input.
	    """
	    data = df.copy()
	    is_object = data.dtypes == 'object'
	    object_cols = data.columns[is_object]
	    other_cols = data.columns[~is_object]

	    if len(object_cols) != 0:
	        cat_imp = SimpleImputer(strategy="most_frequent")
	        data.loc[:, object_cols] = cat_imp.fit_transform(data.loc[:, object_cols])
	    if len(other_cols) != 0:
	        imp = SimpleImputer(missing_values=np.nan, strategy="mean")
	        data.loc[:, other_cols] = imp.fit_transform(data.loc[:, other_cols])

	    # Label encoding is the only categorical encoding that ever reached the
	    # model: the former one-hot step discarded its result, and it raised on
	    # frames without object columns, so it is not reproduced here.
	    for col in object_cols:
	        data.loc[:, col] = LabelEncoder().fit_transform(data[col])
	    return data


	def _render_model_section(df):
	    """Train the classifier chosen in the sidebar and report train/test accuracy.

	    *df* is the raw dataset; preprocessing works on a copy. Nothing is
	    returned; results and errors are written to the page.
	    """
	    st.subheader("Model, Deployment, and Evaluation")
	    data = _impute_and_encode(df)

	    # An empty selection keeps every column, which matches the behaviour from
	    # when this widget's value was ignored.
	    features = st.multiselect('select features and target variable', data.columns.tolist())
	    if len(features) >= 2:
	        data = data[features]
	    elif features:
	        st.warning("Select at least two columns (features plus target); using all columns")

	    columns = data.columns
	    # Default to the last column, the same one the value-count widgets treat
	    # as the target/class.
	    chosen_target = st.sidebar.selectbox("Please choose target column", columns, index=len(columns) - 1)

	    # Only classification is implemented; the selector stays so the sidebar
	    # layout is unchanged.
	    st.sidebar.selectbox("Algorithm type", ("Classification",))
	    chosen_classifier = st.sidebar.selectbox("Please choose a classifier", ('Logistic Regression', 'Naive Bayes'))
	    if chosen_classifier == 'Logistic Regression':
	        # Default of 100 is scikit-learn's own default, so wiring the slider
	        # into the estimator leaves the out-of-the-box result unchanged.
	        max_iter = st.sidebar.slider('max iterations', 1, 100, 100)
	        alg = LogisticRegression(max_iter=max_iter)
	    else:
	        alg = GaussianNB()

	    X = data.loc[:, data.columns != chosen_target]
	    y = data[chosen_target]
	    try:
	        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
	        # Fit the scaler on the training rows only so that test-set statistics
	        # do not leak into training.
	        scaler = MinMaxScaler(feature_range=(0, 1))
	        X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns)
	        X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns)
	        alg.fit(X_train, y_train)
	        predictions = alg.predict(X_test)
	        predictions_train = alg.predict(X_train)
	    except ValueError as err:
	        # e.g. a continuous column picked as the target, or no feature columns left.
	        st.error("Could not train a classifier on the selected columns: {}".format(err))
	        return

	    accuracy_test = accuracy_score(y_test, predictions)
	    accuracy_train = accuracy_score(y_train, predictions_train)
	    st.write('### Accuracy Train: ' + str(round(accuracy_train, 3)) +
	             ' -- Accuracy Test: ' + str(round(accuracy_test, 3)))


	def main():
	    """Render the Room Occupancy Predictor page.

	    Shows the title banner, lets the user pick a CSV from ``./dataset``, then
	    renders the dataset overview, the plots, the model section and the
	    sidebar "about" panel, in that order.
	    """
	    html_temp = """
	<div style="background-color:pink;"><p style="color:white;font-size:75px;padding:10px">Room Occupancy Predictor 💡</p></div>
	"""
	    st.markdown(html_temp, unsafe_allow_html=True)

	    filename = _select_dataset_file()
	    if filename is None:
	        st.error("No files found in ./dataset")
	    else:
	        st.info("You Selected {}".format(filename))
	        df = pd.read_csv(filename)
	        _render_overview(df)
	        _render_plots(df)
	        _render_model_section(df)

	    if st.button("Thanks"):
	        st.balloons()

	    st.sidebar.header("About App")
	    st.sidebar.info("A Simple ML App for predicting Room Occupancy")

	    st.sidebar.header("Developer")
	    st.sidebar.info("Nasim Obeid")
	    st.sidebar.text("Built with Streamlit")


	# Script entry point: build the page only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()