# Source: Hugging Face Spaces — teamnassim / Room-Occupancy-App
# Space status when captured: Runtime error
# File size: 6,746 Bytes

import os
import streamlit as st

# EDA Pkgs
import pandas as pd
import numpy as np

# Viz Pkgs
import matplotlib
matplotlib.use('Agg')
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Module-level LogisticRegression instance. Nothing in the visible file reads
# it (main() constructs its own classifier).
# NOTE(review): confirm no external importer relies on it before removing.
regressor = LogisticRegression()

def _select_dataset(folder_path='./dataset'):
	"""Let the user pick one file from *folder_path*.

	Returns the joined path of the chosen file, or ``None`` (after showing an
	error) when the folder cannot be read or holds no regular files.
	"""
	try:
		# Sorted so the widget order is stable; os.listdir order is arbitrary.
		filenames = sorted(
			name for name in os.listdir(folder_path)
			if os.path.isfile(os.path.join(folder_path, name))
		)
	except OSError as err:
		st.error("Cannot read dataset folder {}: {}".format(folder_path, err))
		return None
	if not filenames:
		st.error("No dataset files found in {}".format(folder_path))
		return None
	selected_filename = st.selectbox("Select A file", filenames)
	return os.path.join(folder_path, selected_filename)


def _new_figure():
	"""Create a fresh Matplotlib ``(figure, axes)`` pair to draw on."""
	# Imported here so the backend selected at module import time is already set.
	import matplotlib.pyplot as plt
	return plt.subplots()


def _show_figure(fig):
	"""Render *fig* in the page, then close it."""
	import matplotlib.pyplot as plt
	# Pass the figure explicitly: relying on the global pyplot figure is the
	# deprecated path that the old 'showPyplotGlobalUse' option used to silence.
	st.pyplot(fig)
	# The script is re-run on every interaction; close to avoid piling up figures.
	plt.close(fig)


def _render_overview(df):
	"""Render the dataset inspection widgets (table, columns, shape, dtypes, summary)."""
	if st.checkbox("Show Dataset"):
		# Cast to str so mixed-type columns are displayed as-is.
		st.write(df.astype(str))

	if st.button("Column Names"):
		st.write(df.columns)

	if st.checkbox("Shape of Dataset"):
		data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
		if data_dim == 'Rows':
			st.text("Number of Rows")
			st.write(df.shape[0])
		else:
			st.text("Number of Columns")
			st.write(df.shape[1])

	if st.checkbox("Select Columns To Show"):
		selected_columns = st.multiselect("Select", df.columns.tolist())
		st.dataframe(df[selected_columns])

	if st.button("Value Counts"):
		st.text("Value Counts By Target/Class")
		# The last column is treated as the target/class throughout the app.
		st.write(df.iloc[:, -1].value_counts())

	if st.button("Data Types"):
		st.text(df.dtypes)

	if st.checkbox("Summary"):
		st.write(df.describe().T)


def _render_plots(df):
	"""Render the visualization widgets (heatmap, pie, value counts, custom plot)."""
	st.subheader("Data Visualization")

	if st.checkbox("Correlation Plot[Seaborn]"):
		# Correlation is only defined for numeric data; text columns would raise.
		numeric_df = df.select_dtypes(include="number")
		if numeric_df.shape[1] == 0:
			st.warning("No numeric columns available for a correlation plot.")
		else:
			fig, ax = _new_figure()
			sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
			_show_figure(fig)

	if st.checkbox("Pie Plot"):
		if st.button("Generate Pie Plot"):
			st.success("Generating A Pie Plot")
			fig, ax = _new_figure()
			df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
			_show_figure(fig)

	if st.checkbox("Plot of Value Counts"):
		st.text("Value Counts By Target")
		all_columns_names = df.columns.tolist()
		primary_col = st.selectbox("Primary Columm to GroupBy", all_columns_names)
		selected_columns_names = st.multiselect("Select Columns", all_columns_names)
		if st.button("Plot"):
			st.text("Generate Plot")
			if selected_columns_names:
				vc_plot = df.groupby(primary_col)[selected_columns_names].count()
			else:
				vc_plot = df.iloc[:, -1].value_counts()
			fig, ax = _new_figure()
			vc_plot.plot(kind="bar", ax=ax)
			_show_figure(fig)

	st.subheader("Customizable Plot")
	all_columns_names = df.columns.tolist()
	type_of_plot = st.selectbox("Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"])
	selected_columns_names = st.multiselect("Select Columns To Plot", all_columns_names)

	if st.button("Generate Plot"):
		if not selected_columns_names:
			# Plotting an empty selection raises inside pandas; ask for input instead.
			st.warning("Select at least one column to plot.")
			return
		st.success("Generating Customizable Plot of {} for {}".format(type_of_plot, selected_columns_names))
		cust_data = df[selected_columns_names]

		# These three kinds are drawn by Streamlit's native charts.
		native_charts = {
			'area': st.area_chart,
			'bar': st.bar_chart,
			'line': st.line_chart,
		}
		if type_of_plot in native_charts:
			native_charts[type_of_plot](cust_data)
		else:
			# hist / box / kde are drawn by pandas and need numeric data.
			numeric_data = cust_data.select_dtypes(include="number")
			if numeric_data.shape[1] == 0:
				st.warning("The selected columns contain no numeric data to plot.")
				return
			fig, ax = _new_figure()
			numeric_data.plot(kind=type_of_plot, ax=ax)
			_show_figure(fig)


def _prepare_model_frame(df):
	"""Return a copy of *df* that is ready for modelling.

	Missing values are imputed (most frequent value for text columns, mean for
	numeric columns) and text columns are label-encoded to integers.
	"""
	# Entirely empty columns carry no signal, and SimpleImputer would drop them
	# from its output, breaking the column-wise assignment below.
	prepared = df.dropna(axis=1, how="all")
	cat_cols = prepared.select_dtypes(include="object").columns.tolist()
	num_cols = prepared.select_dtypes(include="number").columns.tolist()

	if cat_cols:
		cat_imp = SimpleImputer(strategy="most_frequent")
		prepared[cat_cols] = cat_imp.fit_transform(prepared[cat_cols])
	# Numeric imputation must run even when the data has no text columns.
	if num_cols:
		num_imp = SimpleImputer(missing_values=np.nan, strategy="mean")
		prepared[num_cols] = num_imp.fit_transform(prepared[num_cols])

	for col in cat_cols:
		# Cast to str so columns mixing value types can still be sorted/encoded.
		prepared[col] = LabelEncoder().fit_transform(prepared[col].astype(str))
	return prepared


def _render_model_section(df):
	"""Train the chosen classifier on *df* and report train/test accuracy."""
	st.subheader("Model, Deployment, and Evaluation")
	prepared = _prepare_model_frame(df)

	# An empty selection keeps every column (the previous behaviour).
	features = st.multiselect('select features and target variable', prepared.columns.tolist())
	if features:
		prepared = prepared[features]
	if prepared.shape[1] < 2:
		st.warning("Select at least two columns: one target and one or more features.")
		return

	columns = prepared.columns.tolist()
	# Default to the last column, which the rest of the app treats as the target.
	chosen_target = st.sidebar.selectbox("Please choose target column", columns, index=len(columns) - 1)

	X = prepared.drop(columns=[chosen_target])
	y = prepared[chosen_target]

	# Only classification is offered; the widget is kept so the sidebar layout is unchanged.
	st.sidebar.selectbox("Algorithm type", ("Classification",))
	chosen_classifier = st.sidebar.selectbox("Please choose a classifier", ('Logistic Regression', 'Naive Bayes'))
	if chosen_classifier == 'Logistic Regression':
		max_iter = st.sidebar.slider('max iterations', 1, 100, 10)
		# Forward the slider value; it was previously collected but ignored.
		alg = LogisticRegression(max_iter=max_iter)
	else:
		alg = GaussianNB()

	try:
		X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
		# Fit the scaler on the training split only so test data does not leak in.
		scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
		X_train = pd.DataFrame(scaler.transform(X_train), columns=X.columns, index=X_train.index)
		X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)
		alg.fit(X_train, y_train)
		predictions = alg.predict(X_test)
		predictions_train = alg.predict(X_train)
	except ValueError as err:
		# scikit-learn reports unusable input (too few rows, continuous or
		# single-class target, ...) as ValueError.
		st.error("Could not train the model: {}".format(err))
		return

	error_metrics = {
		'Accuracy_test': accuracy_score(y_test, predictions),
		'Accuracy_train': accuracy_score(y_train, predictions_train),
	}
	st.write('### Accuracy Train: ' + str(round(error_metrics['Accuracy_train'], 3)) +
			' -- Accuracy Test: ' + str(round(error_metrics['Accuracy_test'], 3)))


def main():
	""" Common ML Dataset Explorer

	Builds the whole Streamlit page: dataset picker, data inspection,
	visualizations, a small classification model, and the sidebar credits.
	"""
	html_temp = """
	<div style="background-color:pink;"><p style="color:white;font-size:75px;padding:10px">Room Occupancy Predictor 💡</p></div>
	"""
	st.markdown(html_temp,unsafe_allow_html=True)

	filename = _select_dataset()
	if filename is not None:
		st.info("You Selected {}".format(filename))
		try:
			df = pd.read_csv(filename)
		except (OSError, ValueError) as err:
			# pandas parser, empty-file and decoding errors all derive from ValueError.
			st.error("Could not read {}: {}".format(filename, err))
		else:
			_render_overview(df)
			_render_plots(df)
			_render_model_section(df)

	if st.button("Thanks"):
		st.balloons()

	st.sidebar.header("About App")
	st.sidebar.info("A Simple ML App for predicting Room Occupancy")

	st.sidebar.header("Developer")
	st.sidebar.info("Nasim Obeid")
	st.sidebar.text("Built with Streamlit")


# Entry point: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == '__main__':
	main()