Spaces:

Shrek29
/

group-3-final-project

Sleeping

App Files Files Community

group-3-final-project / app.py

sk290601

Added: APP.py

ba61270 about 1 year ago

raw

history blame contribute delete

8.07 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import folium
	from folium.plugins import HeatMap
	from streamlit_folium import st_folium
	import plotly.express as px
	from datetime import datetime

	# Page-level configuration: browser tab title and a full-width ("wide") layout.
	# This is the first Streamlit call made by the script.
	st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

	# Dashboard heading, followed by the project team roster rendered as a
	# Markdown bullet list and a one-line description of the dataset.
	st.title("Nuisance Complaints Analysis Dashboard")
	st.markdown("""
	Team Members:
	* Lu Chang (luchang2@illinois.edu)
	* Qiming Li (qimingl4@illinois.edu)
	* Ruchita Alate (ralate2@illinois.edu)
	* Shreyas Kulkarni (ssk16@illinois.edu)
	* Vishal Devulapalli (nsd3@illinois.edu)
	""")
	st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")

	# Load and clean data
	@st.cache_data
	def load_and_clean_data():
	    """Load ``Nuisance_Complaints.csv`` and return a cleaned DataFrame.

	    Cleaning steps, in order:

	    * drop rows without a ``File Number``;
	    * parse the three date columns (unparseable close dates become ``NaT``);
	    * fill missing notice dates with ``Date Reported`` plus the median
	      report-to-notice delay;
	    * fill missing complaint types with ``'Unknown'`` and missing
	      dispositions with the most common disposition of the same complaint
	      type (``'Pending'`` when that type has none);
	    * add ``Processing Time`` (days from report to close, NaN if still open);
	    * fill missing submission methods (``'Online'`` for online submissions,
	      otherwise the overall most common method);
	    * drop rows lacking ``Submitted Online?`` or ``Mapped Location``;
	    * add float ``Latitude`` / ``Longitude`` parsed from ``Mapped Location``;
	    * cast ``Year Reported`` to ``int``.

	    Returns:
	        pandas.DataFrame: the cleaned complaints table.

	    Raises:
	        Exception: any failure is reported through ``st.error`` and then
	            re-raised unchanged so the caller can stop the app.
	    """
	    try:
	        data = pd.read_csv('Nuisance_Complaints.csv')

	        # A complaint without a file number cannot be identified.
	        data = data.dropna(subset=['File Number'])

	        # Convert dates and handle date-related columns
	        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
	        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
	        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

	        # Impute missing notice dates from the typical report-to-notice delay.
	        # The median is NaN when no row has both dates; in that case there is
	        # nothing to impute from and building a Timedelta from NaN would fail,
	        # so the gaps are left as NaT.
	        notice_missing = data['Date Notice Mailed or Given'].isna()
	        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
	        if notice_missing.any() and pd.notna(median_delay):
	            data.loc[notice_missing, 'Date Notice Mailed or Given'] = (
	                data.loc[notice_missing, 'Date Reported'] + pd.Timedelta(days=median_delay)
	            )

	        # Handle 'Type of Complaint'
	        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

	        # Handle 'Disposition': per complaint type, the most frequent value.
	        def _most_common(values):
	            modes = values.mode()
	            return modes.iloc[0] if len(modes) > 0 else 'Pending'

	        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_most_common)
	        # Vectorised fill: only rows whose Disposition is missing take the
	        # per-type fallback; existing values are left untouched.
	        data['Disposition'] = data['Disposition'].fillna(
	            data['Type of Complaint'].map(most_common_disposition)
	        )

	        # Calculate processing time for resolved cases
	        data['Processing Time'] = np.where(
	            data['File Close Date'].notna(),
	            (data['File Close Date'] - data['Date Reported']).dt.days,
	            np.nan
	        )

	        # Handle 'Method Submitted'.
	        # 'Submitted Online?' may still contain NaN here (those rows are only
	        # dropped further down), and a mask holding NaN cannot be used with
	        # .loc -- compare against True so missing flags count as "not online".
	        submitted_online = data['Submitted Online?'].eq(True)
	        data.loc[
	            submitted_online & data['Method Submitted'].isna(),
	            'Method Submitted'
	        ] = 'Online'
	        method_modes = data['Method Submitted'].mode()
	        if not method_modes.empty:
	            data['Method Submitted'] = data['Method Submitted'].fillna(method_modes.iloc[0])

	        # Drop rows with missing critical values
	        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

	        # 'Mapped Location' is parsed as "(<lat>, <lon>)": the text between
	        # "(" and the comma, and the text between ", " and ")".
	        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
	        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

	        # Ensure Year Reported is integer
	        data['Year Reported'] = data['Year Reported'].astype(int)

	        return data

	    except Exception as e:
	        st.error(f"Error in data preprocessing: {e}")
	        # Bare raise keeps the original traceback intact.
	        raise

	# Load the data. Only the load itself is guarded: on failure the error is
	# shown and the script run is halted; the success banner lives in `else`
	# so a problem while rendering it is not misreported as a load error.
	try:
	    data = load_and_clean_data()
	except Exception as e:
	    st.error(f"Error loading data: {e}")
	    st.stop()
	else:
	    st.success("Data successfully loaded and cleaned!")

	# Sidebar: heading plus the two selectors that drive the rest of the page.
	st.sidebar.header("Dashboard Controls")

	# Distinct reporting years in ascending order, offered after an
	# "All Time" entry that disables the year filter.
	year_list = data['Year Reported'].unique().tolist()
	year_list.sort()
	year_options = ['All Time', *map(int, year_list)]

	selected_year = st.sidebar.selectbox("Select Year", options=year_options)

	# Which chart the main panel should show.
	viz_choices = [
	    "Complaint Types",
	    "Geographic Distribution",
	    "Resolution Status",
	    "Submission Methods",
	    "Complaints by Disposition",
	]
	viz_type = st.sidebar.selectbox("Select Visualization", viz_choices)


	# Restrict the data to the chosen year ("All Time" keeps every row) and
	# show a matching section header.
	show_all_years = selected_year == 'All Time'
	filtered_data = data if show_all_years else data[data['Year Reported'] == selected_year]
	st.header("Analysis for All Time" if show_all_years else f"Analysis for Year {selected_year}")
	# Main content

	# Headline metrics: row count, mean processing time, most frequent type.
	col1, col2, col3 = st.columns(3)
	with col1:
	    st.metric("Total Complaints", len(filtered_data))
	with col2:
	    avg_time = filtered_data['Processing Time'].mean()
	    # The mean is NaN when no case in the selection has a processing time.
	    avg_time_label = "N/A" if pd.isna(avg_time) else f"{avg_time:.1f} days"
	    st.metric("Average Processing Time", avg_time_label)
	with col3:
	    if filtered_data.empty:
	        most_common = "N/A"
	    else:
	        # value_counts() sorts by frequency, so the first label is the mode.
	        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
	    st.metric("Most Common Type", most_common)
	def _render_count_bars(counts, title, *, palette=None, title_fontsize=None,
	                       xlabel=None, ylabel=None, label_fontsize=None):
	    """Render a horizontal bar chart of ``counts`` in the Streamlit page.

	    Args:
	        counts: Series of counts indexed by category (e.g. ``value_counts()``).
	        title: Axes title.
	        palette: Optional seaborn palette name; seaborn's default when None.
	        title_fontsize: Optional title font size; Matplotlib default when None.
	        xlabel: Optional x-axis label; left as seaborn sets it when None.
	        ylabel: Optional y-axis label; left as seaborn sets it when None.
	        label_fontsize: Optional font size for ``xlabel`` / ``ylabel``.

	    Shows a "no data" message instead of a chart when ``counts`` is empty.
	    """
	    if counts.empty:
	        st.write("No data available for the selected year.")
	        return

	    fig, ax = plt.subplots(figsize=(10, 6))
	    # Optional styling is passed only when requested so that Matplotlib /
	    # seaborn defaults apply otherwise.
	    bar_style = {} if palette is None else {"palette": palette}
	    title_style = {} if title_fontsize is None else {"fontsize": title_fontsize}
	    label_style = {} if label_fontsize is None else {"fontsize": label_fontsize}

	    # Draw on this figure's own axes rather than through pyplot's implicit
	    # "current figure" state.
	    sns.barplot(x=counts.values, y=counts.index, ax=ax, **bar_style)
	    ax.set_title(title, **title_style)
	    if xlabel is not None:
	        ax.set_xlabel(xlabel, **label_style)
	    if ylabel is not None:
	        ax.set_ylabel(ylabel, **label_style)

	    st.pyplot(fig)
	    # Unregister the figure from pyplot so figures do not pile up over
	    # repeated script runs.
	    plt.close(fig)


	if viz_type == "Complaint Types":
	    # Interactive Pie Chart
	    st.subheader("Interactive Complaint Types Pie Chart")
	    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
	    complaint_counts.columns = ['Complaint Type', 'Count']

	    fig = px.pie(
	        complaint_counts,
	        names='Complaint Type',
	        values='Count',
	        title=f'Complaint Types Distribution in {selected_year}',
	        hole=0.4  # Donut style
	    )
	    fig.update_traces(textinfo='percent+label')
	    st.plotly_chart(fig, use_container_width=True)

	elif viz_type == "Geographic Distribution":
	    # Clustered Heatmap
	    st.subheader("Clustered Heatmap of Complaints")
	    # Only rows with both coordinates can be plotted; without any, the map
	    # centre would be NaN, so show a message instead of building the map.
	    coords = filtered_data[['Latitude', 'Longitude']].dropna()
	    if coords.empty:
	        st.write("No data available for the selected year.")
	    else:
	        map_center = [coords['Latitude'].mean(), coords['Longitude'].mean()]
	        m = folium.Map(location=map_center, zoom_start=12)
	        HeatMap(coords.values.tolist()).add_to(m)
	        st_folium(m, width=700, height=500)

	elif viz_type == "Resolution Status":
	    st.subheader("Complaint Resolution Status")
	    _render_count_bars(
	        filtered_data['Disposition'].value_counts(),
	        f'Resolution Status Distribution in {selected_year}',
	    )

	elif viz_type == "Submission Methods":
	    st.subheader("Submission Methods Analysis")
	    _render_count_bars(
	        filtered_data['Method Submitted'].value_counts(),
	        f'Submission Methods in {selected_year}',
	    )

	elif viz_type == "Complaints by Disposition":
	    st.subheader("Complaints by Disposition")
	    _render_count_bars(
	        filtered_data['Disposition'].value_counts(),
	        f'Complaints by Disposition in {selected_year}',
	        palette="viridis",
	        title_fontsize=14,
	        xlabel='Number of Complaints',
	        ylabel='Disposition',
	        label_fontsize=12,
	    )

	# Key insights: two side-by-side summary tables for the current selection.
	st.header("Key Insights")
	col1, col2 = st.columns(2)

	with col1:
	    st.subheader("Top 3 Complaint Types")
	    # value_counts() is already sorted by frequency; keep the first three.
	    top_complaints = filtered_data['Type of Complaint'].value_counts().iloc[:3]
	    st.write(top_complaints)

	with col2:
	    st.subheader("Resolution Efficiency")
	    # Share of the selected complaints per disposition, as a percentage
	    # rounded to two decimals.
	    disposition_share = filtered_data['Disposition'].value_counts() / len(filtered_data)
	    resolution_rate = (disposition_share * 100).round(2)
	    st.write(resolution_rate)

	# Footer: horizontal rule followed by the data attribution.
	for footer_line in ("---", "Dataset provided by the City of Urbana Open Data Portal"):
	    st.markdown(footer_line)