sk290601's picture
Added: APP.py
ba61270
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px
from datetime import datetime
# Set page config
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
# Title and introduction
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown("""
**Team Members:**
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
""")
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
# Load and clean data
@st.cache_data
def load_and_clean_data():
try:
# Load data
data = pd.read_csv('Nuisance_Complaints.csv')
# Drop rows with missing 'File Number'
data = data.dropna(subset=['File Number'])
# Convert dates and handle date-related columns
data['Date Reported'] = pd.to_datetime(data['Date Reported'])
data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')
# Handle 'Date Notice Mailed or Given'
median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Notice Mailed or Given'] = \
data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Reported'] + pd.Timedelta(days=median_delay)
# Handle 'Type of Complaint'
data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
# Handle 'Disposition'
most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
)
data['Disposition'] = data.apply(
lambda row: most_common_disposition[row['Type of Complaint']]
if pd.isna(row['Disposition']) else row['Disposition'],
axis=1
)
# Calculate processing time for resolved cases
data['Processing Time'] = np.where(
data['File Close Date'].notna(),
(data['File Close Date'] - data['Date Reported']).dt.days,
np.nan
)
# Handle 'Method Submitted'
data.loc[
(data['Submitted Online?']) & (data['Method Submitted'].isna()),
'Method Submitted'
] = 'Online'
data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
# Drop rows with missing critical values
data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])
# Extract and clean location data
data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
# Ensure Year Reported is integer
data['Year Reported'] = data['Year Reported'].astype(int)
return data
except Exception as e:
st.error(f"Error in data preprocessing: {str(e)}")
raise e
# Load the data
try:
data = load_and_clean_data()
st.success("Data successfully loaded and cleaned!")
except Exception as e:
st.error(f"Error loading data: {str(e)}")
st.stop()
# Create sidebar
st.sidebar.header("Dashboard Controls")
# Get unique years and convert to list for selectbox
year_list = sorted(data['Year Reported'].unique().tolist())
year_options = ['All Time'] + [int(year) for year in year_list] # Convert years to integers
selected_year = st.sidebar.selectbox(
"Select Year",
options=year_options,
)
# Add visualization type selector
viz_type = st.sidebar.selectbox(
"Select Visualization",
["Complaint Types", "Geographic Distribution", "Resolution Status",
"Submission Methods", "Complaints by Disposition"]
)
# Filter data based on selected year
if selected_year == 'All Time':
filtered_data = data # Use complete dataset when 'All Time' is selected
else:
filtered_data = data[data['Year Reported'] == selected_year]
# Update header text
if selected_year == 'All Time':
st.header("Analysis for All Time")
else:
st.header(f"Analysis for Year {selected_year}")
# Main content
# Create metrics
# Create metrics
# Create metrics
# Create metrics
# Create metrics
col1, col2, col3 = st.columns(3)
with col1:
st.metric("Total Complaints", len(filtered_data))
with col2:
avg_time = filtered_data['Processing Time'].mean()
st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
with col3:
if not filtered_data.empty:
most_common = filtered_data['Type of Complaint'].value_counts().index[0]
st.metric("Most Common Type", most_common)
else:
st.metric("Most Common Type", "N/A")
if viz_type == "Complaint Types":
# Interactive Pie Chart
st.subheader("Interactive Complaint Types Pie Chart")
complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
complaint_counts.columns = ['Complaint Type', 'Count']
fig = px.pie(
complaint_counts,
names='Complaint Type',
values='Count',
title=f'Complaint Types Distribution in {selected_year}',
hole=0.4 # Donut style
)
fig.update_traces(textinfo='percent+label')
st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Geographic Distribution":
# Clustered Heatmap
st.subheader("Clustered Heatmap of Complaints")
map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=12)
heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
HeatMap(heat_data).add_to(m)
st_data = st_folium(m, width=700, height=500)
elif viz_type == "Resolution Status":
st.subheader("Complaint Resolution Status")
fig, ax = plt.subplots(figsize=(10, 6))
resolution_counts = filtered_data['Disposition'].value_counts()
sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
plt.title(f'Resolution Status Distribution in {selected_year}')
st.pyplot(fig)
elif viz_type == "Submission Methods":
st.subheader("Submission Methods Analysis")
fig, ax = plt.subplots(figsize=(10, 6))
submission_counts = filtered_data['Method Submitted'].value_counts()
sns.barplot(x=submission_counts.values, y=submission_counts.index)
plt.title(f'Submission Methods in {selected_year}')
st.pyplot(fig)
elif viz_type == "Complaints by Disposition":
st.subheader("Complaints by Disposition")
disposition_counts = filtered_data['Disposition'].value_counts()
if not disposition_counts.empty:
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette="viridis", ax=ax)
ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
ax.set_xlabel('Number of Complaints', fontsize=12)
ax.set_ylabel('Disposition', fontsize=12)
st.pyplot(fig)
else:
st.write("No data available for the selected year.")
# Additional insights
st.header("Key Insights")
col1, col2 = st.columns(2)
with col1:
st.subheader("Top 3 Complaint Types")
top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
st.write(top_complaints)
with col2:
st.subheader("Resolution Efficiency")
resolution_rate = (filtered_data['Disposition'].value_counts() /
len(filtered_data) * 100).round(2)
st.write(resolution_rate)
# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")