import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px
from datetime import datetime
# Page configuration must precede every other st.* call.
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Header area: dashboard title, team roster, one-line description.
st.title("Nuisance Complaints Analysis Dashboard")

team_roster_md = """
**Team Members:**
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
"""
st.markdown(team_roster_md)

st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load ``Nuisance_Complaints.csv`` and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows lacking a 'File Number' (the record key),
      * parse the three date columns,
      * impute missing 'Date Notice Mailed or Given' as report date plus the
        median observed report-to-notice delay,
      * fill missing 'Type of Complaint' / 'Disposition' values,
      * derive 'Processing Time' in days for closed files (NaN for open ones),
      * impute 'Method Submitted' and drop rows without a mapped location,
      * split 'Mapped Location' ("(lat, lon)") into numeric Latitude/Longitude.

    Returns:
        pd.DataFrame: cleaned complaints data.

    Raises:
        Exception: any preprocessing failure is surfaced via st.error and
        re-raised for the caller to handle.
    """
    try:
        data = pd.read_csv('Nuisance_Complaints.csv')

        # 'File Number' identifies a record; rows without it are unusable.
        data = data.dropna(subset=['File Number'])

        # Parse dates; close dates may legitimately be absent (open files).
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates as report date + median observed delay.
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        if pd.isna(median_delay):
            # No notice date observed at all: fall back to a same-day notice
            # (pd.Timedelta(days=NaN) would raise otherwise).
            median_delay = 0
        missing_notice = data['Date Notice Mailed or Given'].isna()
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        # Fill missing dispositions with the most common one per complaint type.
        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
            lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
        )
        data['Disposition'] = data.apply(
            lambda row: most_common_disposition[row['Type of Complaint']]
            if pd.isna(row['Disposition']) else row['Disposition'],
            axis=1
        )

        # Days from report to closure; NaN while the file is still open.
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # Online submissions with no recorded method are, by definition, 'Online'.
        data.loc[
            (data['Submitted Online?']) & (data['Method Submitted'].isna()),
            'Method Submitted'
        ] = 'Online'
        if data['Method Submitted'].isna().any():
            # Guarded so mode()[0] never indexes into an empty Series.
            data['Method Submitted'] = data['Method Submitted'].fillna(
                data['Method Submitted'].mode()[0]
            )

        # Location and the online flag are required downstream (map, filters).
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like "(lat, lon)"; pull both in one pass.
        coords = data['Mapped Location'].str.extract(r'\(([^,]+), ([^,]+)\)')
        data['Latitude'] = coords[0].astype(float)
        data['Longitude'] = coords[1].astype(float)

        # Ensure Year Reported is integer (used for exact-match filtering).
        data['Year Reported'] = data['Year Reported'].astype(int)
        return data
    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        raise  # bare raise preserves the original traceback
# Load the data once; on failure, show the error and halt the script run.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as exc:
    st.error(f"Error loading data: {exc}")
    st.stop()
# Sidebar controls: year filter plus visualization selector.
st.sidebar.header("Dashboard Controls")

# 'All Time' first, then every year present in the data, ascending.
years_present = sorted(data['Year Reported'].unique().tolist())
year_options = ['All Time'] + [int(y) for y in years_present]
selected_year = st.sidebar.selectbox(
    "Select Year",
    options=year_options,
)

viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Complaints by Disposition"]
)

# Restrict the working frame to the chosen year and set the page header.
if selected_year == 'All Time':
    filtered_data = data  # complete dataset
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for Year {selected_year}")
# Main content
# Headline metrics for the current filter selection.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Mean is NaN when the slice contains no closed cases.
    avg_time = filtered_data['Processing Time'].mean()
    st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
with col3:
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
        st.metric("Most Common Type", most_common)
    else:
        st.metric("Most Common Type", "N/A")
# Render the visualization chosen in the sidebar.
if viz_type == "Complaint Types":
    # Interactive Plotly donut chart of complaint-type shares.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']
    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Geographic Distribution":
    # Folium heatmap centered on the mean complaint coordinate.
    st.subheader("Clustered Heatmap of Complaints")
    map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
    m = folium.Map(location=map_center, zoom_start=12)
    heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
    HeatMap(heat_data).add_to(m)
    st_data = st_folium(m, width=700, height=500)
elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw explicitly on `ax` (was implicit current-axes) to match the
    # "Complaints by Disposition" branch below.
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)
elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)
elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()
    if not disposition_counts.empty:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette="viridis", ax=ax)
        ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
        ax.set_xlabel('Number of Complaints', fontsize=12)
        ax.set_ylabel('Disposition', fontsize=12)
        st.pyplot(fig)
    else:
        st.write("No data available for the selected year.")
# Additional insights
st.header("Key Insights")
col1, col2 = st.columns(2)
with col1:
    st.subheader("Top 3 Complaint Types")
    top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
    st.write(top_complaints)
with col2:
    st.subheader("Resolution Efficiency")
    # Share of complaints (percent) falling into each disposition bucket.
    resolution_rate = (filtered_data['Disposition'].value_counts() /
                       len(filtered_data) * 100).round(2)
    st.write(resolution_rate)

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")