Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
| 3 |
import numpy as np
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import seaborn as sns
|
|
|
|
| 6 |
import folium
|
| 7 |
from folium.plugins import HeatMap
|
| 8 |
from streamlit_folium import st_folium
|
|
@@ -21,118 +22,66 @@ st.markdown("""
|
|
| 21 |
* Ruchita Alate (ralate2@illinois.edu)
|
| 22 |
* Shreyas Kulkarni (ssk16@illinois.edu)
|
| 23 |
* Vishal Devulapalli (nsd3@illinois.edu)
|
|
|
|
|
|
|
| 24 |
""")
|
| 25 |
-
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
|
| 26 |
|
| 27 |
# Load and clean data
|
| 28 |
@st.cache_data
|
| 29 |
def load_and_clean_data():
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
data['Date Notice Mailed or Given']
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
|
| 49 |
-
|
| 50 |
-
# Handle 'Disposition'
|
| 51 |
-
most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
|
| 52 |
-
lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
|
| 53 |
-
)
|
| 54 |
-
data['Disposition'] = data.apply(
|
| 55 |
-
lambda row: most_common_disposition[row['Type of Complaint']]
|
| 56 |
-
if pd.isna(row['Disposition']) else row['Disposition'],
|
| 57 |
-
axis=1
|
| 58 |
-
)
|
| 59 |
-
|
| 60 |
-
# Calculate processing time for resolved cases
|
| 61 |
-
data['Processing Time'] = np.where(
|
| 62 |
-
data['File Close Date'].notna(),
|
| 63 |
-
(data['File Close Date'] - data['Date Reported']).dt.days,
|
| 64 |
-
np.nan
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
# Handle 'Method Submitted'
|
| 68 |
-
data.loc[
|
| 69 |
-
(data['Submitted Online?']) & (data['Method Submitted'].isna()),
|
| 70 |
-
'Method Submitted'
|
| 71 |
-
] = 'Online'
|
| 72 |
-
data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
|
| 73 |
-
|
| 74 |
-
# Drop rows with missing critical values
|
| 75 |
-
data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])
|
| 76 |
-
|
| 77 |
-
# Extract and clean location data
|
| 78 |
-
data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
|
| 79 |
-
data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
|
| 80 |
-
|
| 81 |
-
# Ensure Year Reported is integer
|
| 82 |
-
data['Year Reported'] = data['Year Reported'].astype(int)
|
| 83 |
-
|
| 84 |
-
return data
|
| 85 |
-
|
| 86 |
-
except Exception as e:
|
| 87 |
-
st.error(f"Error in data preprocessing: {str(e)}")
|
| 88 |
-
raise e
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
|
|
|
| 100 |
st.sidebar.header("Dashboard Controls")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
#
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
options=year_options,
|
| 109 |
-
)
|
| 110 |
-
# Add visualization type selector
|
| 111 |
-
viz_type = st.sidebar.selectbox(
|
| 112 |
-
"Select Visualization",
|
| 113 |
-
["Complaint Types", "Geographic Distribution", "Resolution Status",
|
| 114 |
-
"Submission Methods", "Complaints by Disposition"]
|
| 115 |
-
)
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
# Filter data based on selected year
|
| 119 |
-
if selected_year == 'All Time':
|
| 120 |
-
filtered_data = data # Use complete dataset when 'All Time' is selected
|
| 121 |
-
else:
|
| 122 |
-
filtered_data = data[data['Year Reported'] == selected_year]
|
| 123 |
-
|
| 124 |
-
# Update header text
|
| 125 |
-
if selected_year == 'All Time':
|
| 126 |
-
st.header("Analysis for All Time")
|
| 127 |
-
else:
|
| 128 |
-
st.header(f"Analysis for Year {selected_year}")
|
| 129 |
-
# Main content
|
| 130 |
-
|
| 131 |
-
# Create metrics
|
| 132 |
-
# Create metrics
|
| 133 |
-
# Create metrics
|
| 134 |
-
# Create metrics
|
| 135 |
-
# Create metrics
|
| 136 |
col1, col2, col3 = st.columns(3)
|
| 137 |
with col1:
|
| 138 |
st.metric("Total Complaints", len(filtered_data))
|
|
@@ -140,85 +89,90 @@ with col2:
|
|
| 140 |
avg_time = filtered_data['Processing Time'].mean()
|
| 141 |
st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
|
| 142 |
with col3:
|
| 143 |
-
if not filtered_data.empty
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
st.metric("Most Common Type", "N/A")
|
| 148 |
if viz_type == "Complaint Types":
|
| 149 |
-
# Interactive Pie Chart
|
| 150 |
st.subheader("Interactive Complaint Types Pie Chart")
|
| 151 |
complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
|
| 152 |
complaint_counts.columns = ['Complaint Type', 'Count']
|
| 153 |
-
|
| 154 |
-
fig = px.pie(
|
| 155 |
-
complaint_counts,
|
| 156 |
-
names='Complaint Type',
|
| 157 |
-
values='Count',
|
| 158 |
-
title=f'Complaint Types Distribution in {selected_year}',
|
| 159 |
-
hole=0.4 # Donut style
|
| 160 |
-
)
|
| 161 |
-
fig.update_traces(textinfo='percent+label')
|
| 162 |
st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
elif viz_type == "Geographic Distribution":
|
| 165 |
-
# Clustered Heatmap
|
| 166 |
st.subheader("Clustered Heatmap of Complaints")
|
| 167 |
map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
|
| 168 |
m = folium.Map(location=map_center, zoom_start=12)
|
| 169 |
-
|
| 170 |
heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
|
| 171 |
HeatMap(heat_data).add_to(m)
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
| 175 |
|
| 176 |
elif viz_type == "Resolution Status":
|
| 177 |
-
st.subheader("Complaint Resolution Status")
|
| 178 |
-
|
| 179 |
-
resolution_counts =
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
elif viz_type == "Submission Methods":
|
| 185 |
st.subheader("Submission Methods Analysis")
|
| 186 |
-
fig, ax = plt.subplots(figsize=(10, 6))
|
| 187 |
submission_counts = filtered_data['Method Submitted'].value_counts()
|
| 188 |
-
|
| 189 |
-
|
|
|
|
| 190 |
st.pyplot(fig)
|
| 191 |
-
|
|
|
|
|
|
|
| 192 |
|
| 193 |
elif viz_type == "Complaints by Disposition":
|
| 194 |
st.subheader("Complaints by Disposition")
|
| 195 |
disposition_counts = filtered_data['Disposition'].value_counts()
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
st.pyplot(fig)
|
| 204 |
-
else:
|
| 205 |
-
st.write("No data available for the selected year.")
|
| 206 |
-
|
| 207 |
-
# Additional insights
|
| 208 |
-
st.header("Key Insights")
|
| 209 |
-
col1, col2 = st.columns(2)
|
| 210 |
-
|
| 211 |
-
with col1:
|
| 212 |
-
st.subheader("Top 3 Complaint Types")
|
| 213 |
-
top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
|
| 214 |
-
st.write(top_complaints)
|
| 215 |
-
|
| 216 |
-
with col2:
|
| 217 |
-
st.subheader("Resolution Efficiency")
|
| 218 |
-
resolution_rate = (filtered_data['Disposition'].value_counts() /
|
| 219 |
-
len(filtered_data) * 100).round(2)
|
| 220 |
-
st.write(resolution_rate)
|
| 221 |
-
|
| 222 |
-
# Footer
|
| 223 |
-
st.markdown("---")
|
| 224 |
-
st.markdown("Dataset provided by the City of Urbana Open Data Portal")
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import seaborn as sns
|
| 6 |
+
import altair as alt
|
| 7 |
import folium
|
| 8 |
from folium.plugins import HeatMap
|
| 9 |
from streamlit_folium import st_folium
|
|
|
|
| 22 |
* Ruchita Alate (ralate2@illinois.edu)
|
| 23 |
* Shreyas Kulkarni (ssk16@illinois.edu)
|
| 24 |
* Vishal Devulapalli (nsd3@illinois.edu)
|
| 25 |
+
|
| 26 |
+
This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
|
| 27 |
""")
|
|
|
|
| 28 |
|
| 29 |
# Load and clean data
|
| 30 |
@st.cache_data
def load_and_clean_data(csv_path='Nuisance_Complaints_20241204.csv'):
    """Load the nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps, in order:
      1. Drop rows without a 'File Number'.
      2. Parse 'Date Reported', 'Date Notice Mailed or Given' and
         'File Close Date' as datetimes (unparseable close dates become NaT).
      3. Fill missing notice dates with 'Date Reported' plus the median
         report-to-notice delay (in days).
      4. Fill missing 'Type of Complaint' with 'Unknown' and missing
         'Disposition' with the most common disposition for that complaint
         type ('Pending' when a type has no recorded disposition).
      5. Add 'Processing Time' (days from report to close, NaN if not closed).
      6. Fill missing 'Method Submitted' with 'Online' when
         'Submitted Online?' is truthy, otherwise with the column's mode.
      7. Drop rows missing 'Submitted Online?' or 'Mapped Location'.
      8. Add 'Latitude' / 'Longitude' parsed from 'Mapped Location',
         cast 'Year Reported' to int, and add 'Month Reported'.

    Args:
        csv_path: Path of the CSV file to read. Defaults to the file name the
            app originally hard-coded.

    Returns:
        The cleaned pandas DataFrame.

    Raises:
        Errors raised by pandas while reading or converting the data are not
        caught here and propagate to the caller.
    """

    def _mode_or_pending(values):
        """Return the most frequent non-null value, or 'Pending' if none exists."""
        modes = values.mode()  # computed once instead of twice per group
        return modes.iloc[0] if len(modes) > 0 else 'Pending'

    data = pd.read_csv(csv_path)
    # .copy() gives an independent frame, so the column assignments below never
    # write into a filtered view (avoids SettingWithCopy warnings).
    data = data.dropna(subset=['File Number']).copy()
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Fill missing notice dates with report date + median delay.
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    missing_notice = data['Date Notice Mailed or Given'].isna()
    # The median is NaN when no row has both dates; a NaN day count is not a
    # valid Timedelta argument, so leave the gaps as NaT in that case.
    if pd.notna(median_delay):
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_mode_or_pending)
    # Vectorised replacement for the former row-wise apply: look up each row's
    # per-type fallback and use it only where 'Disposition' is missing.
    data['Disposition'] = data['Disposition'].fillna(
        data['Type of Complaint'].map(most_common_disposition)
    )

    data['Processing Time'] = np.where(
        data['File Close Date'].notna(),
        (data['File Close Date'] - data['Date Reported']).dt.days,
        np.nan
    )

    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()), 'Method Submitted'] = 'Online'
    # mode() is empty when the column has no non-null values; indexing it
    # unconditionally would raise, so only fill when a mode exists.
    method_modes = data['Method Submitted'].mode()
    if not method_modes.empty:
        data['Method Submitted'] = data['Method Submitted'].fillna(method_modes.iloc[0])
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location']).copy()

    # Latitude is the text between '(' and the first ','; longitude is the
    # text between ', ' and the closing ')'.
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month
    return data
|
| 66 |
+
|
| 67 |
+
# Fetch the cleaned complaints table.
data = load_and_clean_data()

# Sidebar: pick a reporting year (or everything) and a visualization.
st.sidebar.header("Dashboard Controls")
year_options = ['All Time']
year_options += sorted(data['Year Reported'].unique().tolist())
selected_year = st.sidebar.selectbox("Select Year", options=year_options)
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    [
        "Complaint Types",
        "Geographic Distribution",
        "Resolution Status",
        "Submission Methods",
        "Complaints by Disposition",
        "Monthly Trends by Complaint Type",
    ],
)

# Narrow the table to the chosen year; 'All Time' keeps every row.
if selected_year == 'All Time':
    filtered_data = data
else:
    filtered_data = data[data['Year Reported'] == selected_year]

# selected_year is already the text to show, whether 'All Time' or a year.
st.header(f"Analysis for {selected_year}")
|
| 83 |
+
|
| 84 |
+
# Display metrics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
col1, col2, col3 = st.columns(3)
|
| 86 |
with col1:
|
| 87 |
st.metric("Total Complaints", len(filtered_data))
|
|
|
|
| 89 |
avg_time = filtered_data['Processing Time'].mean()
|
| 90 |
st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
|
| 91 |
with col3:
|
| 92 |
+
most_common = filtered_data['Type of Complaint'].value_counts().index[0] if not filtered_data.empty else "N/A"
|
| 93 |
+
st.metric("Most Common Type", most_common)
|
| 94 |
+
|
| 95 |
+
# Visualizations: render the chart chosen in the sidebar for the filtered rows,
# followed by a short write-up describing it.
if viz_type == "Complaint Types":
    # Donut chart of complaint counts per complaint type.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']
    fig = px.pie(complaint_counts, names='Complaint Type', values='Count', hole=0.4)
    st.plotly_chart(fig, use_container_width=True)
    st.write("""
    **Write-up:** This visualization shows the distribution of complaint types as a donut chart.
    It provides a quick overview of the most common complaints. The warm color palette helps
    highlight differences between complaint categories.""")

elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
    if heat_data:
        # At least one complete coordinate pair exists, so both means are finite.
        map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(heat_data).add_to(m)
        st_folium(m, width=700, height=500)
    else:
        # Without coordinates the mean is NaN, which is not a usable map centre;
        # say so instead of failing while building the map.
        st.info("No mappable complaint locations for the current selection.")
    st.write("""
    **Write-up:** This heatmap visualizes complaint hotspots geographically. Areas with
    higher complaint density are highlighted, helping policymakers focus resources effectively.""")

elif viz_type == "Resolution Status":
    # Donut chart of dispositions with count and percentage tooltips.
    st.subheader("Interactive Complaint Resolution Status")
    resolution_counts = filtered_data['Disposition'].value_counts().reset_index()
    resolution_counts.columns = ['Disposition', 'Count']
    resolution_counts['Percentage'] = (resolution_counts['Count'] / resolution_counts['Count'].sum()) * 100
    chart = alt.Chart(resolution_counts).mark_arc(innerRadius=50).encode(
        theta=alt.Theta(field="Count", type="quantitative"),
        color=alt.Color(field="Disposition", type="nominal"),
        tooltip=[
            alt.Tooltip("Disposition", title="Resolution"),
            alt.Tooltip("Count", title="Count"),
            alt.Tooltip("Percentage", title="Percentage", format=".2f")
        ]
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
    **Write-up:** This chart visualizes resolution status using a donut chart.
    It provides insights into the efficiency of complaint resolutions.""")

elif viz_type == "Monthly Trends by Complaint Type":
    # Local stdlib import: month names come from the calendar module, so this
    # branch does not depend on a `datetime` import or an arbitrary year.
    import calendar

    st.subheader("Monthly Trends Grouped by Complaint Types")
    monthly_trends = (
        filtered_data.groupby(['Month Reported', 'Type of Complaint'])
        .size()
        .reset_index(name='Count')
    )
    # Month numbers become floats when any report date is missing; groupby has
    # already dropped the missing keys, so casting back to int is safe and
    # keeps both the axis labels and the name lookup working.
    monthly_trends['Month Reported'] = monthly_trends['Month Reported'].astype(int)
    monthly_trends['Month'] = monthly_trends['Month Reported'].map(
        lambda month: calendar.month_name[int(month)]
    )
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color=alt.Color('Type of Complaint:N', title='Complaint Type'),
        tooltip=["Type of Complaint:N", "Month:N", "Count:Q"]
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
    **Write-up:** This line chart visualizes monthly trends in complaints grouped by type.
    The use of vibrant colors helps distinguish trends across different complaint types.""")

elif viz_type == "Submission Methods":
    # Horizontal bar chart of how complaints were submitted.
    st.subheader("Submission Methods Analysis")
    submission_counts = filtered_data['Method Submitted'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=submission_counts.values, y=submission_counts.index, palette='inferno', ax=ax)
    ax.set_title(f"Submission Methods in {selected_year}")
    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so pyplot does
    # not keep accumulating open figures.
    plt.close(fig)
    st.write("""
    **Write-up:** This bar chart illustrates the preferred methods for complaint submission.
    The `inferno` color palette highlights differences across submission types.""")

elif viz_type == "Complaints by Disposition":
    # Horizontal bar chart of complaint counts per disposition.
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette='viridis', ax=ax)
    ax.set_title(f"Complaints by Disposition in {selected_year}")
    st.pyplot(fig)
    # Release the figure after rendering (see the note in the branch above).
    plt.close(fig)
    st.write("""
    **Write-up:** This bar chart shows the distribution of complaints across various dispositions.
    The `viridis` color palette effectively highlights disposition categories.""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|