File size: 20,041 Bytes
a1d9286
 
 
 
 
b7feb2b
0972530
a1d9286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7feb2b
 
a1d9286
 
 
 
 
140971e
b7feb2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1d9286
b7feb2b
 
 
 
 
 
 
 
 
a1d9286
b7feb2b
 
 
 
 
 
 
 
a1d9286
b7feb2b
a1d9286
38d8fcf
29c6866
 
8b33a3f
4b58641
29c6866
 
38d8fcf
 
 
 
 
 
 
5b69b6a
 
 
 
1a4102e
c1925f9
 
 
 
5b69b6a
 
 
 
1a4102e
88098e3
 
 
 
1a4102e
ad4db27
 
 
 
 
38d8fcf
88098e3
ad4db27
b7feb2b
5b69b6a
 
 
 
 
 
 
38d8fcf
 
 
 
 
1a4102e
 
b7feb2b
 
5b69b6a
b7feb2b
97667b8
a1d9286
 
 
b7feb2b
 
 
 
a1d9286
fac1189
1d4c87b
fac1189
 
 
1d4c87b
4b58641
1d4c87b
fac1189
1d4c87b
 
fac1189
4b58641
1d4c87b
 
 
 
 
 
a1d9286
1d4c87b
 
 
fac1189
4b58641
fac1189
 
 
4b58641
1d4c87b
 
a1d9286
 
fac1189
 
a1d9286
 
 
 
fac1189
 
b7feb2b
fac1189
 
 
 
 
 
 
 
 
 
 
a1d9286
4b58641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1925f9
4b58641
8b33a3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b58641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fac1189
a1d9286
 
4b58641
 
 
 
 
 
 
 
 
 
 
 
 
b7feb2b
0972530
fac1189
 
c1925f9
4b58641
fac1189
4b58641
 
fac1189
4b58641
fac1189
 
 
4b58641
0972530
 
 
 
 
 
 
 
 
 
fac1189
 
 
 
 
 
 
 
 
 
 
 
4b58641
 
 
 
 
 
0972530
4b58641
 
 
fac1189
4b58641
 
fac1189
4b58641
 
 
fac1189
0972530
 
1a4102e
fac1189
1a4102e
c1925f9
0972530
 
 
fac1189
 
 
 
 
 
edea274
0972530
1a4102e
ad4db27
 
 
 
 
 
69bfce8
ad4db27
 
 
 
 
 
 
 
 
 
1a4102e
b01c1d2
 
 
ad4db27
0972530
 
88098e3
fac1189
 
 
 
 
 
 
 
 
 
0972530
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from datetime import datetime
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px

# Set page config
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Title and introduction
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown("""
**Team Members:** 
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)

This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
""")

# Load and clean data
@st.cache_data
def load_and_clean_data():
    data = pd.read_csv('Nuisance_Complaints_20241130.csv')
    data = data.dropna(subset=['File Number'])
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Fill missing dates
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Notice Mailed or Given'] = \
        data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Reported'] + pd.Timedelta(days=median_delay)

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
        lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
    )
    data['Disposition'] = data.apply(
        lambda row: most_common_disposition[row['Type of Complaint']] if pd.isna(row['Disposition']) else row['Disposition'], axis=1
    )

    data['Processing Time'] = np.where(
        data['File Close Date'].notna(),
        (data['File Close Date'] - data['Date Reported']).dt.days,
        np.nan
    )

    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()), 'Method Submitted'] = 'Online'
    data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month
    return data

# Load the data
data = load_and_clean_data()

# Sidebar for controls
st.sidebar.header("Dashboard Controls")

# First, define the visualization type (viz_type) selection
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Complaints by Disposition", "Submission Methods","Monthly Trends by Complaint Type",
    "Complaints Over Time", "Complaints by Housing Block and Type"
])

# Remove the year selection when certain visualizations are selected
if viz_type not in ["Complaints Over Time", "Complaints by Housing Block and Type"]:
    year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
    selected_year = st.sidebar.selectbox("Select Year", options=year_options)
else:
    selected_year = 'All Time'  # Default to 'All Time' if visualization doesn't require year

# Date Range Selector for Complaints Over Time (only show when Complaints Over Time is selected)
if viz_type == "Complaints Over Time":
    start_date = st.sidebar.date_input("Start Date", pd.to_datetime("2020-01-01"))
    end_date = st.sidebar.date_input("End Date", pd.to_datetime("2024-12-31"))

# Allow the user to select the type of chart (Bar or Pie) for Submission Methods
if viz_type == "Submission Methods":
    plot_type = st.sidebar.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])

# Dropdown for Housing Block (only show when Complaints by Housing Block and Type is selected)
if viz_type == "Complaints by Housing Block and Type":
    block_options = ['All Blocks'] + sorted(data['Housing Block'].unique().tolist())
    selected_block = st.sidebar.selectbox("Select Housing Block", options=block_options)

# Ensure selected_block is only used if defined
if viz_type == "Complaints by Housing Block and Type" and 'selected_block' not in locals():
    selected_block = 'All Blocks'  # Default to 'All Blocks' if no selection made

# Filter data based on selected year
if selected_year != 'All Time':
    filtered_data = data[data['Year Reported'] == selected_year]
else:
    filtered_data = data  

# Further filter by Housing Block
if 'selected_block' in locals() and selected_block != 'All Blocks':
    filtered_data = filtered_data[filtered_data['Housing Block'] == selected_block]

# Filter data based on date range (only for Complaints Over Time visualization)
if viz_type == "Complaints Over Time":
    filtered_data_time = filtered_data[
        (filtered_data['Date Reported'] >= pd.to_datetime(start_date)) & 
        (filtered_data['Date Reported'] <= pd.to_datetime(end_date))
    ]
else:
    filtered_data_time = filtered_data

# Filter data based on selected housing block (only for Complaints by Housing Block visualization)
if viz_type == "Complaints by Housing Block and Type" and selected_block != 'All Blocks':
    filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]

# Header for selected year
st.header(f"Analysis for {'All Time' if selected_year == 'All Time' else selected_year}")


# Display metrics
col1, col2 = st.columns(2)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    most_common = filtered_data['Type of Complaint'].value_counts().index[0] if not filtered_data.empty else "N/A"
    st.metric("Most Common Type", most_common)

# Visualizations
if viz_type == "Complaint Types":
    st.subheader("Top 5 Complaint Types Pie Chart")
    
    # Prepare data: Select the top 5 complaint types
    top_complaints = filtered_data['Type of Complaint'].value_counts().nlargest(5).reset_index()
    top_complaints.columns = ['Complaint Type', 'Count']
    
    # Create an interactive pie chart with the 'inferno' color scheme
    fig = px.pie(
        top_complaints,
        names='Complaint Type',
        values='Count',
        title="Top 5 Complaint Types Distribution",
        color_discrete_sequence=px.colors.sequential.Inferno, 
        labels={"Count": "Number of Complaints", "Complaint Type": "Type of Complaint"},
        hover_data=['Count']
    )
    fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
    
    # Display chart
    st.plotly_chart(fig, use_container_width=True)
    
    # Updated write-up
    st.write("""
    **What this visualization shows:**  
    This interactive pie chart displays the distribution of the top 5 complaint types by year.  
    **Why it's interesting:**  
    Hovering over each segment reveals detailed information, including the complaint type, the number of complaints, and its percentage of the total. By focusing on the top 5 complaint categories, this visualization helps identify the most commonly reported issues, enabling better prioritization of resources and targeted interventions.  
    **Color Scheme:**  
    Each complaint type is represented by a unique color from the 'inferno' color scheme, which visually distinguishes between categories and makes the chart more engaging.  
    """)

elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    
    # Generate the heatmap
    map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
    m = folium.Map(location=map_center, zoom_start=12)
    heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
    HeatMap(heat_data).add_to(m)
    
    # Display the map
    st_folium(m, width=700, height=500)
    
    # Write-up
    st.write("""
    **What this visualization shows:**  
    This clustered heatmap visualizes complaint locations across the city.  
    **Why it's interesting:**  
    It highlights geographic areas with higher complaint densities, known as hotspots. These areas can be targeted for intervention and resource allocation.  
    **Color Scheme:**  
    The heatmap uses gradient colors, where warmer tones (red/orange) represent higher densities of complaints, providing a clear visual cue for problem areas.  
    """)


# elif viz_type == "Resolution Status":
#     st.subheader("Interactive Complaint Resolution Status")
#     resolution_counts = filtered_data['Disposition'].value_counts().reset_index()
#     resolution_counts.columns = ['Disposition', 'Count']
#     chart = alt.Chart(resolution_counts).mark_arc(innerRadius=50).encode(
#         theta=alt.Theta(field="Count", type="quantitative"),
#         color=alt.Color(field="Disposition", type="nominal"),
#         tooltip=[
#             alt.Tooltip("Disposition", title="Resolution"),
#             alt.Tooltip("Count", title="Count")
#         ]
#     )
#     st.altair_chart(chart, use_container_width=True)

#     st.write("""
#     **What this visualization shows:**  
#     This interactive donut chart displays the distribution of complaint resolutions, such as resolved, unresolved, or escalated cases.  
#     **Why it's interesting:**  
#     By analyzing the resolution status, we can assess the effectiveness of complaint handling and identify areas for improvement.  
#     **Color Scheme:**  
#     Each resolution status is represented by a distinct color, making it easy to differentiate between categories and quickly interpret the data.  
#     """)
# We collectively decided to opt for more engaging and interactive charts instead of the above chart.

# Submission Methods Analysis
elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")

    # Allow the user to select the type of chart (Bar or Pie)
    plot_type = st.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])

    # Get the top 5 submission methods
    submission_counts = filtered_data['Method Submitted'].value_counts().nlargest(5)
    submission_data = submission_counts.reset_index()
    submission_data.columns = ['Submission Method', 'Count']

    if plot_type == "Bar Chart":
        # Create a bar chart with Seaborn
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=submission_data['Count'], y=submission_data['Submission Method'], palette='inferno', ax=ax)
        st.pyplot(fig)
    
    elif plot_type == "Pie Chart":
        # Create an interactive pie chart with Plotly
        fig = px.pie(
            submission_data,
            names='Submission Method',
            values='Count',
            title="Top 5 Submission Methods Distribution",
            color_discrete_sequence=px.colors.inferno,
            labels={"Count": "Number of Complaints", "Submission Method": "Method Submitted"},
            hover_data=['Count']
        )
        fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
        st.plotly_chart(fig, use_container_width=True)

    # Updated write-up
    st.write("""
    **What this visualization shows:**  
    This chart shows the number of complaints submitted via different methods, such as email, phone, online form, etc., with a focus on the top 5 submission methods.
    
    **Why it's interesting:**  
    By analyzing submission methods, we can understand how users prefer to submit complaints. This insight helps in focusing efforts on improving the most used channels, ensuring better user engagement.
    
    **Color Scheme:**  
    The 'inferno' color palette highlights differences in submission frequency, with darker shades representing higher submission counts.
    """)



# elif viz_type == "Complaints by Disposition":
#     st.subheader("Complaints by Disposition")
#     disposition_counts = filtered_data['Disposition'].value_counts()
#     fig, ax = plt.subplots(figsize=(10, 6))
#     sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette='viridis', ax=ax)
#     st.pyplot(fig)

#     st.write("""
#     **What this visualization shows:**  
#     This bar chart displays the distribution of complaints by their resolution status (disposition), such as 'Resolved', 'Unresolved', etc.  
#     **Why it's interesting:**  
#     By examining the disposition of complaints, organizations can assess how effectively issues are being addressed and identify any areas needing improvement.  
#     **Color Scheme:**  
#     The 'viridis' color palette highlights differences in complaint resolution status, with lighter shades indicating a higher frequency of resolved complaints.  
#     """)

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    
    # Group by Complaint Type and Disposition to make the analysis more complex
    disposition_by_complaint_type = filtered_data.groupby(['Type of Complaint', 'Disposition']).size().unstack(fill_value=0)
    
    # Plot the bar chart with the 'inferno' color scheme
    fig, ax = plt.subplots(figsize=(12, 8))
    disposition_by_complaint_type.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
    
    ax.set_title("Complaints by Disposition and Complaint Type")
    ax.set_ylabel("Number of Complaints")
    ax.set_xlabel("Complaint Type")
    
    # Display the plot
    st.pyplot(fig)

    st.write("""
    **What this visualization shows:**  
    This stacked bar chart displays the distribution of complaints by their disposition for each complaint type. The bars represent different complaint categories, and each bar is broken down by the resolution status.
    
    **Why it's interesting:**  
    By combining complaint type and resolution status, this chart allows organizations to assess not only how many complaints are resolved or unresolved but also which types of complaints are most frequently resolved or still pending. This helps in identifying patterns in complaint resolution and provides insights into which complaint categories may need more attention to resolve.
    
    **Color Scheme:**  
    The 'inferno' color scheme is used to differentiate between the various disposition statuses, with each status getting a unique shade. This gradient of colors helps visualize the proportions and makes the chart more visually engaging. Lighter shades correspond to a higher frequency of a particular disposition status in the given complaint type.
    """)



elif viz_type == "Monthly Trends by Complaint Type":
    st.subheader("Monthly Trends Grouped by Complaint Types")
    monthly_trends = filtered_data.groupby(['Month Reported', 'Type of Complaint']).size().reset_index(name='Count')
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color='Type of Complaint:N'
    )
    st.altair_chart(chart, use_container_width=True)

    st.write("""
    **What this visualization shows:**  
    This line chart visualizes the monthly trends in complaint counts, grouped by complaint type. It allows tracking changes in complaint frequencies over time and identifying patterns or spikes in specific categories.
    
    **Why it's interesting:**  
    By visualizing these trends, you can identify whether certain complaint types are seasonal or are influenced by specific events. This information helps prioritize resources and refine strategies for complaint management.
    
    **Color Scheme:**  
    Different complaint types are represented by distinct colors, enabling easy comparison of trends across categories.
    """)


# elif viz_type == "Top Complaint Types":
#     st.subheader("Top Complaint Types")
#     complaint_counts = filtered_data['Type of Complaint'].value_counts().head(10)
#     fig, ax = plt.subplots()
#     sns.barplot(x=complaint_counts.values, y=complaint_counts.index, palette="inferno", ax=ax)
#     st.pyplot(fig)

#     st.write("""
#     **What this visualization shows:**  
#     This bar chart displays the top 10 most common complaint types based on the number of occurrences. It provides a clear view of the most frequently reported issues.

#     **Why it's interesting:**  
#     By focusing on the top complaint types, organizations can identify and prioritize the issues that impact the majority of their users or customers. This can lead to targeted improvements in service or support efforts.

#     **Color Scheme:**  
#     The 'inferno' palette is used to emphasize the frequency of each complaint type, with darker shades representing higher frequencies.
#     """)

elif viz_type == "Complaints Over Time":
    st.subheader("Complaints Over Time")
    complaints_over_time = filtered_data_time.groupby(filtered_data_time['Date Reported'].dt.date).size()
    
    fig, ax = plt.subplots()
    ax.plot(complaints_over_time.index, complaints_over_time.values, marker='o', color='tab:purple')
    ax.set_title("Complaints Over Time")
    st.pyplot(fig)

    st.write("""
    **Write-up:** This visualization displays the trend of complaints over time using a line chart. 
    It shows the number of complaints reported for each day, making it easy to spot peaks or declines in complaints. 
    The use of a blue color scheme highlights the flow and continuity of the data, providing a clear view of the patterns over time.
    """)

elif viz_type == "Complaints by Housing Block and Type":
    st.subheader("Complaints by Housing Block and Type")
    
    # Filter the data based on the selected year and housing block
    filtered_data_time = filtered_data  # Use filtered_data if date range is not needed
    if selected_year != 'All Time':
        filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
    
    # Further filter by Housing Block (if applicable)
    if selected_block != 'All Blocks':
        filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
    
    # Pivot the data based on the filtered data
    complaint_pivot = filtered_data_time.pivot_table(
        index='Housing Block',
        columns='Type of Complaint',
        values='Disposition',
        aggfunc='count',
        fill_value=0
    )
    
    # Ensure the pivoted data is numeric for plotting
    complaint_pivot = complaint_pivot.astype(float)

    # Plot the data
    fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
    st.pyplot(fig)
    
    st.write("""
    **What this visualization shows:**  
    This line chart shows the trend of complaints over time, displaying the number of complaints reported for each day. It helps identify patterns, peaks, and trends in the complaints data.

    **Why it's interesting:**  
    By visualizing complaints over time, organizations can track the effectiveness of their responses and interventions. It also helps identify recurring spikes or declines in complaints, which could be indicative of specific events or issues.

    **Insight Potential:**  
    This chart can be used to correlate with external factors, such as changes in policies, incidents, or other events that might lead to a sudden increase or decrease in complaints. The insights can inform better decision-making and improve the organization's complaint management strategy.
    """)

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal.")