File size: 33,501 Bytes
a1d9286
 
 
 
 
b7feb2b
0972530
a1d9286
 
 
 
 
7d2753c
a1d9286
 
 
 
 
57ada8f
88463a2
a1d9286
 
7d2753c
a1d9286
 
140971e
b7feb2b
 
 
 
 
7d2753c
b7feb2b
 
 
 
 
 
 
 
 
 
 
a1d9286
b7feb2b
 
 
 
 
 
 
 
 
a1d9286
b7feb2b
 
 
 
afab428
 
 
 
b7feb2b
 
7d2753c
b7feb2b
a1d9286
b7feb2b
a1d9286
38d8fcf
7d2753c
29c6866
f6bdcdc
0a70f96
8666827
29c6866
a5192ab
0a70f96
38d8fcf
f6bdcdc
38d8fcf
a5192ab
38d8fcf
5b69b6a
 
f6bdcdc
 
c1925f9
a5192ab
ec8d9e4
4b8004d
 
 
 
 
 
ed54ea1
 
 
 
4b8004d
 
 
a5192ab
 
 
6b16d15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d2753c
ad4db27
 
 
 
 
7d2753c
88098e3
ad4db27
b7feb2b
7d2753c
5b69b6a
 
 
 
 
 
38d8fcf
 
0a70f96
 
38d8fcf
1a4102e
 
b7feb2b
 
5b69b6a
8666827
7d2753c
97667b8
a1d9286
 
 
b7feb2b
 
 
 
a1d9286
fac1189
1d4c87b
6835d5e
fac1189
 
1d4c87b
6835d5e
1d4c87b
fac1189
1d4c87b
 
fac1189
4b58641
1d4c87b
 
 
 
 
6835d5e
a1d9286
1d4c87b
6835d5e
1d4c87b
fac1189
4b58641
fac1189
 
 
4b58641
1d4c87b
 
a1d9286
 
fac1189
6835d5e
a1d9286
 
 
 
fac1189
6835d5e
b7feb2b
fac1189
 
 
 
 
 
 
 
 
 
 
a1d9286
4b58641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1925f9
4b58641
8b33a3f
 
 
 
6835d5e
8b33a3f
 
6835d5e
8b33a3f
 
 
 
 
6835d5e
8b33a3f
 
 
 
 
6835d5e
8b33a3f
 
 
 
 
e4cc22a
 
8b33a3f
 
 
 
 
 
6835d5e
8b33a3f
 
874a4ac
8b33a3f
 
 
 
 
 
 
4b58641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d2753c
fac1189
a1d9286
 
4b58641
6835d5e
4b58641
 
6835d5e
4b58641
 
 
 
 
 
 
6835d5e
b7feb2b
0972530
fac1189
 
c1925f9
4b58641
fac1189
4b58641
 
fac1189
4b58641
fac1189
 
 
0972530
 
 
 
 
 
 
 
 
 
fac1189
 
 
 
 
7d2753c
fac1189
 
 
 
 
 
4b58641
 
 
 
 
 
0972530
4b58641
 
 
fac1189
4b58641
 
fac1189
4b58641
 
 
7d2753c
 
fac1189
0972530
 
1a4102e
fac1189
1a4102e
c1925f9
0972530
 
 
fac1189
7d2753c
 
 
 
 
 
 
 
 
fac1189
edea274
0972530
1a4102e
6835d5e
ad4db27
 
 
 
6835d5e
69bfce8
ad4db27
 
6835d5e
ad4db27
 
 
 
 
 
 
1a4102e
6835d5e
b01c1d2
 
6835d5e
0972530
 
88098e3
fac1189
 
7d2753c
fac1189
 
7d2753c
fac1189
7d2753c
 
 
0972530
522d8d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88463a2
 
8313ffb
88463a2
 
 
 
7dea025
88463a2
 
 
8bbe668
88463a2
 
 
 
 
 
 
 
49bf2e8
88463a2
 
49bf2e8
88463a2
 
 
 
 
 
 
 
 
 
7dea025
88463a2
 
77f004b
88463a2
 
 
ae27632
88463a2
 
8313ffb
88463a2
 
 
522d8d9
88463a2
 
 
 
 
 
 
 
 
8313ffb
88463a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49bf2e8
88463a2
 
49bf2e8
88463a2
 
 
858b949
8313ffb
ae27632
4b8004d
0972530
 
874a4ac
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from datetime import datetime
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px

# Browser-tab title and wide layout for the whole dashboard.
_PAGE_TITLE = "Nuisance Complaints Dashboard"
st.set_page_config(layout="wide", page_title=_PAGE_TITLE)

# Main heading, followed by the author credit and a short description of
# what the dashboard covers (rendered as a markdown bullet list).
_INTRO_MARKDOWN = """
* By Ruchita Alate (ralate2@illinois.edu)
* This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
"""
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown(_INTRO_MARKDOWN)

# Loading and cleaning data
@st.cache_data
def load_and_clean_data():
    """Load the nuisance-complaints CSV and return a cleaned DataFrame.

    The function is wrapped in ``st.cache_data``. Cleaning steps, in order:

    * drop rows without a 'File Number' and parse the three date columns
      ('File Close Date' is parsed with ``errors='coerce'``);
    * fill a missing 'Date Notice Mailed or Given' with 'Date Reported'
      plus the median report-to-notice delay (in days);
    * fill a missing 'Type of Complaint' with 'Unknown' and a missing
      'Disposition' with the most frequent disposition recorded for the
      same complaint type ('Pending' when that type has none at all);
    * derive 'Processing Time' as the number of days between report and
      file close (NaN when there is no close date);
    * fill a missing 'Method Submitted' ('Online' for online submissions,
      otherwise the overall mode), then drop rows lacking
      'Submitted Online?' or 'Mapped Location';
    * derive 'Latitude', 'Longitude' and 'Month Reported', cast
      'Year Reported' to int and normalise 'Housing Block' to stripped
      lower case.

    Returns:
        pandas.DataFrame: the cleaned complaints table.
    """
    data = pd.read_csv('Nuisance_Complaints_20241130.csv')
    data = data.dropna(subset=['File Number'])
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Filling missing dates: the median is taken over rows that have both
    # dates (rows with a missing notice date contribute NaN and are skipped).
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    notice_missing = data['Date Notice Mailed or Given'].isna()
    data.loc[notice_missing, 'Date Notice Mailed or Given'] = (
        data.loc[notice_missing, 'Date Reported'] + pd.Timedelta(days=median_delay)
    )

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    def _most_frequent(values):
        # Series.mode() ignores NaN, so it is empty when a complaint type
        # has no recorded disposition at all.
        modes = values.mode()
        return modes.iloc[0] if len(modes) > 0 else 'Pending'

    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_most_frequent)
    # Vectorized fill: look up the per-type fallback for every row and use
    # it only where 'Disposition' is missing (replaces a row-wise apply).
    data['Disposition'] = data['Disposition'].fillna(
        data['Type of Complaint'].map(most_common_disposition)
    )

    # np.where keeps the column float-valued, with NaN for open files.
    data['Processing Time'] = np.where(
        data['File Close Date'].notna(),
        (data['File Close Date'] - data['Date Reported']).dt.days,
        np.nan
    )

    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()), 'Method Submitted'] = 'Online'
    data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

    # 'Mapped Location' is parsed as "(<latitude>, <longitude>)".
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month

    # Preprocessing the 'Housing Block' column
    data['Housing Block'] = data['Housing Block'].str.strip().str.lower()

    return data

# Loading the data
data = load_and_clean_data()

# Sidebar for controls
st.sidebar.header("Dashboard Controls")

# Defining the visualization type (viz_type) selection
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Complaints by Disposition", "Submission Methods",
    "Monthly Trends by Complaint Type", "Complaints Over Time", "Complaints by Housing Block and Type",
], key="viz_type_select")

# Views that get the housing-block dropdown. The second name is not offered
# in the selectbox above; it is kept so the checks accept the same values
# as before.
HOUSING_BLOCK_VIEWS = (
    "Complaints by Housing Block and Type",
    "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)",
)
shows_block_filter = viz_type in HOUSING_BLOCK_VIEWS

# The year selector is hidden for the time-series and housing-block views,
# which always work on all years.
if viz_type == "Complaints Over Time" or shows_block_filter:
    selected_year = 'All Time'
else:
    year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
    selected_year = st.sidebar.selectbox("Select Year", options=year_options, key="year_select")

# Date Range Selector for Complaints Over Time (only shown for that view)
if viz_type == "Complaints Over Time":
    start_date = st.sidebar.date_input("Start Date", pd.to_datetime("2020-01-01"), key="start_date")
    end_date = st.sidebar.date_input("End Date", pd.to_datetime("2024-12-31"), key="end_date")

# Dropdown for Housing Block (only shown for the housing-block views).
# selected_block always has a value, so later code never has to probe
# whether the name exists.
selected_block = 'All Blocks'
if shows_block_filter:
    # Keep blocks whose label starts with a number below 3400. Missing
    # values and empty labels are skipped instead of raising.
    block_numbers = {}
    for block in data['Housing Block'].dropna().unique():
        parts = block.split()
        if parts and parts[0].isdigit() and int(parts[0]) < 3400:
            block_numbers[block] = int(parts[0])
    # Order by block number so "200 block" comes before "1000 block".
    valid_blocks = sorted(block_numbers, key=lambda name: (block_numbers[name], name))
    block_options = ['All Blocks'] + valid_blocks
    selected_block = st.sidebar.selectbox("Select Housing Block", options=block_options, key="block_select")

# Filtering data based on selected year
if selected_year != 'All Time':
    filtered_data = data[data['Year Reported'] == selected_year]
else:
    filtered_data = data

# Further filtering by Housing Block
if selected_block != 'All Blocks':
    filtered_data = filtered_data[filtered_data['Housing Block'] == selected_block]

# Filtering data based on date range (only for Complaints Over Time visualization)
if viz_type == "Complaints Over Time":
    range_start = pd.to_datetime(start_date)
    # Half-open upper bound: includes every timestamp on the chosen end
    # date, not only those at exactly midnight.
    range_end = pd.to_datetime(end_date) + pd.Timedelta(days=1)
    filtered_data_time = filtered_data[
        (filtered_data['Date Reported'] >= range_start) &
        (filtered_data['Date Reported'] < range_end)
    ]
else:
    # filtered_data already carries the year and housing-block filters.
    filtered_data_time = filtered_data

# Page heading naming the year selection currently in effect
# ('All Time' or a specific year).
st.header(f"Analysis for {selected_year}")

# Summary metrics shown side by side: total number of complaints in the
# current selection and the most frequent complaint type ("N/A" when the
# selection is empty).
if filtered_data.empty:
    most_common = "N/A"
else:
    most_common = filtered_data['Type of Complaint'].value_counts().index[0]

total_col, common_col = st.columns(2)
total_col.metric("Total Complaints", len(filtered_data))
common_col.metric("Most Common Type", most_common)

# Visualizations
# Exactly one branch runs per script execution: it renders the chart for the
# view chosen in the sidebar, followed by its write-up. Matplotlib figures
# are closed right after st.pyplot() so that they do not accumulate in
# pyplot's figure registry across Streamlit reruns.
if viz_type == "Complaint Types":
    st.subheader("Top 5 Complaint Types Pie Chart")

    # Preparing data: Select the top 5 complaint types
    top_complaints = filtered_data['Type of Complaint'].value_counts().nlargest(5).reset_index()
    top_complaints.columns = ['Complaint Type', 'Count']

    # Creating an interactive pie chart with the 'inferno' color scheme
    fig = px.pie(
        top_complaints,
        names='Complaint Type',
        values='Count',
        title="Top 5 Complaint Types Distribution",
        color_discrete_sequence=px.colors.sequential.Inferno,
        labels={"Count": "Number of Complaints", "Complaint Type": "Type of Complaint"},
        hover_data=['Count']
    )
    fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')

    # Displaying chart
    st.plotly_chart(fig, use_container_width=True)

    # write-up
    st.write("""
    **What this visualization shows:**  
    This interactive pie chart displays the distribution of the top 5 complaint types by year.  
    **Why it's interesting:**  
    Hovering over each segment reveals detailed information, including the complaint type, the number of complaints, and its percentage of the total. By focusing on the top 5 complaint categories, this visualization helps identify the most commonly reported issues, enabling better prioritization of resources and targeted interventions.  
    **Color Scheme:**  
    Each complaint type is represented by a unique color from the 'inferno' color scheme, which visually distinguishes between categories and makes the chart more engaging.  
    """)

elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")

    # Generating the heatmap. Rows without coordinates are dropped; when
    # nothing is left the mean-based map centre would be NaN, so the map is
    # skipped and a notice is shown instead.
    heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
    if heat_data:
        map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(heat_data).add_to(m)

        # Displaying the map
        st_folium(m, width=700, height=500)
    else:
        st.info("No mapped complaints are available for the current selection.")

    # Write-up
    st.write("""
    **What this visualization shows:**  
    This clustered heatmap visualizes complaint locations across the city.  
    **Why it's interesting:**  
    It highlights geographic areas with higher complaint densities, known as hotspots. These areas can be targeted for intervention and resource allocation.  
    **Color Scheme:**  
    The heatmap uses gradient colors, where warmer tones (red/orange) represent higher densities of complaints, providing a clear visual cue for problem areas.  
    """)

# NOTE: an earlier "Resolution Status" view (Altair donut chart of
# 'Disposition' counts) used to live here. We collectively decided to opt
# for more engaging and interactive charts instead.

# Submission Methods Analysis
elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")

    # Allowing the user to select the type of chart (Bar or Pie)
    plot_type = st.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])

    # Getting the top 5 submission methods
    submission_counts = filtered_data['Method Submitted'].value_counts().nlargest(5)
    submission_data = submission_counts.reset_index()
    submission_data.columns = ['Submission Method', 'Count']

    if plot_type == "Bar Chart":
        # Creating a bar chart with Seaborn
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=submission_data['Count'], y=submission_data['Submission Method'], palette='inferno', ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    elif plot_type == "Pie Chart":
        # Creating an interactive pie chart with Plotly
        fig = px.pie(
            submission_data,
            names='Submission Method',
            values='Count',
            title="Top 5 Submission Methods Distribution",
            color='Submission Method',  # One color per submission method
            color_discrete_sequence=px.colors.sequential.Inferno,
            labels={"Count": "Number of Complaints", "Submission Method": "Method Submitted"},
            hover_data=['Count']
        )
        fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
        st.plotly_chart(fig, use_container_width=True)

    # Write-up
    st.write("""
    **What this visualization shows:**  
    This chart shows the number of complaints submitted via different methods, such as email, phone, online form, etc., with a focus on the top 5 submission methods. If one wishes to see the percentage wise distribution, pie chart option can be selected from the interactivity option provided.
    
    **Why it's interesting:**  
    By analyzing submission methods, we can understand how users prefer to submit complaints. This insight helps in focusing efforts on improving the most used channels, ensuring better user engagement.
    
    **Color Scheme:**  
    The 'inferno' color palette highlights differences in submission frequency, with darker shades representing higher submission counts.
    """)

# NOTE: an earlier "Complaints by Disposition" view (plain Seaborn bar chart
# of 'Disposition' counts, 'viridis' palette) used to live here. We
# collectively decided to opt for more engaging and interactive charts
# instead; the stacked version below replaced it.

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")

    # Grouping by Complaint Type and Disposition: one row per complaint
    # type, one column per disposition, cells holding complaint counts.
    disposition_by_complaint_type = filtered_data.groupby(['Type of Complaint', 'Disposition']).size().unstack(fill_value=0)

    # Plotting the bar chart with the 'inferno' color scheme
    fig, ax = plt.subplots(figsize=(12, 8))
    disposition_by_complaint_type.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)

    ax.set_title("Complaints by Disposition and Complaint Type")
    ax.set_ylabel("Number of Complaints")
    ax.set_xlabel("Complaint Type")

    # Displaying the plot
    st.pyplot(fig)
    plt.close(fig)

    st.write("""
    **What this visualization shows:**  
    This stacked bar chart displays the distribution of complaints by their disposition for each complaint type. The bars represent different complaint categories, and each bar is broken down by the resolution status.
    
    **Why it's interesting:**  
    By combining complaint type and resolution status, this chart allows organizations to assess not only how many complaints are resolved or unresolved but also which types of complaints are most frequently resolved or still pending. This helps in identifying patterns in complaint resolution and provides insights into which complaint categories may need more attention to resolve.
    
    **Color Scheme:**  
    The 'inferno' color scheme is used to differentiate between the various disposition statuses, with each status getting a unique shade. This gradient of colors helps visualize the proportions and makes the chart more visually engaging. Lighter shades correspond to a higher frequency of a particular disposition status in the given complaint type.
    """)


elif viz_type == "Monthly Trends by Complaint Type":
    st.subheader("Monthly Trends Grouped by Complaint Types")
    # One row per (month, complaint type) pair with its complaint count.
    monthly_trends = filtered_data.groupby(['Month Reported', 'Type of Complaint']).size().reset_index(name='Count')
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color='Type of Complaint:N'
    )
    st.altair_chart(chart, use_container_width=True)

    st.write("""
    **What this visualization shows:**  
    This line chart visualizes the monthly trends in complaint counts, grouped by complaint type. It allows tracking changes in complaint frequencies over time and identifying patterns or spikes in specific categories.
    
    **Why it's interesting:**  
    By visualizing these trends, we can identify whether certain complaint types are seasonal or are influenced by specific events. This information helps prioritize resources and refine strategies for complaint management.
    
    **Color Scheme:**  
    Different complaint types are represented by distinct colors, enabling easy comparison of trends across categories.
    """)

# NOTE: an earlier "Top Complaint Types" view (Seaborn bar chart of the ten
# most common complaint types) used to live here. We collectively decided to
# opt for more engaging and interactive charts instead.

elif viz_type == "Complaints Over Time":
    st.subheader("Complaints Over Time")
    # Number of complaints per calendar day within the selected date range.
    complaints_over_time = filtered_data_time.groupby(filtered_data_time['Date Reported'].dt.date).size()

    fig, ax = plt.subplots()
    ax.plot(complaints_over_time.index, complaints_over_time.values, marker='o', color='tab:purple')
    ax.set_title("Complaints Over Time")
    st.pyplot(fig)
    plt.close(fig)

    st.write("""
    **What this visualization shows:**  
    This visualization displays the trend of complaints over time using a line chart. It shows the number of complaints reported , making it easy to spot peaks or declines in complaints.

    **Why it's interesting:**  
    By examining the complaints over time, users can identify patterns, such as specific days or periods with higher or lower complaint volumes. This can inform decision-making and help allocate resources more effectively during high-complaint periods.

    **Color Scheme:**  
    The use of a purple color scheme highlights the flow and continuity of the data, providing a clear view of the patterns over time.
""")

elif viz_type == "Complaints by Housing Block and Type":
    st.subheader("Complaints by Housing Block and Type")

    # filtered_data already has the selected year and housing block applied
    # by the filtering step earlier in the script, so it is pivoted as-is:
    # one row per housing block, one column per complaint type, cells
    # holding the number of complaints.
    complaint_pivot = filtered_data.pivot_table(
        index='Housing Block',
        columns='Type of Complaint',
        values='Disposition',
        aggfunc='count',
        fill_value=0
    )

    # Ensuring the pivoted data is numeric for plotting
    complaint_pivot = complaint_pivot.astype(float)

    # Plotting the data
    fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
    st.pyplot(fig)
    plt.close(fig)

    st.write("""
    **What this visualization shows:**  
    This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the total number of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks.

    **Why it's interesting:**  
    By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.

    **Color Scheme:**  
    The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
""")

# elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
#     st.subheader("Complaints by Housing Block and Type- Incorporating Suggestions Based on Professor's Feedback")
    
#     # Filtering the data based on the selected year and housing block
#     filtered_data_time = data  # Use filtered_data if date range is not needed
#     if selected_year != 'All Time':
#         filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
    
#     # Further filtering by Housing Block (if applicable)
#     if selected_block != 'All Blocks':
#         filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
    
#     # Pivoting the data based on the filtered data
#     complaint_pivot = filtered_data_time.pivot_table(
#         index='Housing Block',
#         columns='Type of Complaint',
#         values='Disposition',
#         aggfunc='count',
#         fill_value=0
#     )
    
#     # Ensuring the pivoted data is numeric for plotting
#     complaint_pivot = complaint_pivot.astype(float)

#     # Desired order for the housing blocks
#     desired_order = [
#         '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
#         '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
#         '1200 block', '1300 block', '1400 block', '1500 block', '1600 block', 
#         '1700 block', '1800 block', '1900 block', '2000 block', '2100 block', 
#         '2200 block', '2300 block', '2400 block', '2500 block', '2600 block', 
#         '2700 block', '2800 block', '2900 block', '3000 block', '3100 block', 
#         '3200 block', '3300 block', '3400 block', '3500 block', '3600 block', 
#         '3700 block', '3800 block', '3900 block', '4000 block', '4100 block', 
#         '4200 block', '4300 block', '4400 block', '4500 block', '4600 block', 
#         '4700 block', '4800 block', '4900 block', '5000 block'
#     ]

#     # Reordering the index of the pivot table according to the desired order
#     complaint_pivot = complaint_pivot.reindex(desired_order)

#     # Calculating percentages for each complaint type per housing block
#     percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100

#     # Plotting the data
#     fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()

#     # Adding percentage labels to the plot
#     ax = fig.gca()
#     for idx, block in enumerate(complaint_pivot.index):
#         cumulative_height = 0
#         for i, complaint_type in enumerate(complaint_pivot.columns):
#             count = complaint_pivot.iloc[idx, i]
#             percent = percentages.iloc[idx, i]
#             if count > 0:
#                 # Compute the position for the percentage label
#                 x_pos = idx - 0.4 + 0.8 / 2  # Adjusting the position of the label
#                 y_pos = cumulative_height + count / 2
#                 ax.text(
#                     x_pos, y_pos, f"{percent:.1f}%", 
#                     ha='center', va='center', 
#                     fontsize=10, color='black', 
#                     bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
#                 )
#             cumulative_height += count

#     # Display the plot in Streamlit
#     st.pyplot(fig)

#     # writeup
#     st.write("""
#     **What this visualization shows:**  
#     This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
    
#     **Why it's interesting:**  
#     By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
    
#     **Color Scheme:**  
#     The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
#     """)
# In the above code, we incorporated all of the professor's suggestions and refined the chart to make it more useful for analysis while keeping good aesthetics. Because the data from block 3400 onwards is very sparse, we decided to exclude those records. This adjustment focuses the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.

# if viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
#     st.subheader("Complaints by Housing Block and Type - Incorporating Suggestions Based on Professor's Feedback")
    
#     # Filtering the data based on the selected year and housing block
#     filtered_data_time = data  # Use filtered_data if date range is not needed
#     if selected_year != 'All Time':
#         filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
    
#     # Further filtering by Housing Block (if applicable)
#     if selected_block != 'All Blocks':
#         filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
    
#     # Pivoting the data based on the filtered data
#     complaint_pivot = filtered_data_time.pivot_table(
#         index='Housing Block',
#         columns='Type of Complaint',
#         values='Disposition',
#         aggfunc='count',
#         fill_value=0
#     )
    
#     # Ensuring the pivoted data is numeric for plotting
#     complaint_pivot = complaint_pivot.astype(float)

#     # Desired order for the housing blocks
#     desired_order = [
#         '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
#         '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
#         '1200 block', '1300 block', '1400 block', '1500 block', '1600 block', 
#         '1700 block', '1800 block', '1900 block', '2000 block', '2100 block', 
#         '2200 block', '2300 block', '2400 block', '2500 block', '2600 block', 
#         '2700 block', '2800 block', '2900 block', '3000 block', '3100 block', 
#         '3200 block', '3300 block'
#     ]

#     # Reordering the index of the pivot table according to the desired order
#     complaint_pivot = complaint_pivot.reindex(desired_order)

#     # If a single block is selected, filter for only that block
#     if selected_block != 'All Blocks':
#         complaint_pivot = complaint_pivot.loc[[selected_block]]

#     # Calculating percentages for each complaint type per housing block
#     percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100

#     # Plotting the data
#     fig, ax = plt.subplots(figsize=(10, 6))
#     complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)

#     # Adjusting the x-axis ticks
#     if selected_block != 'All Blocks':
#         ax.set_xticks([0])  # Only one label
#         ax.set_xticklabels([selected_block], rotation=0)
#     else:
#         # Show every nth label to avoid overcrowding
#         tick_spacing = max(1, len(complaint_pivot) // 10)  # Adjust based on the number of blocks
#         ax.set_xticks(range(0, len(complaint_pivot.index), tick_spacing))
#         ax.set_xticklabels(complaint_pivot.index[::tick_spacing], rotation=45, ha='right')

#     # Adding percentage labels to the plot
#     for idx, block in enumerate(complaint_pivot.index):
#         cumulative_height = 0
#         for i, complaint_type in enumerate(complaint_pivot.columns):
#             count = complaint_pivot.iloc[idx, i]
#             percent = percentages.iloc[idx, i]
#             if count > 0:
#                 # Compute the position for the percentage label
#                 x_pos = idx - 0.4 + 0.8 / 2  # Adjusting the position of the label
#                 y_pos = cumulative_height + count / 2
#                 ax.text(
#                     x_pos, y_pos, f"{percent:.1f}%", 
#                     ha='center', va='center', 
#                     fontsize=10, color='black', 
#                     bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
#                 )
#             cumulative_height += count

#     # Setting labels and title
#     ax.set_xlabel('Housing Block')
#     ax.set_ylabel('Number of Complaints')
#     ax.set_title('Complaints by Housing Block and Type')

#     # Display the plot in Streamlit
#     st.pyplot(fig)

#     # Writeup
#     st.write("""
#     **What this visualization shows:**  
#     This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
    
#     **Why it's interesting:**  
#     By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
    
#     **Color Scheme:**  
#     The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
#     """)



   
# Footer: emit a "---" markdown separator, then the dataset attribution line.
for _footer_markdown in (
    "---",
    "Dataset provided by the City of Urbana Open Data Portal - https://data.urbanaillinois.us/Environment/Nuisance-Complaints/tsn9-95m3/about_data ",
):
    st.markdown(_footer_markdown)