# Nuisance Complaints Analysis Dashboard (Streamlit app).
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from datetime import datetime
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px
# Page configuration: wide layout and browser-tab title.
st.set_page_config(layout="wide", page_title="Nuisance Complaints Dashboard")

# Dashboard heading followed by the author / purpose blurb.
st.title("Nuisance Complaints Analysis Dashboard")
intro_markdown = """
* By Ruchita Alate (ralate2@illinois.edu)
* This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
"""
st.markdown(intro_markdown)
# Loading and cleaning data
@st.cache_data
def load_and_clean_data(csv_path='Nuisance_Complaints_20241130.csv'):
    """Load the nuisance-complaints CSV and return a cleaned DataFrame.

    Args:
        csv_path: Path of the CSV export to read. Defaults to the file the
            dashboard has always used.

    Returns:
        A DataFrame in which:
        * rows without a 'File Number', 'Submitted Online?' or
          'Mapped Location' value have been dropped;
        * the three date columns are parsed to datetimes and missing notice
          dates are estimated from the median report-to-notice delay;
        * missing 'Type of Complaint', 'Disposition' and 'Method Submitted'
          values are filled in;
        * 'Processing Time', 'Latitude', 'Longitude' and 'Month Reported'
          have been added and 'Housing Block' is stripped and lower-cased.
    """
    data = pd.read_csv(csv_path)
    data = data.dropna(subset=['File Number'])

    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Filling missing notice dates with "date reported + median delay".
    notice_delay = data['Date Notice Mailed or Given'] - data['Date Reported']
    median_delay = notice_delay.dt.days.median()
    missing_notice = data['Date Notice Mailed or Given'].isna()
    data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
        data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
    )

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    def _modal_disposition(values):
        # Most frequent disposition of one complaint type; 'Pending' when the
        # type has no recorded disposition at all.
        modes = values.mode()
        return modes.iloc[0] if len(modes) > 0 else 'Pending'

    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_modal_disposition)
    # Vectorised fill: each missing disposition takes the modal disposition of
    # its complaint type (replaces a row-by-row apply).
    data['Disposition'] = data['Disposition'].fillna(
        data['Type of Complaint'].map(most_common_disposition)
    )

    # Days from report to file close; stays NaN while the file is still open.
    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days.astype(float)

    # Online submissions without an explicit method are labelled 'Online';
    # anything still missing falls back to the most common method.
    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()), 'Method Submitted'] = 'Online'
    data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

    data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

    # 'Mapped Location' is parsed as "(<latitude>, <longitude>)".
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month

    # Preprocessing the 'Housing Block' column so labels compare consistently.
    data['Housing Block'] = data['Housing Block'].str.strip().str.lower()
    return data
# Loading the data
data = load_and_clean_data()

# Sidebar for controls
st.sidebar.header("Dashboard Controls")

# Defining the visualization type (viz_type) selection
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Complaints by Disposition", "Submission Methods",
    "Monthly Trends by Complaint Type", "Complaints Over Time", "Complaints by Housing Block and Type",
], key="viz_type_select")

# Only these two views replace the year selector with their own control.
is_time_view = viz_type == "Complaints Over Time"
is_block_view = viz_type == "Complaints by Housing Block and Type"

# Year selector (hidden for the views that do not filter by year)
if is_time_view or is_block_view:
    selected_year = 'All Time'
else:
    year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
    selected_year = st.sidebar.selectbox("Select Year", options=year_options, key="year_select")

# Date range selector (only shown for "Complaints Over Time")
if is_time_view:
    start_date = st.sidebar.date_input("Start Date", pd.to_datetime("2020-01-01"), key="start_date")
    end_date = st.sidebar.date_input("End Date", pd.to_datetime("2024-12-31"), key="end_date")
    if start_date > end_date:
        st.sidebar.warning("Start Date is after End Date, so no complaints can match.")


def _block_number(block):
    """Return the leading number of a label such as '1200 block', else None."""
    if not isinstance(block, str):
        return None  # missing block labels come through as NaN
    parts = block.split()
    if parts and parts[0].isdigit():
        return int(parts[0])
    return None


# Housing block selector (only shown for "Complaints by Housing Block and Type").
# selected_block always exists, so later code never has to probe locals().
selected_block = 'All Blocks'
if is_block_view:
    block_numbers = {block: _block_number(block) for block in data['Housing Block'].unique()}
    # Blocks numbered 3400 and above are left out of the dropdown; the rest are
    # listed in numeric order rather than string order.
    valid_blocks = sorted(
        (block for block, number in block_numbers.items() if number is not None and number < 3400),
        key=lambda block: (block_numbers[block], block),
    )
    block_options = ['All Blocks'] + valid_blocks
    selected_block = st.sidebar.selectbox("Select Housing Block", options=block_options, key="block_select")

# Filtering data based on selected year and housing block
filtered_data = data
if selected_year != 'All Time':
    filtered_data = filtered_data[filtered_data['Year Reported'] == selected_year]
if selected_block != 'All Blocks':
    filtered_data = filtered_data[filtered_data['Housing Block'] == selected_block]

# Filtering data based on date range (only for Complaints Over Time visualization)
if is_time_view:
    range_start = pd.to_datetime(start_date)
    # Compare against the start of the following day so the end date stays
    # inclusive even if 'Date Reported' carries a time of day.
    range_end = pd.to_datetime(end_date) + pd.Timedelta(days=1)
    reported = filtered_data['Date Reported']
    filtered_data_time = filtered_data[(reported >= range_start) & (reported < range_end)]
else:
    filtered_data_time = filtered_data

# Header for selected year
st.header(f"Analysis for {selected_year}")

# Displaying metrics
col1, col2 = st.columns(2)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    most_common = filtered_data['Type of Complaint'].value_counts().index[0] if not filtered_data.empty else "N/A"
    st.metric("Most Common Type", most_common)
# Visualizations
# Each branch renders one chart followed by its write-up. Earlier static charts
# (a resolution-status donut, a plain disposition bar chart and a top-10
# complaint-type bar chart) were replaced by the more interactive ones below.
chart_source = filtered_data_time if viz_type == "Complaints Over Time" else filtered_data
if chart_source.empty:
    # Several plotting calls below cannot handle an empty frame (for example
    # the map centre would be NaN), so stop here with a message instead.
    st.warning("No complaints match the current selection.")
elif viz_type == "Complaint Types":
    st.subheader("Top 5 Complaint Types Pie Chart")
    # Preparing data: Select the top 5 complaint types
    top_complaints = filtered_data['Type of Complaint'].value_counts().nlargest(5).reset_index()
    top_complaints.columns = ['Complaint Type', 'Count']
    # Creating an interactive pie chart with the 'inferno' color scheme
    fig = px.pie(
        top_complaints,
        names='Complaint Type',
        values='Count',
        title="Top 5 Complaint Types Distribution",
        color_discrete_sequence=px.colors.sequential.Inferno,
        labels={"Count": "Number of Complaints", "Complaint Type": "Type of Complaint"},
        hover_data=['Count']
    )
    fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
    # Displaying chart
    st.plotly_chart(fig, use_container_width=True)
    # Write-up
    st.write("""
**What this visualization shows:**
This interactive pie chart displays the distribution of the top 5 complaint types by year.
**Why it's interesting:**
Hovering over each segment reveals detailed information, including the complaint type, the number of complaints, and its percentage of the total. By focusing on the top 5 complaint categories, this visualization helps identify the most commonly reported issues, enabling better prioritization of resources and targeted interventions.
**Color Scheme:**
Each complaint type is represented by a unique color from the 'inferno' color scheme, which visually distinguishes between categories and makes the chart more engaging.
""")
elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    # Only rows with both coordinates can be placed on the map.
    coordinates = filtered_data[['Latitude', 'Longitude']].dropna()
    if coordinates.empty:
        st.warning("No complaints with map coordinates match the current selection.")
    else:
        # Generating the heatmap, centred on the mean complaint location
        map_center = [coordinates['Latitude'].mean(), coordinates['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(coordinates.values.tolist()).add_to(m)
        # Displaying the map
        st_folium(m, width=700, height=500)
    # Write-up
    st.write("""
**What this visualization shows:**
This clustered heatmap visualizes complaint locations across the city.
**Why it's interesting:**
It highlights geographic areas with higher complaint densities, known as hotspots. These areas can be targeted for intervention and resource allocation.
**Color Scheme:**
The heatmap uses gradient colors, where warmer tones (red/orange) represent higher densities of complaints, providing a clear visual cue for problem areas.
""")
# Submission Methods Analysis
elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    # Allowing the user to select the type of chart (Bar or Pie)
    plot_type = st.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])
    # Getting the top 5 submission methods
    submission_counts = filtered_data['Method Submitted'].value_counts().nlargest(5)
    submission_data = submission_counts.reset_index()
    submission_data.columns = ['Submission Method', 'Count']
    if plot_type == "Bar Chart":
        # Creating a bar chart with Seaborn
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=submission_data['Count'], y=submission_data['Submission Method'], palette='inferno', ax=ax)
        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps it alive across reruns.
        plt.close(fig)
    elif plot_type == "Pie Chart":
        # Creating an interactive pie chart with Plotly
        fig = px.pie(
            submission_data,
            names='Submission Method',
            values='Count',
            title="Top 5 Submission Methods Distribution",
            color='Submission Method',
            color_discrete_sequence=px.colors.sequential.Inferno,
            labels={"Count": "Number of Complaints", "Submission Method": "Method Submitted"},
            hover_data=['Count']
        )
        fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
        st.plotly_chart(fig, use_container_width=True)
    # Write-up
    st.write("""
**What this visualization shows:**
This chart shows the number of complaints submitted via different methods, such as email, phone, online form, etc., with a focus on the top 5 submission methods. If one wishes to see the percentage wise distribution, pie chart option can be selected from the interactivity option provided.
**Why it's interesting:**
By analyzing submission methods, we can understand how users prefer to submit complaints. This insight helps in focusing efforts on improving the most used channels, ensuring better user engagement.
**Color Scheme:**
The 'inferno' color palette highlights differences in submission frequency, with darker shades representing higher submission counts.
""")
elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    # Counting complaints per (complaint type, disposition) pair; one column per disposition
    disposition_by_complaint_type = filtered_data.groupby(['Type of Complaint', 'Disposition']).size().unstack(fill_value=0)
    # Plotting the stacked bar chart with the 'inferno' color scheme
    fig, ax = plt.subplots(figsize=(12, 8))
    disposition_by_complaint_type.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
    ax.set_title("Complaints by Disposition and Complaint Type")
    ax.set_ylabel("Number of Complaints")
    ax.set_xlabel("Complaint Type")
    # Displaying the plot
    st.pyplot(fig)
    plt.close(fig)
    st.write("""
**What this visualization shows:**
This stacked bar chart displays the distribution of complaints by their disposition for each complaint type. The bars represent different complaint categories, and each bar is broken down by the resolution status.
**Why it's interesting:**
By combining complaint type and resolution status, this chart allows organizations to assess not only how many complaints are resolved or unresolved but also which types of complaints are most frequently resolved or still pending. This helps in identifying patterns in complaint resolution and provides insights into which complaint categories may need more attention to resolve.
**Color Scheme:**
The 'inferno' color scheme is used to differentiate between the various disposition statuses, with each status getting a unique shade. This gradient of colors helps visualize the proportions and makes the chart more visually engaging. Lighter shades correspond to a higher frequency of a particular disposition status in the given complaint type.
""")
elif viz_type == "Monthly Trends by Complaint Type":
    st.subheader("Monthly Trends Grouped by Complaint Types")
    monthly_trends = filtered_data.groupby(['Month Reported', 'Type of Complaint']).size().reset_index(name='Count')
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color='Type of Complaint:N'
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
**What this visualization shows:**
This line chart visualizes the monthly trends in complaint counts, grouped by complaint type. It allows tracking changes in complaint frequencies over time and identifying patterns or spikes in specific categories.
**Why it's interesting:**
By visualizing these trends, we can identify whether certain complaint types are seasonal or are influenced by specific events. This information helps prioritize resources and refine strategies for complaint management.
**Color Scheme:**
Different complaint types are represented by distinct colors, enabling easy comparison of trends across categories.
""")
elif viz_type == "Complaints Over Time":
    st.subheader("Complaints Over Time")
    # One point per calendar day inside the selected date range
    complaints_over_time = filtered_data_time.groupby(filtered_data_time['Date Reported'].dt.date).size()
    fig, ax = plt.subplots()
    ax.plot(complaints_over_time.index, complaints_over_time.values, marker='o', color='tab:purple')
    ax.set_title("Complaints Over Time")
    st.pyplot(fig)
    plt.close(fig)
    st.write("""
**What this visualization shows:**
This visualization displays the trend of complaints over time using a line chart. It shows the number of complaints reported , making it easy to spot peaks or declines in complaints.
**Why it's interesting:**
By examining the complaints over time, users can identify patterns, such as specific days or periods with higher or lower complaint volumes. This can inform decision-making and help allocate resources more effectively during high-complaint periods.
**Color Scheme:**
The use of a purple color scheme highlights the flow and continuity of the data, providing a clear view of the patterns over time.
""")
elif viz_type == "Complaints by Housing Block and Type":
    st.subheader("Complaints by Housing Block and Type")
    # filtered_data already reflects the sidebar's year and housing-block
    # selections, so it is pivoted directly: rows are blocks, columns are
    # complaint types, cells are complaint counts.
    complaint_pivot = filtered_data.pivot_table(
        index='Housing Block',
        columns='Type of Complaint',
        values='Disposition',
        aggfunc='count',
        fill_value=0
    )
    # Ensuring the pivoted data is numeric for plotting
    complaint_pivot = complaint_pivot.astype(float)
    # Plotting the data
    fig, ax = plt.subplots(figsize=(10, 6))
    complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
    st.pyplot(fig)
    plt.close(fig)
    st.write("""
**What this visualization shows:**
This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the total number of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks.
**Why it's interesting:**
By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
**Color Scheme:**
The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
""")
# elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
# st.subheader("Complaints by Housing Block and Type- Incorporating Suggestions Based on Professor's Feedback")
# # Filtering the data based on the selected year and housing block
# filtered_data_time = data # Use filtered_data if date range is not needed
# if selected_year != 'All Time':
# filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
# # Further filtering by Housing Block (if applicable)
# if selected_block != 'All Blocks':
# filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
# # Pivoting the data based on the filtered data
# complaint_pivot = filtered_data_time.pivot_table(
# index='Housing Block',
# columns='Type of Complaint',
# values='Disposition',
# aggfunc='count',
# fill_value=0
# )
# # Ensuring the pivoted data is numeric for plotting
# complaint_pivot = complaint_pivot.astype(float)
# # Desired order for the housing blocks
# desired_order = [
# '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
# '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
# '1200 block', '1300 block', '1400 block', '1500 block', '1600 block',
# '1700 block', '1800 block', '1900 block', '2000 block', '2100 block',
# '2200 block', '2300 block', '2400 block', '2500 block', '2600 block',
# '2700 block', '2800 block', '2900 block', '3000 block', '3100 block',
# '3200 block', '3300 block', '3400 block', '3500 block', '3600 block',
# '3700 block', '3800 block', '3900 block', '4000 block', '4100 block',
# '4200 block', '4300 block', '4400 block', '4500 block', '4600 block',
# '4700 block', '4800 block', '4900 block', '5000 block'
# ]
# # Reordering the index of the pivot table according to the desired order
# complaint_pivot = complaint_pivot.reindex(desired_order)
# # Calculating percentages for each complaint type per housing block
# percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
# # Plotting the data
# fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
# # Adding percentage labels to the plot
# ax = fig.gca()
# for idx, block in enumerate(complaint_pivot.index):
# cumulative_height = 0
# for i, complaint_type in enumerate(complaint_pivot.columns):
# count = complaint_pivot.iloc[idx, i]
# percent = percentages.iloc[idx, i]
# if count > 0:
# # Compute the position for the percentage label
# x_pos = idx - 0.4 + 0.8 / 2 # Adjusting the position of the label
# y_pos = cumulative_height + count / 2
# ax.text(
# x_pos, y_pos, f"{percent:.1f}%",
# ha='center', va='center',
# fontsize=10, color='black',
# bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
# )
# cumulative_height += count
# # Display the plot in Streamlit
# st.pyplot(fig)
# # writeup
# st.write("""
# **What this visualization shows:**
# This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
# **Why it's interesting:**
# By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
# **Color Scheme:**
# The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
# """)
# In the above code, we incorporated all of the professor's suggestions and refined the chart to make it more useful for analysis while ensuring good aesthetics. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
# if viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
# st.subheader("Complaints by Housing Block and Type - Incorporating Suggestions Based on Professor's Feedback")
# # Filtering the data based on the selected year and housing block
# filtered_data_time = data # Use filtered_data if date range is not needed
# if selected_year != 'All Time':
# filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
# # Further filtering by Housing Block (if applicable)
# if selected_block != 'All Blocks':
# filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
# # Pivoting the data based on the filtered data
# complaint_pivot = filtered_data_time.pivot_table(
# index='Housing Block',
# columns='Type of Complaint',
# values='Disposition',
# aggfunc='count',
# fill_value=0
# )
# # Ensuring the pivoted data is numeric for plotting
# complaint_pivot = complaint_pivot.astype(float)
# # Desired order for the housing blocks
# desired_order = [
# '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
# '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
# '1200 block', '1300 block', '1400 block', '1500 block', '1600 block',
# '1700 block', '1800 block', '1900 block', '2000 block', '2100 block',
# '2200 block', '2300 block', '2400 block', '2500 block', '2600 block',
# '2700 block', '2800 block', '2900 block', '3000 block', '3100 block',
# '3200 block', '3300 block'
# ]
# # Reordering the index of the pivot table according to the desired order
# complaint_pivot = complaint_pivot.reindex(desired_order)
# # If a single block is selected, filter for only that block
# if selected_block != 'All Blocks':
# complaint_pivot = complaint_pivot.loc[[selected_block]]
# # Calculating percentages for each complaint type per housing block
# percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
# # Plotting the data
# fig, ax = plt.subplots(figsize=(10, 6))
# complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
# # Adjusting the x-axis ticks
# if selected_block != 'All Blocks':
# ax.set_xticks([0]) # Only one label
# ax.set_xticklabels([selected_block], rotation=0)
# else:
# # Show every nth label to avoid overcrowding
# tick_spacing = max(1, len(complaint_pivot) // 10) # Adjust based on the number of blocks
# ax.set_xticks(range(0, len(complaint_pivot.index), tick_spacing))
# ax.set_xticklabels(complaint_pivot.index[::tick_spacing], rotation=45, ha='right')
# # Adding percentage labels to the plot
# for idx, block in enumerate(complaint_pivot.index):
# cumulative_height = 0
# for i, complaint_type in enumerate(complaint_pivot.columns):
# count = complaint_pivot.iloc[idx, i]
# percent = percentages.iloc[idx, i]
# if count > 0:
# # Compute the position for the percentage label
# x_pos = idx - 0.4 + 0.8 / 2 # Adjusting the position of the label
# y_pos = cumulative_height + count / 2
# ax.text(
# x_pos, y_pos, f"{percent:.1f}%",
# ha='center', va='center',
# fontsize=10, color='black',
# bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
# )
# cumulative_height += count
# # Setting labels and title
# ax.set_xlabel('Housing Block')
# ax.set_ylabel('Number of Complaints')
# ax.set_title('Complaints by Housing Block and Type')
# # Display the plot in Streamlit
# st.pyplot(fig)
# # Writeup
# st.write("""
# **What this visualization shows:**
# This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
# **Why it's interesting:**
# By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
# **Color Scheme:**
# The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
# """)
# Footer: horizontal rule followed by the data-source attribution.
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal - https://data.urbanaillinois.us/Environment/Nuisance-Complaints/tsn9-95m3/about_data ")