# Source: Hugging Face Space by ralate2
# Commit: "Update app.py" (1430402, verified)
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from datetime import datetime
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px
# Page-level configuration (browser tab title and wide layout).
st.set_page_config(layout="wide", page_title="Nuisance Complaints Dashboard")

# Dashboard heading followed by the author credit and a short introduction.
intro_markdown = """
* By Ruchita Alate (ralate2@illinois.edu)
* This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
"""
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown(intro_markdown)
# Loading and cleaning data
def _most_common_or_pending(values):
    """Return the most frequent non-null entry of *values*, or 'Pending' if there is none."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else 'Pending'


@st.cache_data
def load_and_clean_data(csv_path='Nuisance_Complaints_20241130.csv'):
    """Read the nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps, in order:
      * drop rows without a 'File Number';
      * parse the three date columns ('File Close Date' tolerates bad values);
      * fill missing notice dates with 'Date Reported' plus the median
        report-to-notice delay;
      * fill missing complaint types with 'Unknown' and missing dispositions
        with the most common disposition of the same complaint type;
      * derive 'Processing Time' in days (NaN while a file is still open);
      * fill missing submission methods ('Online' when flagged as submitted
        online, otherwise the most common method);
      * drop rows lacking 'Submitted Online?' or 'Mapped Location';
      * derive 'Latitude', 'Longitude' and 'Month Reported', and normalise
        'Housing Block' to stripped lower case.

    Args:
        csv_path: Location of the CSV export. Defaults to the file shipped
            with the app.

    Returns:
        The cleaned pandas DataFrame.
    """
    data = pd.read_csv(csv_path)
    # .copy() so the column assignments below act on an independent frame.
    data = data.dropna(subset=['File Number']).copy()

    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Filling missing dates
    notice_delay_days = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days
    median_delay = notice_delay_days.median()
    missing_notice = data['Date Notice Mailed or Given'].isna()
    data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
        data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
    )

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    # Per-type fallback disposition, mapped onto the rows in one vectorised
    # step instead of a row-by-row apply.
    fallback_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_most_common_or_pending)
    data['Disposition'] = data['Disposition'].fillna(
        data['Type of Complaint'].map(fallback_disposition)
    )

    # Subtracting from NaT already yields NaN, so open files stay NaN.
    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days.astype(float)

    # .eq(True) yields a clean boolean mask even when the flag column holds NaN.
    online_without_method = data['Submitted Online?'].eq(True) & data['Method Submitted'].isna()
    data.loc[online_without_method, 'Method Submitted'] = 'Online'
    method_modes = data['Method Submitted'].mode()
    if not method_modes.empty:  # nothing to fill with if every value is missing
        data['Method Submitted'] = data['Method Submitted'].fillna(method_modes.iloc[0])

    data = data.dropna(subset=['Submitted Online?', 'Mapped Location']).copy()

    # 'Mapped Location' is parsed as "(<lat>, <lon>)".
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month

    # Normalise the 'Housing Block' labels so equal blocks compare equal.
    data['Housing Block'] = data['Housing Block'].str.strip().str.lower()
    return data
# Loading the data
data = load_and_clean_data()

# Visualizations that are filtered by housing block rather than by year.
# The second entry is not currently offered in the selector below; it is kept
# so these checks still match the earlier variant of the chart.
BLOCK_VIZ_TYPES = (
    "Complaints by Housing Block and Type",
    "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)",
)
# Blocks numbered at or above this value are not offered in the block selector.
MAX_BLOCK_NUMBER = 3400


def _block_number(block):
    """Return the leading number of a label such as '1200 block', or None.

    None is returned for missing values, empty strings and labels that do not
    start with digits, so callers can skip them instead of crashing.
    """
    parts = block.split() if isinstance(block, str) else []
    if parts and parts[0].isdigit():
        return int(parts[0])
    return None


# Sidebar for controls
st.sidebar.header("Dashboard Controls")

# Defining the visualization type (viz_type) selection
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Complaints by Disposition", "Submission Methods",
    "Monthly Trends by Complaint Type", "Complaints Over Time", "Complaints by Housing Block and Type",
], key="viz_type_select")
uses_block_filter = viz_type in BLOCK_VIZ_TYPES

# The year selector is hidden for the time-series and housing-block views,
# which always work on the full time span.
if viz_type == "Complaints Over Time" or uses_block_filter:
    selected_year = 'All Time'
else:
    year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
    selected_year = st.sidebar.selectbox("Select Year", options=year_options, key="year_select")

# Date range selector (only shown for Complaints Over Time)
if viz_type == "Complaints Over Time":
    start_date = st.sidebar.date_input("Start Date", pd.to_datetime("2020-01-01"), key="start_date")
    end_date = st.sidebar.date_input("End Date", pd.to_datetime("2024-12-31"), key="end_date")

# Housing block selector (only shown for the housing-block views).
# selected_block always has a value, so later code never has to probe whether
# the name exists. The widget is created exactly once: a second selectbox
# with the same key would be rejected by Streamlit.
selected_block = 'All Blocks'
if uses_block_filter:
    numbered_blocks = {}
    for block in data['Housing Block'].dropna().unique():
        number = _block_number(block)
        if number is not None and number < MAX_BLOCK_NUMBER:
            numbered_blocks[block] = number
    # Order by block number so '200 block' precedes '1000 block'.
    block_options = ['All Blocks'] + sorted(
        numbered_blocks, key=lambda label: (numbered_blocks[label], label)
    )
    selected_block = st.sidebar.selectbox("Select Housing Block", options=block_options, key="block_select")

# Filtering data based on selected year
if selected_year == 'All Time':
    filtered_data = data
else:
    filtered_data = data[data['Year Reported'] == selected_year]

# Further filtering by Housing Block
if selected_block != 'All Blocks':
    filtered_data = filtered_data[filtered_data['Housing Block'] == selected_block]

# Filtering data based on date range (only for Complaints Over Time).
# Dates are compared at day granularity so that complaints reported at any
# time on the chosen end date are still included.
if viz_type == "Complaints Over Time":
    reported_day = filtered_data['Date Reported'].dt.normalize()
    filtered_data_time = filtered_data[
        (reported_day >= pd.to_datetime(start_date)) &
        (reported_day <= pd.to_datetime(end_date))
    ]
else:
    # filtered_data already carries the year and housing-block filters.
    filtered_data_time = filtered_data
# Section heading naming the year currently being analysed.
st.header(f"Analysis for {'All Time' if selected_year == 'All Time' else selected_year}")

# Two side-by-side summary metrics for the current selection.
total_column, common_type_column = st.columns(2)
total_column.metric("Total Complaints", len(filtered_data))

if filtered_data.empty:
    most_common = "N/A"
else:
    # value_counts() sorts by frequency, so the first label is the most common type.
    most_common = filtered_data['Type of Complaint'].value_counts().index[0]
common_type_column.metric("Most Common Type", most_common)
# Visualizations
if viz_type == "Complaint Types":
    st.subheader("Top 5 Complaint Types Pie Chart")

    # Keep only the five most frequent complaint types.
    type_counts = filtered_data['Type of Complaint'].value_counts()
    top_complaints = type_counts.nlargest(5).reset_index()
    top_complaints.columns = ['Complaint Type', 'Count']

    # Interactive pie chart drawn with the 'inferno' colour sequence.
    pie_options = dict(
        names='Complaint Type',
        values='Count',
        title="Top 5 Complaint Types Distribution",
        color_discrete_sequence=px.colors.sequential.Inferno,
        labels={"Count": "Number of Complaints", "Complaint Type": "Type of Complaint"},
        hover_data=['Count'],
    )
    fig = px.pie(top_complaints, **pie_options)
    fig.update_traces(
        textinfo='percent+label',
        hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}',
    )
    st.plotly_chart(fig, use_container_width=True)

    # Explanatory write-up shown under the chart.
    st.write("""
**What this visualization shows:**
This interactive pie chart displays the distribution of the top 5 complaint types by year.
**Why it's interesting:**
Hovering over each segment reveals detailed information, including the complaint type, the number of complaints, and its percentage of the total. By focusing on the top 5 complaint categories, this visualization helps identify the most commonly reported issues, enabling better prioritization of resources and targeted interventions.
**Color Scheme:**
Each complaint type is represented by a unique color from the 'inferno' color scheme, which visually distinguishes between categories and makes the chart more engaging.
""")
elif viz_type == "Geographic Distribution":
st.subheader("Clustered Heatmap of Complaints")
# Generating the heatmap
map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=12)
heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
HeatMap(heat_data).add_to(m)
# Displaying the map
st_folium(m, width=700, height=500)
# Write-up
st.write("""
**What this visualization shows:**
This clustered heatmap visualizes complaint locations across the city.
**Why it's interesting:**
It highlights geographic areas with higher complaint densities, known as hotspots. These areas can be targeted for intervention and resource allocation.
**Color Scheme:**
The heatmap uses gradient colors, where warmer tones (red/orange) represent higher densities of complaints, providing a clear visual cue for problem areas.
""")
# elif viz_type == "Resolution Status":
# st.subheader("Interactive Complaint Resolution Status")
# resolution_counts = filtered_data['Disposition'].value_counts().reset_index()
# resolution_counts.columns = ['Disposition', 'Count']
# chart = alt.Chart(resolution_counts).mark_arc(innerRadius=50).encode(
# theta=alt.Theta(field="Count", type="quantitative"),
# color=alt.Color(field="Disposition", type="nominal"),
# tooltip=[
# alt.Tooltip("Disposition", title="Resolution"),
# alt.Tooltip("Count", title="Count")
# ]
# )
# st.altair_chart(chart, use_container_width=True)
# st.write("""
# **What this visualization shows:**
# This interactive donut chart displays the distribution of complaint resolutions, such as resolved, unresolved, or escalated cases.
# **Why it's interesting:**
# By analyzing the resolution status, we can assess the effectiveness of complaint handling and identify areas for improvement.
# **Color Scheme:**
# Each resolution status is represented by a distinct color, making it easy to differentiate between categories and quickly interpret the data.
# """)
# We collectively decided to opt for more engaging and interactive charts instead of the above chart.
# Submission Methods Analysis
elif viz_type == "Submission Methods":
st.subheader("Submission Methods Analysis")
# Allowing the user to select the type of chart (Bar or Pie)
plot_type = st.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])
# Getting the top 5 submission methods
submission_counts = filtered_data['Method Submitted'].value_counts().nlargest(5)
submission_data = submission_counts.reset_index()
submission_data.columns = ['Submission Method', 'Count']
if plot_type == "Bar Chart":
# Creating a bar chart with Seaborn
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=submission_data['Count'], y=submission_data['Submission Method'], palette='inferno', ax=ax)
st.pyplot(fig)
elif plot_type == "Pie Chart":
# Creating an interactive pie chart with Plotly
fig = px.pie(
submission_data,
names='Submission Method',
values='Count',
title="Top 5 Submission Methods Distribution",
color='Submission Method', # Automatically generates colors for each segment
color_discrete_sequence=px.colors.sequential.Inferno, # Correct reference to Inferno color scale
labels={"Count": "Number of Complaints", "Submission Method": "Method Submitted"},
hover_data=['Count']
)
fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
st.plotly_chart(fig, use_container_width=True)
# Write-up
st.write("""
**What this visualization shows:**
This chart shows the number of complaints submitted via different methods, such as email, phone, online form, etc., with a focus on the top 5 submission methods. If one wishes to see the percentage wise distribution, pie chart option can be selected from the interactivity option provided.
**Why it's interesting:**
By analyzing submission methods, we can understand how users prefer to submit complaints. This insight helps in focusing efforts on improving the most used channels, ensuring better user engagement.
**Color Scheme:**
The 'inferno' color palette highlights differences in submission frequency, with darker shades representing higher submission counts.
""")
# elif viz_type == "Complaints by Disposition":
# st.subheader("Complaints by Disposition")
# disposition_counts = filtered_data['Disposition'].value_counts()
# fig, ax = plt.subplots(figsize=(10, 6))
# sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette='viridis', ax=ax)
# st.pyplot(fig)
# st.write("""
# **What this visualization shows:**
# This bar chart displays the distribution of complaints by their resolution status (disposition), such as 'Resolved', 'Unresolved', etc.
# **Why it's interesting:**
# By examining the disposition of complaints, organizations can assess how effectively issues are being addressed and identify any areas needing improvement.
# **Color Scheme:**
# The 'viridis' color palette highlights differences in complaint resolution status, with lighter shades indicating a higher frequency of resolved complaints.
# """)
# We collectively decided to opt for more engaging and interactive charts instead of the above chart.
elif viz_type == "Complaints by Disposition":
st.subheader("Complaints by Disposition")
# Grouping by Complaint Type and Disposition to make the analysis more complex
disposition_by_complaint_type = filtered_data.groupby(['Type of Complaint', 'Disposition']).size().unstack(fill_value=0)
# Plotting the bar chart with the 'inferno' color scheme
fig, ax = plt.subplots(figsize=(12, 8))
disposition_by_complaint_type.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
ax.set_title("Complaints by Disposition and Complaint Type")
ax.set_ylabel("Number of Complaints")
ax.set_xlabel("Complaint Type")
# Displaying the plot
st.pyplot(fig)
st.write("""
**What this visualization shows:**
This stacked bar chart displays the distribution of complaints by their disposition for each complaint type. The bars represent different complaint categories, and each bar is broken down by the resolution status.
**Why it's interesting:**
By combining complaint type and resolution status, this chart allows organizations to assess not only how many complaints are resolved or unresolved but also which types of complaints are most frequently resolved or still pending. This helps in identifying patterns in complaint resolution and provides insights into which complaint categories may need more attention to resolve.
**Color Scheme:**
The 'inferno' color scheme is used to differentiate between the various disposition statuses, with each status getting a unique shade. This gradient of colors helps visualize the proportions and makes the chart more visually engaging. Lighter shades correspond to a higher frequency of a particular disposition status in the given complaint type.
""")
elif viz_type == "Monthly Trends by Complaint Type":
st.subheader("Monthly Trends Grouped by Complaint Types")
monthly_trends = filtered_data.groupby(['Month Reported', 'Type of Complaint']).size().reset_index(name='Count')
chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
x=alt.X('Month Reported:O', title='Month'),
y=alt.Y('Count:Q', title='Number of Complaints'),
color='Type of Complaint:N'
)
st.altair_chart(chart, use_container_width=True)
st.write("""
**What this visualization shows:**
This line chart visualizes the monthly trends in complaint counts, grouped by complaint type. It allows tracking changes in complaint frequencies over time and identifying patterns or spikes in specific categories.
**Why it's interesting:**
By visualizing these trends, we can identify whether certain complaint types are seasonal or are influenced by specific events. This information helps prioritize resources and refine strategies for complaint management.
**Color Scheme:**
Different complaint types are represented by distinct colors, enabling easy comparison of trends across categories.
""")
# elif viz_type == "Top Complaint Types":
# st.subheader("Top Complaint Types")
# complaint_counts = filtered_data['Type of Complaint'].value_counts().head(10)
# fig, ax = plt.subplots()
# sns.barplot(x=complaint_counts.values, y=complaint_counts.index, palette="inferno", ax=ax)
# st.pyplot(fig)
# st.write("""
# **What this visualization shows:**
# This bar chart displays the top 10 most common complaint types based on the number of occurrences. It provides a clear view of the most frequently reported issues.
# **Why it's interesting:**
# By focusing on the top complaint types, organizations can identify and prioritize the issues that impact the majority of their users or customers. This can lead to targeted improvements in service or support efforts.
# **Color Scheme:**
# The 'inferno' palette is used to emphasize the frequency of each complaint type, with darker shades representing higher frequencies.
# """)
# We collectively decided to opt for more engaging and interactive charts instead of the above chart.
elif viz_type == "Complaints Over Time":
st.subheader("Complaints Over Time")
complaints_over_time = filtered_data_time.groupby(filtered_data_time['Date Reported'].dt.date).size()
fig, ax = plt.subplots()
ax.plot(complaints_over_time.index, complaints_over_time.values, marker='o', color='tab:purple')
ax.set_title("Complaints Over Time")
st.pyplot(fig)
st.write("""
**What this visualization shows:**
This visualization displays the trend of complaints over time using a line chart. It shows the number of complaints reported , making it easy to spot peaks or declines in complaints.
**Why it's interesting:**
By examining the complaints over time, users can identify patterns, such as specific days or periods with higher or lower complaint volumes. This can inform decision-making and help allocate resources more effectively during high-complaint periods.
**Color Scheme:**
The use of a purple color scheme highlights the flow and continuity of the data, providing a clear view of the patterns over time.
""")
elif viz_type == "Complaints by Housing Block and Type":
st.subheader("Complaints by Housing Block and Type")
# Filtering the data based on the selected year and housing block
filtered_data_time = filtered_data # Use filtered_data if date range is not needed
if selected_year != 'All Time':
filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
# Further filtering by Housing Block (if applicable)
if selected_block != 'All Blocks':
filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
# Pivoting the data based on the filtered data
complaint_pivot = filtered_data_time.pivot_table(
index='Housing Block',
columns='Type of Complaint',
values='Disposition',
aggfunc='count',
fill_value=0
)
# Ensuring the pivoted data is numeric for plotting
complaint_pivot = complaint_pivot.astype(float)
# Plotting the data
fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
st.pyplot(fig)
st.write("""
**What this visualization shows:**
This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the total number of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks.
**Why it's interesting:**
By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
**Color Scheme:**
The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
""")
# elif viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
# st.subheader("Complaints by Housing Block and Type- Incorporating Suggestions Based on Professor's Feedback")
# # Filtering the data based on the selected year and housing block
# filtered_data_time = data # Use filtered_data if date range is not needed
# if selected_year != 'All Time':
# filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
# # Further filtering by Housing Block (if applicable)
# if selected_block != 'All Blocks':
# filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
# # Pivoting the data based on the filtered data
# complaint_pivot = filtered_data_time.pivot_table(
# index='Housing Block',
# columns='Type of Complaint',
# values='Disposition',
# aggfunc='count',
# fill_value=0
# )
# # Ensuring the pivoted data is numeric for plotting
# complaint_pivot = complaint_pivot.astype(float)
# # Desired order for the housing blocks
# desired_order = [
# '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
# '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
# '1200 block', '1300 block', '1400 block', '1500 block', '1600 block',
# '1700 block', '1800 block', '1900 block', '2000 block', '2100 block',
# '2200 block', '2300 block', '2400 block', '2500 block', '2600 block',
# '2700 block', '2800 block', '2900 block', '3000 block', '3100 block',
# '3200 block', '3300 block', '3400 block', '3500 block', '3600 block',
# '3700 block', '3800 block', '3900 block', '4000 block', '4100 block',
# '4200 block', '4300 block', '4400 block', '4500 block', '4600 block',
# '4700 block', '4800 block', '4900 block', '5000 block'
# ]
# # Reordering the index of the pivot table according to the desired order
# complaint_pivot = complaint_pivot.reindex(desired_order)
# # Calculating percentages for each complaint type per housing block
# percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
# # Plotting the data
# fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
# # Adding percentage labels to the plot
# ax = fig.gca()
# for idx, block in enumerate(complaint_pivot.index):
# cumulative_height = 0
# for i, complaint_type in enumerate(complaint_pivot.columns):
# count = complaint_pivot.iloc[idx, i]
# percent = percentages.iloc[idx, i]
# if count > 0:
# # Compute the position for the percentage label
# x_pos = idx - 0.4 + 0.8 / 2 # Adjusting the position of the label
# y_pos = cumulative_height + count / 2
# ax.text(
# x_pos, y_pos, f"{percent:.1f}%",
# ha='center', va='center',
# fontsize=10, color='black',
# bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
# )
# cumulative_height += count
# # Display the plot in Streamlit
# st.pyplot(fig)
# # writeup
# st.write("""
# **What this visualization shows:**
# This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
# **Why it's interesting:**
# By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks.
# **Color Scheme:**
# The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
# """)
# In the above code, we incorporated all of the professor's suggestions and refined the chart to make it more useful for analysis while ensuring good aesthetics. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
# if viz_type == "Complaints by Housing Block and Type (Incorporating Suggestions Based on Professor's Feedback)":
# st.subheader("Complaints by Housing Block and Type - Incorporating Suggestions Based on Professor's Feedback")
# # Filtering the data based on the selected year and housing block
# filtered_data_time = data # Use filtered_data if date range is not needed
# if selected_year != 'All Time':
# filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
# # Further filtering by Housing Block (if applicable)
# if selected_block != 'All Blocks':
# filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
# # Pivoting the data based on the filtered data
# complaint_pivot = filtered_data_time.pivot_table(
# index='Housing Block',
# columns='Type of Complaint',
# values='Disposition',
# aggfunc='count',
# fill_value=0
# )
# # Ensuring the pivoted data is numeric for plotting
# complaint_pivot = complaint_pivot.astype(float)
# # Desired order for the housing blocks
# desired_order = [
# '1 block', '100 block', '200 block', '300 block', '400 block', '500 block',
# '600 block', '700 block', '800 block', '900 block', '1000 block', '1100 block',
# '1200 block', '1300 block', '1400 block', '1500 block', '1600 block',
# '1700 block', '1800 block', '1900 block', '2000 block', '2100 block',
# '2200 block', '2300 block', '2400 block', '2500 block', '2600 block',
# '2700 block', '2800 block', '2900 block', '3000 block', '3100 block',
# '3200 block', '3300 block'
# ]
# # Reordering the index of the pivot table according to the desired order
# complaint_pivot = complaint_pivot.reindex(desired_order)
# # If a single block is selected, filter for only that block
# if selected_block != 'All Blocks':
# complaint_pivot = complaint_pivot.loc[[selected_block]]
# # Calculating percentages for each complaint type per housing block
# percentages = complaint_pivot.div(complaint_pivot.sum(axis=1), axis=0) * 100
# # Plotting the data
# fig, ax = plt.subplots(figsize=(10, 6))
# complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
# # Adjusting the x-axis ticks
# if selected_block != 'All Blocks':
# ax.set_xticks([0]) # Only one label
# ax.set_xticklabels([selected_block], rotation=0)
# else:
# # Show every nth label to avoid overcrowding
# tick_spacing = max(1, len(complaint_pivot) // 10) # Adjust based on the number of blocks
# ax.set_xticks(range(0, len(complaint_pivot.index), tick_spacing))
# ax.set_xticklabels(complaint_pivot.index[::tick_spacing], rotation=45, ha='right')
# # Adding percentage labels to the plot
# for idx, block in enumerate(complaint_pivot.index):
# cumulative_height = 0
# for i, complaint_type in enumerate(complaint_pivot.columns):
# count = complaint_pivot.iloc[idx, i]
# percent = percentages.iloc[idx, i]
# if count > 0:
# # Compute the position for the percentage label
# x_pos = idx - 0.4 + 0.8 / 2 # Adjusting the position of the label
# y_pos = cumulative_height + count / 2
# ax.text(
# x_pos, y_pos, f"{percent:.1f}%",
# ha='center', va='center',
# fontsize=10, color='black',
# bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
# )
# cumulative_height += count
# # Setting labels and title
# ax.set_xlabel('Housing Block')
# ax.set_ylabel('Number of Complaints')
# ax.set_title('Complaints by Housing Block and Type')
# # Display the plot in Streamlit
# st.pyplot(fig)
# # Writeup
# st.write("""
# **What this visualization shows:**
# This bar chart displays the distribution of complaints by Housing Block and Complaint Type. The data is stacked to show the percentage of complaints per block, categorized by type. This allows for a quick comparison of the most common complaint types across different housing blocks. While the percentages may be challenging to read when data for all blocks is displayed, they become more valuable and easier to interpret when a single block is selected. Selecting a specific block allows for clearer insights into the proportion of each complaint type within that block, providing more actionable information.
# **Why it's interesting:**
# By analyzing the distribution of complaints by both block and type, organizations can identify specific areas where certain complaint types are more prevalent. This insight helps target interventions and allocate resources more efficiently based on the most common issues in different housing blocks. Given that the data from block 3400 onwards is very sparse, we decided to exclude these records. This adjustment helped focus the visualization on the more relevant data, providing clearer insights and improving its overall effectiveness for analysis.
# **Color Scheme:**
# The 'inferno' color palette is used to represent different complaint types, with darker shades indicating a higher frequency of complaints. The stacked bar chart makes it easy to compare the distribution of complaints by block and type.
# """)
# Footer: a horizontal rule followed by the dataset attribution.
footer_sections = (
    "---",
    "Dataset provided by the City of Urbana Open Data Portal - https://data.urbanaillinois.us/Environment/Nuisance-Complaints/tsn9-95m3/about_data ",
)
for footer_section in footer_sections:
    st.markdown(footer_section)