Spaces:

ralate2
/

Group3FinalProjectPart3

Sleeping

File size: 20,041 Bytes

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt
from datetime import datetime
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px

# Set page config
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Title and introduction
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown("""
**Team Members:** 
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)

This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
""")

# Load and clean data
@st.cache_data
def load_and_clean_data():
    data = pd.read_csv('Nuisance_Complaints_20241130.csv')
    data = data.dropna(subset=['File Number'])
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Fill missing dates
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Notice Mailed or Given'] = \
        data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Reported'] + pd.Timedelta(days=median_delay)

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
        lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
    )
    data['Disposition'] = data.apply(
        lambda row: most_common_disposition[row['Type of Complaint']] if pd.isna(row['Disposition']) else row['Disposition'], axis=1
    )

    data['Processing Time'] = np.where(
        data['File Close Date'].notna(),
        (data['File Close Date'] - data['Date Reported']).dt.days,
        np.nan
    )

    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()), 'Method Submitted'] = 'Online'
    data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month
    return data

# Load the data
data = load_and_clean_data()

# Sidebar for controls
st.sidebar.header("Dashboard Controls")

# First, define the visualization type (viz_type) selection
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Complaints by Disposition", "Submission Methods","Monthly Trends by Complaint Type",
    "Complaints Over Time", "Complaints by Housing Block and Type"
])

# Remove the year selection when certain visualizations are selected
if viz_type not in ["Complaints Over Time", "Complaints by Housing Block and Type"]:
    year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
    selected_year = st.sidebar.selectbox("Select Year", options=year_options)
else:
    selected_year = 'All Time'  # Default to 'All Time' if visualization doesn't require year

# Date Range Selector for Complaints Over Time (only show when Complaints Over Time is selected)
if viz_type == "Complaints Over Time":
    start_date = st.sidebar.date_input("Start Date", pd.to_datetime("2020-01-01"))
    end_date = st.sidebar.date_input("End Date", pd.to_datetime("2024-12-31"))

# Allow the user to select the type of chart (Bar or Pie) for Submission Methods
if viz_type == "Submission Methods":
    plot_type = st.sidebar.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])

# Dropdown for Housing Block (only show when Complaints by Housing Block and Type is selected)
if viz_type == "Complaints by Housing Block and Type":
    block_options = ['All Blocks'] + sorted(data['Housing Block'].unique().tolist())
    selected_block = st.sidebar.selectbox("Select Housing Block", options=block_options)

# Ensure selected_block is only used if defined
if viz_type == "Complaints by Housing Block and Type" and 'selected_block' not in locals():
    selected_block = 'All Blocks'  # Default to 'All Blocks' if no selection made

# Filter data based on selected year
if selected_year != 'All Time':
    filtered_data = data[data['Year Reported'] == selected_year]
else:
    filtered_data = data  

# Further filter by Housing Block
if 'selected_block' in locals() and selected_block != 'All Blocks':
    filtered_data = filtered_data[filtered_data['Housing Block'] == selected_block]

# Filter data based on date range (only for Complaints Over Time visualization)
if viz_type == "Complaints Over Time":
    filtered_data_time = filtered_data[
        (filtered_data['Date Reported'] >= pd.to_datetime(start_date)) & 
        (filtered_data['Date Reported'] <= pd.to_datetime(end_date))
    ]
else:
    filtered_data_time = filtered_data

# Filter data based on selected housing block (only for Complaints by Housing Block visualization)
if viz_type == "Complaints by Housing Block and Type" and selected_block != 'All Blocks':
    filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]

# Header for selected year
st.header(f"Analysis for {'All Time' if selected_year == 'All Time' else selected_year}")


# Display metrics
col1, col2 = st.columns(2)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    most_common = filtered_data['Type of Complaint'].value_counts().index[0] if not filtered_data.empty else "N/A"
    st.metric("Most Common Type", most_common)

# Visualizations
if viz_type == "Complaint Types":
    st.subheader("Top 5 Complaint Types Pie Chart")
    
    # Prepare data: Select the top 5 complaint types
    top_complaints = filtered_data['Type of Complaint'].value_counts().nlargest(5).reset_index()
    top_complaints.columns = ['Complaint Type', 'Count']
    
    # Create an interactive pie chart with the 'inferno' color scheme
    fig = px.pie(
        top_complaints,
        names='Complaint Type',
        values='Count',
        title="Top 5 Complaint Types Distribution",
        color_discrete_sequence=px.colors.sequential.Inferno, 
        labels={"Count": "Number of Complaints", "Complaint Type": "Type of Complaint"},
        hover_data=['Count']
    )
    fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
    
    # Display chart
    st.plotly_chart(fig, use_container_width=True)
    
    # Updated write-up
    st.write("""
    **What this visualization shows:**  
    This interactive pie chart displays the distribution of the top 5 complaint types by year.  
    **Why it's interesting:**  
    Hovering over each segment reveals detailed information, including the complaint type, the number of complaints, and its percentage of the total. By focusing on the top 5 complaint categories, this visualization helps identify the most commonly reported issues, enabling better prioritization of resources and targeted interventions.  
    **Color Scheme:**  
    Each complaint type is represented by a unique color from the 'inferno' color scheme, which visually distinguishes between categories and makes the chart more engaging.  
    """)

elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    
    # Generate the heatmap
    map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
    m = folium.Map(location=map_center, zoom_start=12)
    heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
    HeatMap(heat_data).add_to(m)
    
    # Display the map
    st_folium(m, width=700, height=500)
    
    # Write-up
    st.write("""
    **What this visualization shows:**  
    This clustered heatmap visualizes complaint locations across the city.  
    **Why it's interesting:**  
    It highlights geographic areas with higher complaint densities, known as hotspots. These areas can be targeted for intervention and resource allocation.  
    **Color Scheme:**  
    The heatmap uses gradient colors, where warmer tones (red/orange) represent higher densities of complaints, providing a clear visual cue for problem areas.  
    """)


# elif viz_type == "Resolution Status":
#     st.subheader("Interactive Complaint Resolution Status")
#     resolution_counts = filtered_data['Disposition'].value_counts().reset_index()
#     resolution_counts.columns = ['Disposition', 'Count']
#     chart = alt.Chart(resolution_counts).mark_arc(innerRadius=50).encode(
#         theta=alt.Theta(field="Count", type="quantitative"),
#         color=alt.Color(field="Disposition", type="nominal"),
#         tooltip=[
#             alt.Tooltip("Disposition", title="Resolution"),
#             alt.Tooltip("Count", title="Count")
#         ]
#     )
#     st.altair_chart(chart, use_container_width=True)

#     st.write("""
#     **What this visualization shows:**  
#     This interactive donut chart displays the distribution of complaint resolutions, such as resolved, unresolved, or escalated cases.  
#     **Why it's interesting:**  
#     By analyzing the resolution status, we can assess the effectiveness of complaint handling and identify areas for improvement.  
#     **Color Scheme:**  
#     Each resolution status is represented by a distinct color, making it easy to differentiate between categories and quickly interpret the data.  
#     """)
# We collectively decided to opt for more engaging and interactive charts instead of the above chart.

# Submission Methods Analysis
elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")

    # Allow the user to select the type of chart (Bar or Pie)
    plot_type = st.selectbox("Select Plot Type", options=["Bar Chart", "Pie Chart"])

    # Get the top 5 submission methods
    submission_counts = filtered_data['Method Submitted'].value_counts().nlargest(5)
    submission_data = submission_counts.reset_index()
    submission_data.columns = ['Submission Method', 'Count']

    if plot_type == "Bar Chart":
        # Create a bar chart with Seaborn
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=submission_data['Count'], y=submission_data['Submission Method'], palette='inferno', ax=ax)
        st.pyplot(fig)
    
    elif plot_type == "Pie Chart":
        # Create an interactive pie chart with Plotly
        fig = px.pie(
            submission_data,
            names='Submission Method',
            values='Count',
            title="Top 5 Submission Methods Distribution",
            color_discrete_sequence=px.colors.inferno,
            labels={"Count": "Number of Complaints", "Submission Method": "Method Submitted"},
            hover_data=['Count']
        )
        fig.update_traces(textinfo='percent+label', hovertemplate='<b>%{label}</b><br>Complaints: %{value}<br>Percentage: %{percent}')
        st.plotly_chart(fig, use_container_width=True)

    # Updated write-up
    st.write("""
    **What this visualization shows:**  
    This chart shows the number of complaints submitted via different methods, such as email, phone, online form, etc., with a focus on the top 5 submission methods.
    
    **Why it's interesting:**  
    By analyzing submission methods, we can understand how users prefer to submit complaints. This insight helps in focusing efforts on improving the most used channels, ensuring better user engagement.
    
    **Color Scheme:**  
    The 'inferno' color palette highlights differences in submission frequency, with darker shades representing higher submission counts.
    """)



# elif viz_type == "Complaints by Disposition":
#     st.subheader("Complaints by Disposition")
#     disposition_counts = filtered_data['Disposition'].value_counts()
#     fig, ax = plt.subplots(figsize=(10, 6))
#     sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette='viridis', ax=ax)
#     st.pyplot(fig)

#     st.write("""
#     **What this visualization shows:**  
#     This bar chart displays the distribution of complaints by their resolution status (disposition), such as 'Resolved', 'Unresolved', etc.  
#     **Why it's interesting:**  
#     By examining the disposition of complaints, organizations can assess how effectively issues are being addressed and identify any areas needing improvement.  
#     **Color Scheme:**  
#     The 'viridis' color palette highlights differences in complaint resolution status, with lighter shades indicating a higher frequency of resolved complaints.  
#     """)

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    
    # Group by Complaint Type and Disposition to make the analysis more complex
    disposition_by_complaint_type = filtered_data.groupby(['Type of Complaint', 'Disposition']).size().unstack(fill_value=0)
    
    # Plot the bar chart with the 'inferno' color scheme
    fig, ax = plt.subplots(figsize=(12, 8))
    disposition_by_complaint_type.plot(kind='bar', stacked=True, colormap='inferno', ax=ax)
    
    ax.set_title("Complaints by Disposition and Complaint Type")
    ax.set_ylabel("Number of Complaints")
    ax.set_xlabel("Complaint Type")
    
    # Display the plot
    st.pyplot(fig)

    st.write("""
    **What this visualization shows:**  
    This stacked bar chart displays the distribution of complaints by their disposition for each complaint type. The bars represent different complaint categories, and each bar is broken down by the resolution status.
    
    **Why it's interesting:**  
    By combining complaint type and resolution status, this chart allows organizations to assess not only how many complaints are resolved or unresolved but also which types of complaints are most frequently resolved or still pending. This helps in identifying patterns in complaint resolution and provides insights into which complaint categories may need more attention to resolve.
    
    **Color Scheme:**  
    The 'inferno' color scheme is used to differentiate between the various disposition statuses, with each status getting a unique shade. This gradient of colors helps visualize the proportions and makes the chart more visually engaging. Lighter shades correspond to a higher frequency of a particular disposition status in the given complaint type.
    """)



elif viz_type == "Monthly Trends by Complaint Type":
    st.subheader("Monthly Trends Grouped by Complaint Types")
    monthly_trends = filtered_data.groupby(['Month Reported', 'Type of Complaint']).size().reset_index(name='Count')
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color='Type of Complaint:N'
    )
    st.altair_chart(chart, use_container_width=True)

    st.write("""
    **What this visualization shows:**  
    This line chart visualizes the monthly trends in complaint counts, grouped by complaint type. It allows tracking changes in complaint frequencies over time and identifying patterns or spikes in specific categories.
    
    **Why it's interesting:**  
    By visualizing these trends, you can identify whether certain complaint types are seasonal or are influenced by specific events. This information helps prioritize resources and refine strategies for complaint management.
    
    **Color Scheme:**  
    Different complaint types are represented by distinct colors, enabling easy comparison of trends across categories.
    """)


# elif viz_type == "Top Complaint Types":
#     st.subheader("Top Complaint Types")
#     complaint_counts = filtered_data['Type of Complaint'].value_counts().head(10)
#     fig, ax = plt.subplots()
#     sns.barplot(x=complaint_counts.values, y=complaint_counts.index, palette="inferno", ax=ax)
#     st.pyplot(fig)

#     st.write("""
#     **What this visualization shows:**  
#     This bar chart displays the top 10 most common complaint types based on the number of occurrences. It provides a clear view of the most frequently reported issues.

#     **Why it's interesting:**  
#     By focusing on the top complaint types, organizations can identify and prioritize the issues that impact the majority of their users or customers. This can lead to targeted improvements in service or support efforts.

#     **Color Scheme:**  
#     The 'inferno' palette is used to emphasize the frequency of each complaint type, with darker shades representing higher frequencies.
#     """)

elif viz_type == "Complaints Over Time":
    st.subheader("Complaints Over Time")
    complaints_over_time = filtered_data_time.groupby(filtered_data_time['Date Reported'].dt.date).size()
    
    fig, ax = plt.subplots()
    ax.plot(complaints_over_time.index, complaints_over_time.values, marker='o', color='tab:purple')
    ax.set_title("Complaints Over Time")
    st.pyplot(fig)

    st.write("""
    **Write-up:** This visualization displays the trend of complaints over time using a line chart. 
    It shows the number of complaints reported for each day, making it easy to spot peaks or declines in complaints. 
    The use of a blue color scheme highlights the flow and continuity of the data, providing a clear view of the patterns over time.
    """)

elif viz_type == "Complaints by Housing Block and Type":
    st.subheader("Complaints by Housing Block and Type")
    
    # Filter the data based on the selected year and housing block
    filtered_data_time = filtered_data  # Use filtered_data if date range is not needed
    if selected_year != 'All Time':
        filtered_data_time = filtered_data_time[filtered_data_time['Year Reported'] == selected_year]
    
    # Further filter by Housing Block (if applicable)
    if selected_block != 'All Blocks':
        filtered_data_time = filtered_data_time[filtered_data_time['Housing Block'] == selected_block]
    
    # Pivot the data based on the filtered data
    complaint_pivot = filtered_data_time.pivot_table(
        index='Housing Block',
        columns='Type of Complaint',
        values='Disposition',
        aggfunc='count',
        fill_value=0
    )
    
    # Ensure the pivoted data is numeric for plotting
    complaint_pivot = complaint_pivot.astype(float)

    # Plot the data
    fig = complaint_pivot.plot(kind='bar', stacked=True, colormap='inferno', figsize=(10, 6)).get_figure()
    st.pyplot(fig)
    
    st.write("""
    **What this visualization shows:**  
    This line chart shows the trend of complaints over time, displaying the number of complaints reported for each day. It helps identify patterns, peaks, and trends in the complaints data.

    **Why it's interesting:**  
    By visualizing complaints over time, organizations can track the effectiveness of their responses and interventions. It also helps identify recurring spikes or declines in complaints, which could be indicative of specific events or issues.

    **Insight Potential:**  
    This chart can be used to correlate with external factors, such as changes in policies, incidents, or other events that might lead to a sudden increase or decrease in complaints. The insights can inform better decision-making and improve the organization's complaint management strategy.
    """)

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal.")