File size: 8,070 Bytes
ba61270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px
from datetime import datetime

# Set page config — must be the first Streamlit call in the script.
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Title and introduction (team roster rendered as a markdown bullet list).
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown("""
**Team Members:** 
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
""")
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")

# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load ``Nuisance_Complaints.csv`` and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows missing 'File Number' (untrackable complaints)
      * parse the three date columns; impute missing notice dates as
        report date + median report-to-notice delay
      * fill missing 'Type of Complaint' with 'Unknown' and missing
        'Disposition' with the modal disposition for that type
      * derive 'Processing Time' (days) for closed files only
      * normalize 'Method Submitted' and split 'Mapped Location'
        into float 'Latitude'/'Longitude' columns

    Returns:
        pd.DataFrame: the cleaned complaints data.

    Raises:
        Exception: any preprocessing failure is surfaced in the UI
            via ``st.error`` and then re-raised for the caller.
    """
    try:
        data = pd.read_csv('Nuisance_Complaints.csv')

        # Rows without a file number cannot be tracked — drop them.
        data = data.dropna(subset=['File Number'])

        # Parse dates. Close date may legitimately be missing (open
        # cases), so coerce unparseable values to NaT there.
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates as report date + median delay.
        # Guard: median is NaN when every notice date is missing, and
        # pd.Timedelta(days=NaN) would raise.
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        if pd.notna(median_delay):
            missing_notice = data['Date Notice Mailed or Given'].isna()
            data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
                data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
            )

        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        def _modal_or_pending(dispositions):
            # Most frequent disposition for a complaint type; 'Pending'
            # when a type has no recorded dispositions at all.
            modes = dispositions.mode()
            return modes.iloc[0] if len(modes) > 0 else 'Pending'

        # Vectorized fill of missing dispositions by complaint type
        # (replaces a slow row-wise DataFrame.apply).
        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_modal_or_pending)
        data['Disposition'] = data['Disposition'].fillna(
            data['Type of Complaint'].map(most_common_disposition)
        )

        # Processing time is only meaningful for closed files.
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # 'Submitted Online?' implies the submission method; fill the
        # remainder with the overall modal method.
        data.loc[
            (data['Submitted Online?']) & (data['Method Submitted'].isna()),
            'Method Submitted'
        ] = 'Online'
        data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

        # These two columns are required downstream (map + filters).
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like "(lat, lon)" — split into floats.
        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

        # Year filter in the sidebar expects plain integers.
        data['Year Reported'] = data['Year Reported'].astype(int)

        return data

    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        raise  # bare raise preserves the original traceback

# Load the data once at startup; load_and_clean_data() already shows a
# detailed st.error before re-raising, so this boundary just confirms
# success or halts the whole app on failure.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as e:
    st.error(f"Error loading data: {str(e)}")
    st.stop()

# Sidebar: year filter and visualization picker.
st.sidebar.header("Dashboard Controls")

# Year options: 'All Time' first, then every reported year ascending.
available_years = sorted(int(y) for y in data['Year Reported'].unique())

selected_year = st.sidebar.selectbox(
    "Select Year",
    options=['All Time'] + available_years,
)

# Which chart the main panel should render.
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Complaints by Disposition"]
)


# Filter to the chosen year ('All Time' keeps the full dataset) and show
# a matching section header. Single branch replaces two back-to-back
# checks of the same condition.
if selected_year == 'All Time':
    filtered_data = data
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for Year {selected_year}")

# Headline metrics for the filtered selection.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Mean is NaN when the selection has no closed cases.
    avg_time = filtered_data['Processing Time'].mean()
    st.metric("Average Processing Time",
              f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
with col3:
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
        st.metric("Most Common Type", most_common)
    else:
        st.metric("Most Common Type", "N/A")
if viz_type == "Complaint Types":
    # Interactive donut chart of complaint-type frequencies.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']

    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Geographic Distribution":
    # Heatmap of complaint locations.
    st.subheader("Clustered Heatmap of Complaints")
    coords = filtered_data[['Latitude', 'Longitude']].dropna()
    if coords.empty:
        # Guard: mean of an empty column is NaN, and a NaN map center
        # breaks folium rendering.
        st.write("No location data available for the selected year.")
    else:
        map_center = [coords['Latitude'].mean(), coords['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(coords.values.tolist()).add_to(m)
        st_data = st_folium(m, width=700, height=500)

elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw on the explicit axes instead of implicit pyplot state, like
    # the "Complaints by Disposition" branch below.
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()

    if not disposition_counts.empty:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=disposition_counts.values, y=disposition_counts.index,
                    palette="viridis", ax=ax)
        ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
        ax.set_xlabel('Number of Complaints', fontsize=12)
        ax.set_ylabel('Disposition', fontsize=12)
        st.pyplot(fig)
    else:
        st.write("No data available for the selected year.")

# Additional insights
st.header("Key Insights")
col1, col2 = st.columns(2)

with col1:
    st.subheader("Top 3 Complaint Types")
    top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
    st.write(top_complaints)

with col2:
    st.subheader("Resolution Efficiency")
    if len(filtered_data) > 0:
        # Share of complaints per disposition, as a percentage.
        resolution_rate = (filtered_data['Disposition'].value_counts() /
                           len(filtered_data) * 100).round(2)
        st.write(resolution_rate)
    else:
        # Guard: dividing by a zero-length frame yields inf/NaN noise.
        st.write("No data available for the selected year.")

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")