Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import folium | |
| from folium.plugins import HeatMap | |
| from streamlit_folium import st_folium | |
| import plotly.express as px | |
| from datetime import datetime | |
# Streamlit requires page config before any other UI call.
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Title, team roster, and a one-line description of the dashboard.
st.title("Nuisance Complaints Analysis Dashboard")

team_roster = """
**Team Members:**
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
"""
st.markdown(team_roster)
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
# Load and clean data
def load_and_clean_data():
    """Load ``Nuisance_Complaints.csv`` and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows missing the 'File Number' record key
      * parse the three date columns; a missing notice date is imputed as
        report date + the median observed report->notice delay
      * fill missing complaint types with 'Unknown'; fill missing
        dispositions with the modal disposition of the same complaint type
        ('Pending' when a type has no observed disposition)
      * derive 'Processing Time' in days for closed files (NaN when open)
      * normalise 'Method Submitted' (online rows -> 'Online', rest -> mode)
      * split '(lat, lon)' in 'Mapped Location' into numeric
        'Latitude'/'Longitude' columns

    Returns:
        pandas.DataFrame: cleaned complaint records.

    Raises:
        Exception: re-raised after reporting via ``st.error`` on any failure.
    """
    try:
        data = pd.read_csv('Nuisance_Complaints.csv')

        # 'File Number' identifies a record; rows without it are unusable.
        data = data.dropna(subset=['File Number'])

        # Parse date columns. A file may legitimately still be open, so
        # bad/missing close dates are coerced to NaT rather than raising.
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates as report date + median delay.
        # Guard against an all-missing column, where the median is NaN.
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        if pd.isna(median_delay):
            median_delay = 0
        missing_notice = data['Date Notice Mailed or Given'].isna()
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        def _modal_disposition(s):
            # Mode of the group's dispositions; 'Pending' when none observed.
            m = s.mode()
            return m.iloc[0] if not m.empty else 'Pending'

        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_modal_disposition)
        data['Disposition'] = data.apply(
            lambda row: most_common_disposition[row['Type of Complaint']]
            if pd.isna(row['Disposition']) else row['Disposition'],
            axis=1
        )

        # Days from report to closure; NaN for still-open files.
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # Online submissions with no recorded method are 'Online'.
        # .eq(True) keeps the mask strictly boolean even if the column
        # contains NaN (object dtype), where a plain `&` would fail.
        data.loc[
            data['Submitted Online?'].eq(True) & data['Method Submitted'].isna(),
            'Method Submitted'
        ] = 'Online'
        # Fill the remainder with the most common method, if one exists.
        method_mode = data['Method Submitted'].mode()
        if not method_mode.empty:
            data['Method Submitted'] = data['Method Submitted'].fillna(method_mode.iloc[0])

        # Both columns are required downstream (filters and the map).
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like '(lat, lon)'; split into floats.
        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

        # The year selector compares against plain ints.
        data['Year Reported'] = data['Year Reported'].astype(int)
        return data
    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        # Bare `raise` preserves the original traceback (`raise e` resets it).
        raise
# Load the dataset once at startup; halt the whole app if it cannot be prepared.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as exc:
    st.error(f"Error loading data: {str(exc)}")
    st.stop()
# Sidebar: year filter and chart selector.
st.sidebar.header("Dashboard Controls")

# 'All Time' plus every distinct reporting year, ascending, as plain ints.
year_options = ['All Time'] + sorted(int(y) for y in data['Year Reported'].unique())
selected_year = st.sidebar.selectbox(
    "Select Year",
    options=year_options,
)

# Which visualization to render in the main panel.
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Complaints by Disposition"]
)
# Restrict the working frame to the chosen year ('All Time' = no filter)
# and render the matching section header.
if selected_year == 'All Time':
    filtered_data = data
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for Year {selected_year}")
# Main content: headline metrics for the current selection.
# (A stray "# Create metrics" comment was duplicated five times here; deduplicated.)
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Mean is taken over resolved cases only; it is NaN when none are resolved
    # (or the selection is empty), in which case we show "N/A".
    avg_time = filtered_data['Processing Time'].mean()
    st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
with col3:
    # value_counts().index[0] would raise on an empty frame, so guard first.
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
        st.metric("Most Common Type", most_common)
    else:
        st.metric("Most Common Type", "N/A")
# Render the chart chosen in the sidebar. Each branch guards against an
# empty selection where the original code would crash or warn.
if viz_type == "Complaint Types":
    # Interactive donut chart of complaint-type shares.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']
    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)
elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    heat_points = filtered_data[['Latitude', 'Longitude']].dropna()
    if heat_points.empty:
        # Guard: mean() of an empty frame is NaN, which folium cannot
        # use as a map center.
        st.write("No data available for the selected year.")
    else:
        map_center = [heat_points['Latitude'].mean(), heat_points['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(heat_points.values.tolist()).add_to(m)
        st_data = st_folium(m, width=700, height=500)
elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw onto the explicit axes instead of relying on matplotlib's
    # implicit "current axes" (consistent with the Disposition branch).
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)
elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)
elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()
    if not disposition_counts.empty:
        fig, ax = plt.subplots(figsize=(10, 6))
        # NOTE(review): `palette=` without `hue=` is deprecated in newer
        # seaborn releases; kept for identical appearance.
        sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette="viridis", ax=ax)
        ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
        ax.set_xlabel('Number of Complaints', fontsize=12)
        ax.set_ylabel('Disposition', fontsize=12)
        st.pyplot(fig)
    else:
        st.write("No data available for the selected year.")
# Summary insights below the main visualization, then the page footer.
st.header("Key Insights")
insight_left, insight_right = st.columns(2)
with insight_left:
    st.subheader("Top 3 Complaint Types")
    st.write(filtered_data['Type of Complaint'].value_counts().head(3))
with insight_right:
    st.subheader("Resolution Efficiency")
    # Each disposition's share of all complaints, as a percentage.
    disposition_share = filtered_data['Disposition'].value_counts()
    st.write((disposition_share / len(filtered_data) * 100).round(2))

st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")