ralate2 committed on
Commit
b7feb2b
·
verified ·
1 Parent(s): 0adda8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -161
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
 
6
  import folium
7
  from folium.plugins import HeatMap
8
  from streamlit_folium import st_folium
@@ -21,118 +22,66 @@ st.markdown("""
21
  * Ruchita Alate (ralate2@illinois.edu)
22
  * Shreyas Kulkarni (ssk16@illinois.edu)
23
  * Vishal Devulapalli (nsd3@illinois.edu)
 
 
24
  """)
25
- st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
26
 
27
  # Load and clean data
28
  @st.cache_data
29
  def load_and_clean_data():
30
- try:
31
- # Load data
32
- data = pd.read_csv('Nuisance_Complaints_20241130.csv')
33
-
34
- # Drop rows with missing 'File Number'
35
- data = data.dropna(subset=['File Number'])
36
-
37
- # Convert dates and handle date-related columns
38
- data['Date Reported'] = pd.to_datetime(data['Date Reported'])
39
- data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
40
- data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')
41
-
42
- # Handle 'Date Notice Mailed or Given'
43
- median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
44
- data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Notice Mailed or Given'] = \
45
- data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Reported'] + pd.Timedelta(days=median_delay)
46
-
47
- # Handle 'Type of Complaint'
48
- data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
49
-
50
- # Handle 'Disposition'
51
- most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
52
- lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
53
- )
54
- data['Disposition'] = data.apply(
55
- lambda row: most_common_disposition[row['Type of Complaint']]
56
- if pd.isna(row['Disposition']) else row['Disposition'],
57
- axis=1
58
- )
59
-
60
- # Calculate processing time for resolved cases
61
- data['Processing Time'] = np.where(
62
- data['File Close Date'].notna(),
63
- (data['File Close Date'] - data['Date Reported']).dt.days,
64
- np.nan
65
- )
66
-
67
- # Handle 'Method Submitted'
68
- data.loc[
69
- (data['Submitted Online?']) & (data['Method Submitted'].isna()),
70
- 'Method Submitted'
71
- ] = 'Online'
72
- data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
73
-
74
- # Drop rows with missing critical values
75
- data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])
76
-
77
- # Extract and clean location data
78
- data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
79
- data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
80
-
81
- # Ensure Year Reported is integer
82
- data['Year Reported'] = data['Year Reported'].astype(int)
83
-
84
- return data
85
-
86
- except Exception as e:
87
- st.error(f"Error in data preprocessing: {str(e)}")
88
- raise e
89
 
90
- # Load the data
91
- try:
92
- data = load_and_clean_data()
93
- st.success("Data successfully loaded and cleaned!")
94
- except Exception as e:
95
- st.error(f"Error loading data: {str(e)}")
96
- st.stop()
 
 
97
 
98
- # Create sidebar
 
 
 
 
 
 
 
99
 
 
100
  st.sidebar.header("Dashboard Controls")
 
 
 
 
 
 
101
 
102
- # Get unique years and convert to list for selectbox
103
- year_list = sorted(data['Year Reported'].unique().tolist())
104
- year_options = ['All Time'] + [int(year) for year in year_list] # Convert years to integers
105
-
106
- selected_year = st.sidebar.selectbox(
107
- "Select Year",
108
- options=year_options,
109
- )
110
- # Add visualization type selector
111
- viz_type = st.sidebar.selectbox(
112
- "Select Visualization",
113
- ["Complaint Types", "Geographic Distribution", "Resolution Status",
114
- "Submission Methods", "Complaints by Disposition"]
115
- )
116
-
117
-
118
- # Filter data based on selected year
119
- if selected_year == 'All Time':
120
- filtered_data = data # Use complete dataset when 'All Time' is selected
121
- else:
122
- filtered_data = data[data['Year Reported'] == selected_year]
123
-
124
- # Update header text
125
- if selected_year == 'All Time':
126
- st.header("Analysis for All Time")
127
- else:
128
- st.header(f"Analysis for Year {selected_year}")
129
- # Main content
130
-
131
- # Create metrics
132
- # Create metrics
133
- # Create metrics
134
- # Create metrics
135
- # Create metrics
136
  col1, col2, col3 = st.columns(3)
137
  with col1:
138
  st.metric("Total Complaints", len(filtered_data))
@@ -140,85 +89,90 @@ with col2:
140
  avg_time = filtered_data['Processing Time'].mean()
141
  st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
142
  with col3:
143
- if not filtered_data.empty:
144
- most_common = filtered_data['Type of Complaint'].value_counts().index[0]
145
- st.metric("Most Common Type", most_common)
146
- else:
147
- st.metric("Most Common Type", "N/A")
148
  if viz_type == "Complaint Types":
149
- # Interactive Pie Chart
150
  st.subheader("Interactive Complaint Types Pie Chart")
151
  complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
152
  complaint_counts.columns = ['Complaint Type', 'Count']
153
-
154
- fig = px.pie(
155
- complaint_counts,
156
- names='Complaint Type',
157
- values='Count',
158
- title=f'Complaint Types Distribution in {selected_year}',
159
- hole=0.4 # Donut style
160
- )
161
- fig.update_traces(textinfo='percent+label')
162
  st.plotly_chart(fig, use_container_width=True)
 
 
 
 
163
 
164
  elif viz_type == "Geographic Distribution":
165
- # Clustered Heatmap
166
  st.subheader("Clustered Heatmap of Complaints")
167
  map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
168
  m = folium.Map(location=map_center, zoom_start=12)
169
-
170
  heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
171
  HeatMap(heat_data).add_to(m)
172
-
173
- st_data = st_folium(m, width=700, height=500)
174
-
 
175
 
176
  elif viz_type == "Resolution Status":
177
- st.subheader("Complaint Resolution Status")
178
- fig, ax = plt.subplots(figsize=(10, 6))
179
- resolution_counts = filtered_data['Disposition'].value_counts()
180
- sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
181
- plt.title(f'Resolution Status Distribution in {selected_year}')
182
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  elif viz_type == "Submission Methods":
185
  st.subheader("Submission Methods Analysis")
186
- fig, ax = plt.subplots(figsize=(10, 6))
187
  submission_counts = filtered_data['Method Submitted'].value_counts()
188
- sns.barplot(x=submission_counts.values, y=submission_counts.index)
189
- plt.title(f'Submission Methods in {selected_year}')
 
190
  st.pyplot(fig)
191
-
 
 
192
 
193
  elif viz_type == "Complaints by Disposition":
194
  st.subheader("Complaints by Disposition")
195
  disposition_counts = filtered_data['Disposition'].value_counts()
196
-
197
- if not disposition_counts.empty:
198
- fig, ax = plt.subplots(figsize=(10, 6))
199
- sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette="viridis", ax=ax)
200
- ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
201
- ax.set_xlabel('Number of Complaints', fontsize=12)
202
- ax.set_ylabel('Disposition', fontsize=12)
203
- st.pyplot(fig)
204
- else:
205
- st.write("No data available for the selected year.")
206
-
207
- # Additional insights
208
- st.header("Key Insights")
209
- col1, col2 = st.columns(2)
210
-
211
- with col1:
212
- st.subheader("Top 3 Complaint Types")
213
- top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
214
- st.write(top_complaints)
215
-
216
- with col2:
217
- st.subheader("Resolution Efficiency")
218
- resolution_rate = (filtered_data['Disposition'].value_counts() /
219
- len(filtered_data) * 100).round(2)
220
- st.write(resolution_rate)
221
-
222
- # Footer
223
- st.markdown("---")
224
- st.markdown("Dataset provided by the City of Urbana Open Data Portal")
 
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
+ import altair as alt
7
  import folium
8
  from folium.plugins import HeatMap
9
  from streamlit_folium import st_folium
 
22
  * Ruchita Alate (ralate2@illinois.edu)
23
  * Shreyas Kulkarni (ssk16@illinois.edu)
24
  * Vishal Devulapalli (nsd3@illinois.edu)
25
+
26
+ This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
27
  """)
 
28
 
29
# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load the Urbana nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows without a 'File Number'
      * parse the three date columns; impute missing notice dates using the
        median report-to-notice delay
      * fill missing 'Type of Complaint' / 'Disposition' / 'Method Submitted'
      * derive 'Processing Time' (days, closed files only), 'Latitude' /
        'Longitude' from 'Mapped Location', and 'Month Reported'
    """
    data = pd.read_csv('Nuisance_Complaints_20241204.csv')
    data = data.dropna(subset=['File Number'])

    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Fill missing notice dates with the median report-to-notice delay.
    # Guard: if every notice date is missing, the median is NaN and
    # pd.Timedelta(days=NaN) would raise — skip imputation in that case.
    delay_days = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days
    median_delay = delay_days.median()
    if pd.notna(median_delay):
        missing_notice = data['Date Notice Mailed or Given'].isna()
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    # Fill missing dispositions with the most common disposition for the
    # row's complaint type (vectorized .map instead of a row-wise apply).
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
        lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
    )
    missing_disposition = data['Disposition'].isna()
    data.loc[missing_disposition, 'Disposition'] = (
        data.loc[missing_disposition, 'Type of Complaint'].map(most_common_disposition)
    )

    # Processing time is only defined for files that have been closed.
    data['Processing Time'] = np.where(
        data['File Close Date'].notna(),
        (data['File Close Date'] - data['Date Reported']).dt.days,
        np.nan
    )

    # Drop rows missing the critical fields BEFORE using 'Submitted Online?'
    # as a boolean mask — NaN entries would otherwise poison the mask.
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()),
             'Method Submitted'] = 'Online'
    data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

    # 'Mapped Location' holds "(lat, lon)" strings; split into numeric columns.
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month
    return data
66
+
67
# Load the cleaned dataset once (cached by Streamlit).
data = load_and_clean_data()

# Sidebar controls: year filter and visualization picker.
st.sidebar.header("Dashboard Controls")
year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
selected_year = st.sidebar.selectbox("Select Year", options=year_options)
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Resolution Status",
    "Submission Methods", "Complaints by Disposition", "Monthly Trends by Complaint Type"
])

# Restrict the data to the chosen year and title the page accordingly.
if selected_year == 'All Time':
    filtered_data = data
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for {selected_year}")

# Headline metrics for the current selection.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    avg_time = filtered_data['Processing Time'].mean()
    if pd.notna(avg_time):
        st.metric("Average Processing Time", f"{avg_time:.1f} days")
    else:
        st.metric("Average Processing Time", "N/A")
with col3:
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
    else:
        most_common = "N/A"
    st.metric("Most Common Type", most_common)
95
# Visualizations — one branch per sidebar selection, each with a short write-up.
if viz_type == "Complaint Types":
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']
    fig = px.pie(complaint_counts, names='Complaint Type', values='Count', hole=0.4)
    st.plotly_chart(fig, use_container_width=True)
    st.write("""
    **Write-up:** This visualization shows the distribution of complaint types as a donut chart.
    It provides a quick overview of the most common complaints. The donut layout makes it
    easy to compare the relative share of each complaint category.""")

elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    # Guard: with no usable coordinates, mean() is NaN and folium would fail.
    coords = filtered_data[['Latitude', 'Longitude']].dropna()
    if coords.empty:
        st.write("No location data available for the selected year.")
    else:
        map_center = [coords['Latitude'].mean(), coords['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(coords.values.tolist()).add_to(m)
        st_folium(m, width=700, height=500)
    st.write("""
    **Write-up:** This heatmap visualizes complaint hotspots geographically. Areas with
    higher complaint density are highlighted, helping policymakers focus resources effectively.""")

elif viz_type == "Resolution Status":
    st.subheader("Interactive Complaint Resolution Status")
    resolution_counts = filtered_data['Disposition'].value_counts().reset_index()
    resolution_counts.columns = ['Disposition', 'Count']
    resolution_counts['Percentage'] = (resolution_counts['Count'] / resolution_counts['Count'].sum()) * 100
    chart = alt.Chart(resolution_counts).mark_arc(innerRadius=50).encode(
        theta=alt.Theta(field="Count", type="quantitative"),
        color=alt.Color(field="Disposition", type="nominal"),
        tooltip=[
            alt.Tooltip("Disposition", title="Resolution"),
            alt.Tooltip("Count", title="Count"),
            alt.Tooltip("Percentage", title="Percentage", format=".2f")
        ]
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
    **Write-up:** This chart visualizes resolution status using a donut chart.
    It provides insights into the efficiency of complaint resolutions.""")

elif viz_type == "Monthly Trends by Complaint Type":
    st.subheader("Monthly Trends Grouped by Complaint Types")
    monthly_trends = (
        filtered_data.groupby(['Month Reported', 'Type of Complaint'])
        .size()
        .reset_index(name='Count')
    )
    # Month names via pd.Timestamp: avoids `datetime(2023, x, 1)`, which
    # relied on a `datetime` import that is never made in this file
    # (latent NameError). The year is arbitrary — only the name is used.
    monthly_trends['Month'] = monthly_trends['Month Reported'].apply(
        lambda x: pd.Timestamp(year=2023, month=int(x), day=1).strftime('%B')
    )
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color=alt.Color('Type of Complaint:N', title='Complaint Type'),
        tooltip=["Type of Complaint:N", "Month:N", "Count:Q"]
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
    **Write-up:** This line chart visualizes monthly trends in complaints grouped by type.
    The use of vibrant colors helps distinguish trends across different complaint types.""")

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    submission_counts = filtered_data['Method Submitted'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=submission_counts.values, y=submission_counts.index, palette='inferno', ax=ax)
    ax.set_title(f"Submission Methods in {selected_year}")
    st.pyplot(fig)
    st.write("""
    **Write-up:** This bar chart illustrates the preferred methods for complaint submission.
    The `inferno` color palette highlights differences across submission types.""")

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette='viridis', ax=ax)
    ax.set_title(f"Complaints by Disposition in {selected_year}")
    st.pyplot(fig)
    st.write("""
    **Write-up:** This bar chart shows the distribution of complaints across various dispositions.
    The `viridis` color palette effectively highlights disposition categories.""")