Spaces:

ralate2
/

Group3FinalProjectPart3

Sleeping

App Files Files Community

ralate2 committed on Dec 4, 2024

Commit

75594c5

verified ·

1 Parent(s): edaf17d

Upload app.py

Browse files

Files changed (1) hide show

app.py +158 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from datetime import datetime
+# Setting page config
+st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
+# Project Title and introduction
+st.title("Nuisance Complaints Analysis Dashboard")
+st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
+# Loading and cleaning data
+@st.cache_data
+def load_and_clean_data():
+    # Loading data
+    data = pd.read_csv('Nuisance_Complaints_20241130.csv')
+    # Dropping rows with missing File Number
+    data = data.dropna(subset=['File Number'])
+    # Handling Date Notice Mailed or Given
+    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
+    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
+    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
+    data['Date Notice Mailed or Given'].fillna(data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'), inplace=True)
+    # Handling Type of Complaint
+    data['Type of Complaint'].fillna('Unknown', inplace=True)
+    # Handling Disposition
+    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
+        lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
+    data['Disposition'] = data.apply(
+        lambda row: most_common_disposition[row['Type of Complaint']]
+        if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)
+    # Handling File Close Date
+    data['File Close Date'] = data['File Close Date'].fillna('Unresolved')
+    # Handling Method Submitted
+    data['Method Submitted'] = data.apply(
+        lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
+        else row['Method Submitted'], axis=1)
+    mode_method = data['Method Submitted'].mode()[0]
+    data['Method Submitted'].fillna(mode_method, inplace=True)
+    # Dropping rows with missing Submitted Online?
+    data = data.dropna(subset=['Submitted Online?'])
+    # Handling rows with missing  Mapped Location
+    data = data.dropna(subset=['Mapped Location'])
+    # Extractingh latitude and longitude
+    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
+    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)').astype(float)
+    return data
+# Loading the data
+try:
+    data = load_and_clean_data()
+    st.success("Data successfully loaded and cleaned!")
+except Exception as e:
+    st.error(f"Error loading data: {str(e)}")
+    st.stop()
+# Creating sidebar
+st.sidebar.header("Dashboard Controls")
+selected_year = st.sidebar.selectbox(
+    "Select Year",
+    options=sorted(data['Year Reported'].unique()),
+)
+# Adding visualization type selector
+viz_type = st.sidebar.selectbox(
+    "Select Visualization",
+    ["Complaint Types", "Geographic Distribution", "Resolution Status",
+     "Submission Methods", "Processing Time"]
+)
+# Filter data based on selected year
+filtered_data = data[data['Year Reported'] == selected_year]
+# Main content
+st.header(f"Analysis for Year {selected_year}")
+# Create metrics
+col1, col2, col3 = st.columns(3)
+with col1:
+    st.metric("Total Complaints", len(filtered_data))
+with col2:
+    avg_process_time = (pd.to_datetime(filtered_data['File Close Date']) -
+                       filtered_data['Date Reported']).dt.days.mean()
+    st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
+with col3:
+    st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
+# Create visualizations based on selection
+if viz_type == "Complaint Types":
+    st.subheader("Distribution of Complaint Types")
+    fig, ax = plt.subplots(figsize=(10, 6))
+    complaint_counts = filtered_data['Type of Complaint'].value_counts()
+    sns.barplot(x=complaint_counts.values, y=complaint_counts.index)
+    plt.title(f'Complaint Types Distribution in {selected_year}')
+    st.pyplot(fig)
+elif viz_type == "Geographic Distribution":
+    st.subheader("Geographic Distribution of Complaints")
+    st.map(filtered_data[['Latitude', 'Longitude']])
+elif viz_type == "Resolution Status":
+    st.subheader("Complaint Resolution Status")
+    fig, ax = plt.subplots(figsize=(10, 6))
+    resolution_counts = filtered_data['Disposition'].value_counts()
+    sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
+    plt.title(f'Resolution Status Distribution in {selected_year}')
+    st.pyplot(fig)
+elif viz_type == "Submission Methods":
+    st.subheader("Submission Methods Analysis")
+    fig, ax = plt.subplots(figsize=(10, 6))
+    submission_counts = filtered_data['Method Submitted'].value_counts()
+    sns.barplot(x=submission_counts.values, y=submission_counts.index)
+    plt.title(f'Submission Methods in {selected_year}')
+    st.pyplot(fig)
+else:  # Processing Time
+    st.subheader("Processing Time Analysis")
+    fig, ax = plt.subplots(figsize=(10, 6))
+    sns.histplot(data=filtered_data,
+                 x=(pd.to_datetime(filtered_data['File Close Date']) -
+                    filtered_data['Date Reported']).dt.days,
+                 bins=30)
+    plt.title(f'Distribution of Processing Times in {selected_year}')
+    plt.xlabel('Processing Time (Days)')
+    st.pyplot(fig)
+# Additional insights
+st.header("Key Insights")
+col1, col2 = st.columns(2)
+with col1:
+    st.subheader("Top 3 Complaint Types")
+    top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
+    st.write(top_complaints)
+with col2:
+    st.subheader("Resolution Efficiency")
+    resolution_rate = (filtered_data['Disposition'].value_counts() /
+                      len(filtered_data) * 100).round(2)
+    st.write(resolution_rate)
+# Footer
+st.markdown("---")
+st.markdown("Dataset provided by the City of Urbana Open Data Portal")