ralate2 committed on
Commit
a94ed92
·
verified ·
1 Parent(s): 68f7786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -102
app.py CHANGED
@@ -1,9 +1,6 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- import plotly.express as px
7
  from datetime import datetime
8
 
9
  # Set page config
@@ -11,62 +8,61 @@ st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
11
 
12
  # Title and introduction
13
  st.title("Nuisance Complaints Analysis Dashboard")
14
- st.markdown("**Team Members:** Lu Chang (luchang2@illinois.edu), Qiming Li (qimingl4@illinois.edu), Ruchita Alate (ralate2@illinois.edu), Shreyas Kulkarni (ssk16@illinois.edu), Vishal Devulapalli (nsd3@illinois.edu)")
15
  st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
16
 
17
  # Load and clean data
18
  @st.cache_data
19
- def load_and_clean_data():
20
  # Load data
21
- data = pd.read_csv('Nuisance_Complaints_20241130.csv')
22
-
23
- # Drop rows with missing File Number
24
  data = data.dropna(subset=['File Number'])
25
-
26
- # Handle Date Notice Mailed or Given
27
  data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
28
  data['Date Reported'] = pd.to_datetime(data['Date Reported'])
29
  median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
30
  data['Date Notice Mailed or Given'].fillna(data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'), inplace=True)
31
-
32
- # Handle Type of Complaint
33
  data['Type of Complaint'].fillna('Unknown', inplace=True)
34
-
35
- # Handle Disposition
36
  most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
37
  lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
38
  data['Disposition'] = data.apply(
39
- lambda row: most_common_disposition[row['Type of Complaint']]
40
  if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)
41
-
42
- # Handle File Close Date
43
  data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')
44
-
 
45
  # Calculate processing time only for resolved cases
46
  data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days
47
-
48
- # Handle Method Submitted
49
  data['Method Submitted'] = data.apply(
50
  lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
51
  else row['Method Submitted'], axis=1)
52
  mode_method = data['Method Submitted'].mode()[0]
53
  data['Method Submitted'].fillna(mode_method, inplace=True)
54
-
55
- # Drop rows with missing Submitted Online?
56
  data = data.dropna(subset=['Submitted Online?'])
57
-
58
- # Handle Mapped Location
59
- data = data.dropna(subset=['Mapped Location'])
60
-
61
- # Extract latitude and longitude
62
  data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
63
  data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)').astype(float)
64
-
65
  return data
66
 
67
  # Load the data
 
68
  try:
69
- data = load_and_clean_data()
70
  st.success("Data successfully loaded and cleaned!")
71
  except Exception as e:
72
  st.error(f"Error loading data: {str(e)}")
@@ -98,7 +94,7 @@ with col1:
98
  st.metric("Total Complaints", len(filtered_data))
99
  with col2:
100
  # Calculate average processing time only for resolved cases
101
- resolved_cases = filtered_data[filtered_data['File Close Date'].notna()]
102
  if len(resolved_cases) > 0:
103
  avg_process_time = resolved_cases['Processing Time'].mean()
104
  st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
@@ -107,80 +103,14 @@ with col2:
107
  with col3:
108
  st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
109
 
110
-
111
  if viz_type == "Complaint Types":
112
- # Interactive Pie Chart
113
- st.subheader("Interactive Complaint Types Pie Chart")
114
- complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
115
- complaint_counts.columns = ['Complaint Type', 'Count']
116
-
117
- fig = px.pie(
118
- complaint_counts,
119
- names='Complaint Type',
120
- values='Count',
121
- title=f'Complaint Types Distribution in {selected_year}',
122
- hole=0.4 # Donut style
123
- )
124
- fig.update_traces(textinfo='percent+label')
125
- st.plotly_chart(fig, use_container_width=True)
126
-
127
  elif viz_type == "Geographic Distribution":
128
- # Clustered Heatmap
129
- st.subheader("Clustered Heatmap of Complaints")
130
- map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
131
- m = folium.Map(location=map_center, zoom_start=12)
132
-
133
- heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
134
- HeatMap(heat_data).add_to(m)
135
-
136
- st_data = st_folium(m, width=700, height=500)
137
-
138
-
139
  elif viz_type == "Resolution Status":
140
- st.subheader("Complaint Resolution Status")
141
- fig, ax = plt.subplots(figsize=(10, 6))
142
- resolution_counts = filtered_data['Disposition'].value_counts()
143
- sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
144
- plt.title(f'Resolution Status Distribution in {selected_year}')
145
- st.pyplot(fig)
146
-
147
  elif viz_type == "Submission Methods":
148
- st.subheader("Submission Methods Analysis")
149
- fig, ax = plt.subplots(figsize=(10, 6))
150
- submission_counts = filtered_data['Method Submitted'].value_counts()
151
- sns.barplot(x=submission_counts.values, y=submission_counts.index)
152
- plt.title(f'Submission Methods in {selected_year}')
153
- st.pyplot(fig)
154
-
155
-
156
  elif viz_type == "Processing Time":
157
- st.subheader("Processing Time Analysis")
158
- # Filter for resolved cases only
159
- resolved_data = filtered_data[filtered_data['File Close Date'].notna()]
160
- if len(resolved_data) > 0:
161
- fig, ax = plt.subplots(figsize=(10, 6))
162
- sns.histplot(data=resolved_data, x='Processing Time', bins=30)
163
- plt.title(f'Distribution of Processing Times in {selected_year}')
164
- plt.xlabel('Processing Time (Days)')
165
- st.pyplot(fig)
166
- else:
167
- st.write("No resolved cases in this period")
168
-
169
- # Additional insights
170
- st.header("Key Insights")
171
- col1, col2 = st.columns(2)
172
-
173
- with col1:
174
- st.subheader("Top 3 Complaint Types")
175
- top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
176
- st.write(top_complaints)
177
-
178
- with col2:
179
- st.subheader("Resolution Efficiency")
180
- resolution_rate = (filtered_data['Disposition'].value_counts() /
181
- len(filtered_data) * 100).round(2)
182
- st.write(resolution_rate)
183
-
184
- # Footer
185
- st.markdown("---")
186
- st.markdown("Dataset provided by the City of Urbana Open Data Portal")
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
 
 
4
  from datetime import datetime
5
 
6
  # Set page config
 
8
 
9
  # Title and introduction
10
  st.title("Nuisance Complaints Analysis Dashboard")
11
+ st.markdown("**Team Members:** Shreyas Kulkarni (ssk16@illinois.edu), Vishal Devulapalli (nsd3@illinois.edu), Lu Chang (luchang2@illinois.edu), Li Qiming (qimingl4@illinois.edu), Ruchita Alate (ralate2@illinois.edu)")
12
  st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
13
 
14
  # Load and clean data
15
@st.cache_data
def load_and_clean_data(file_path):
    """Load the nuisance-complaints CSV and clean/impute its columns.

    Parameters
    ----------
    file_path : str
        Path to the complaints CSV file.

    Returns
    -------
    pandas.DataFrame
        Cleaned data with added 'Processing Time', 'Latitude' and
        'Longitude' columns. Unresolved cases carry the string
        'Unresolved' in 'File Close Date' (downstream filters rely on
        this sentinel) and NaN in 'Processing Time'.
    """
    data = pd.read_csv(file_path)

    # A complaint without a file number cannot be tracked; drop it.
    data = data.dropna(subset=['File Number'])

    # Impute 'Date Notice Mailed or Given' using the median
    # report-to-notice delay. Use assignment instead of
    # `inplace=True` on a column selection: chained in-place fillna is
    # deprecated in modern pandas and can silently fail to update.
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    data['Date Notice Mailed or Given'] = data['Date Notice Mailed or Given'].fillna(
        data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'))

    # Keep missing complaint types as their own 'Unknown' category.
    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    # Impute 'Disposition' with the most common value for the same
    # complaint type; fall back to 'Pending' when a type has no mode.
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
        lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
    data['Disposition'] = data.apply(
        lambda row: most_common_disposition[row['Type of Complaint']]
        if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)

    # Parse 'File Close Date'; unparseable/missing values become NaT for now.
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # BUG FIX: compute the processing time while 'File Close Date' is
    # still datetime-typed. The previous order filled the column with
    # the string 'Unresolved' first, which made the subtraction below
    # raise and broke the entire load step. Open cases get NaN here.
    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days

    # Only now mark open cases with the 'Unresolved' sentinel that the
    # dashboard's resolved/unresolved filters expect.
    data['File Close Date'] = data['File Close Date'].fillna('Unresolved')

    # Infer 'Method Submitted' from the 'Submitted Online?' flag, then
    # fall back to the overall mode for anything still missing.
    data['Method Submitted'] = data.apply(
        lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
        else row['Method Submitted'], axis=1)
    mode_method = data['Method Submitted'].mode()[0]
    data['Method Submitted'] = data['Method Submitted'].fillna(mode_method)

    # Drop rows where the online flag itself is missing.
    data = data.dropna(subset=['Submitted Online?'])

    # Extract coordinates from the '(lat, lon)' text in 'Mapped Location'.
    # Select column 0 of the extract result in both cases so a Series
    # (not a one-column DataFrame) is assigned.
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

    return data
61
 
62
  # Load the data
63
+ file_path = "Nuisance_Complaints_20241130.csv"
64
  try:
65
+ data = load_and_clean_data(file_path)
66
  st.success("Data successfully loaded and cleaned!")
67
  except Exception as e:
68
  st.error(f"Error loading data: {str(e)}")
 
94
  st.metric("Total Complaints", len(filtered_data))
95
  with col2:
96
  # Calculate average processing time only for resolved cases
97
+ resolved_cases = filtered_data[filtered_data['File Close Date'] != 'Unresolved']
98
  if len(resolved_cases) > 0:
99
  avg_process_time = resolved_cases['Processing Time'].mean()
100
  st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
 
103
  with col3:
104
  st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
105
 
106
# Placeholder section: each visualization type currently just announces
# where its chart will eventually be rendered. A lookup table keeps the
# selection logic in one place instead of an if/elif chain.
_viz_placeholder_messages = {
    "Complaint Types": "Visualization for Complaint Types will go here.",
    "Geographic Distribution": "Visualization for Geographic Distribution will go here.",
    "Resolution Status": "Visualization for Resolution Status will go here.",
    "Submission Methods": "Visualization for Submission Methods will go here.",
    "Processing Time": "Visualization for Processing Time will go here.",
}
if viz_type in _viz_placeholder_messages:
    st.write(_viz_placeholder_messages[viz_type])