Spaces:

ralate2
/

Group3FinalProjectPart3

Sleeping

App Files Community

ralate2 committed on Dec 4, 2024

Commit

46f241a

verified ·

1 Parent(s): 90bb107

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -143

app.py CHANGED Viewed

@@ -1,158 +1,91 @@
-import streamlit as st
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 from datetime import datetime
-# Setting page config
-st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
-# Project Title and introduction
-st.title("Nuisance Complaints Analysis Dashboard")
-st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
-# Loading and cleaning data
-@st.cache_data
 def load_and_clean_data():
-    # Loading data
-    data = pd.read_csv('Nuisance_Complaints_20241130.csv')
-    # Dropping rows with missing File Number
-    data = data.dropna(subset=['File Number'])
-    # Handling Date Notice Mailed or Given
-    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
-    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
-    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
-    data['Date Notice Mailed or Given'].fillna(data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'), inplace=True)
-    # Handling Type of Complaint
-    data['Type of Complaint'].fillna('Unknown', inplace=True)
-    # Handling Disposition
-    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
-        lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
-    data['Disposition'] = data.apply(
-        lambda row: most_common_disposition[row['Type of Complaint']]
-        if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)
-    # Handling File Close Date
-    data['File Close Date'] = data['File Close Date'].fillna('Unresolved')
-    # Handling Method Submitted
-    data['Method Submitted'] = data.apply(
-        lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
-        else row['Method Submitted'], axis=1)
-    mode_method = data['Method Submitted'].mode()[0]
-    data['Method Submitted'].fillna(mode_method, inplace=True)
-    # Dropping rows with missing Submitted Online?
-    data = data.dropna(subset=['Submitted Online?'])
-    # Handling rows with missing  Mapped Location
-    data = data.dropna(subset=['Mapped Location'])
-    # Extracting latitude and longitude
-    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
-    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)').astype(float)
-    return data
-# Loading the data
-try:
-    data = load_and_clean_data()
-    st.success("Data successfully loaded and cleaned!")
-except Exception as e:
-    st.error(f"Error loading data: {str(e)}")
-    st.stop()
-# Creating sidebar
-st.sidebar.header("Dashboard Controls")
-selected_year = st.sidebar.selectbox(
-    "Select Year",
-    options=sorted(data['Year Reported'].unique()),
-)
-# Adding visualization type selector
-viz_type = st.sidebar.selectbox(
-    "Select Visualization",
-    ["Complaint Types", "Geographic Distribution", "Resolution Status",
-     "Submission Methods", "Processing Time"]
-)
-# Filter data based on selected year
-filtered_data = data[data['Year Reported'] == selected_year]
-# Main content
-st.header(f"Analysis for Year {selected_year}")
-# Create metrics
-col1, col2, col3 = st.columns(3)
-with col1:
-    st.metric("Total Complaints", len(filtered_data))
-with col2:
-    avg_process_time = (pd.to_datetime(filtered_data['File Close Date']) -
-                       filtered_data['Date Reported']).dt.days.mean()
-    st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
-with col3:
-    st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
-# Create visualizations based on selection
-if viz_type == "Complaint Types":
-    st.subheader("Distribution of Complaint Types")
-    fig, ax = plt.subplots(figsize=(10, 6))
-    complaint_counts = filtered_data['Type of Complaint'].value_counts()
-    sns.barplot(x=complaint_counts.values, y=complaint_counts.index)
-    plt.title(f'Complaint Types Distribution in {selected_year}')
-    st.pyplot(fig)
-elif viz_type == "Geographic Distribution":
-    st.subheader("Geographic Distribution of Complaints")
-    st.map(filtered_data[['Latitude', 'Longitude']])
-elif viz_type == "Resolution Status":
-    st.subheader("Complaint Resolution Status")
-    fig, ax = plt.subplots(figsize=(10, 6))
-    resolution_counts = filtered_data['Disposition'].value_counts()
-    sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
-    plt.title(f'Resolution Status Distribution in {selected_year}')
-    st.pyplot(fig)
-elif viz_type == "Submission Methods":
-    st.subheader("Submission Methods Analysis")
-    fig, ax = plt.subplots(figsize=(10, 6))
-    submission_counts = filtered_data['Method Submitted'].value_counts()
-    sns.barplot(x=submission_counts.values, y=submission_counts.index)
-    plt.title(f'Submission Methods in {selected_year}')
-    st.pyplot(fig)
-else:  # Processing Time
-    st.subheader("Processing Time Analysis")
-    fig, ax = plt.subplots(figsize=(10, 6))
-    sns.histplot(data=filtered_data,
-                 x=(pd.to_datetime(filtered_data['File Close Date']) -
-                    filtered_data['Date Reported']).dt.days,
-                 bins=30)
-    plt.title(f'Distribution of Processing Times in {selected_year}')
-    plt.xlabel('Processing Time (Days)')
-    st.pyplot(fig)
-# Additional insights
-st.header("Key Insights")
-col1, col2 = st.columns(2)
-with col1:
-    st.subheader("Top 3 Complaint Types")
-    top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
-    st.write(top_complaints)
-with col2:
-    st.subheader("Resolution Efficiency")
-    resolution_rate = (filtered_data['Disposition'].value_counts() /
-                      len(filtered_data) * 100).round(2)
-    st.write(resolution_rate)
-# Footer
-st.markdown("---")
-st.markdown("Dataset provided by the City of Urbana Open Data Portal")

 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+import plotly.express as px
 from datetime import datetime
+# Load and clean data
 def load_and_clean_data():
+    data = pd.read_csv('/content/Nuisance_Complaints_20241204.csv')
+    # Convert date columns
+    date_columns = ['Date Reported', 'Date Notice Mailed or Given', 'File Close Date']
+    for col in date_columns:
+        data[col] = pd.to_datetime(data[col], errors='coerce')
+    # Handle missing values
+    data['Type of Complaint'].fillna('Unknown', inplace=True)
+    data['Disposition'].fillna('Pending', inplace=True)
+    data['Method Submitted'].fillna('Not Specified', inplace=True)
+    # Calculate processing time
+    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days
+    # Remove duplicates
+    data.drop_duplicates(subset=['File Number'], keep='first', inplace=True)
+    return data
+# Create visualizations
+def create_visualizations(data):
+    # 1. Complaint Types Over Time
+    plt.figure(figsize=(12, 6))
+    complaints_over_time = data.groupby(['Year Reported', 'Type of Complaint']).size().unstack()
+    complaints_over_time.plot(kind='line', marker='o')
+    plt.title('Trends in Complaint Types Over Years')
+    plt.xlabel('Year')
+    plt.ylabel('Number of Complaints')
+    plt.legend(title='Complaint Type', bbox_to_anchor=(1.05, 1))
+    plt.tight_layout()
+    plt.show()
+    # 2. Resolution Distribution
+    plt.figure(figsize=(10, 6))
+    sns.countplot(data=data, y='Disposition', order=data['Disposition'].value_counts().index)
+    plt.title('Distribution of Complaint Resolutions')
+    plt.xlabel('Count')
+    plt.ylabel('Resolution Type')
+    plt.tight_layout()
+    plt.show()
+    # 3. Average Processing Time by Submission Method
+    plt.figure(figsize=(10, 6))
+    avg_processing_time = data.groupby('Method Submitted')['Processing Time'].mean().sort_values()
+    sns.barplot(x=avg_processing_time.values, y=avg_processing_time.index)
+    plt.title('Average Processing Time by Submission Method')
+    plt.xlabel('Average Processing Time (Days)')
+    plt.ylabel('Submission Method')
+    plt.tight_layout()
+    plt.show()
+    # 4. Monthly Distribution of Complaints
+    plt.figure(figsize=(10, 6))
+    monthly_complaints = data.groupby('Month Reported').size()
+    sns.barplot(x=monthly_complaints.index, y=monthly_complaints.values)
+    plt.title('Monthly Distribution of Complaints')
+    plt.xlabel('Month')
+    plt.ylabel('Number of Complaints')
+    plt.tight_layout()
+    plt.show()
+    # 5. Complaint Type Distribution
+    plt.figure(figsize=(10, 6))
+    sns.countplot(data=data, y='Type of Complaint',
+                 order=data['Type of Complaint'].value_counts().index)
+    plt.title('Distribution of Complaint Types')
+    plt.xlabel('Count')
+    plt.ylabel('Complaint Type')
+    plt.tight_layout()
+    plt.show()
+# Main execution
+def main():
+    # Load and clean data
+    data = load_and_clean_data()
+    # Create visualizations
+    create_visualizations(data)
+if __name__ == "__main__":
+    main()