ralate2 committed on
Commit
75594c5
·
verified ·
1 Parent(s): edaf17d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from datetime import datetime
7
+
8
# Page-level configuration: browser tab title and full-width layout.
st.set_page_config(
    layout="wide",
    page_title="Nuisance Complaints Dashboard",
)

# Dashboard heading followed by a one-line description of the data set.
st.title("Nuisance Complaints Analysis Dashboard")
st.write(
    "This dashboard analyzes nuisance complaints data from the City of Urbana."
)
14
+
15
# Loading and cleaning data
@st.cache_data
def load_and_clean_data():
    """Load the nuisance complaints CSV and return a cleaned DataFrame.

    Cleaning steps, in order:

    1. Drop rows with a missing ``File Number``.
    2. Parse ``Date Notice Mailed or Given`` and ``Date Reported`` as
       datetimes; fill missing notice dates with the report date plus the
       median report-to-notice delay (in days).
    3. Fill missing ``Type of Complaint`` with ``'Unknown'``.
    4. Fill missing ``Disposition`` with the most common disposition for the
       row's complaint type (``'Pending'`` when that type has none).
    5. Fill missing ``File Close Date`` with the string ``'Unresolved'``.
       The column therefore mixes date text with a placeholder; convert it
       with ``pd.to_datetime(..., errors='coerce')`` before date arithmetic.
    6. Fill missing ``Method Submitted`` with ``'Online'`` where
       ``Submitted Online?`` is present and truthy, otherwise with the
       column's most frequent value.
    7. Drop rows with a missing ``Submitted Online?`` or ``Mapped Location``.
    8. Extract float ``Latitude`` / ``Longitude`` columns from the
       ``(lat, lon)`` pair inside ``Mapped Location``.

    Returns:
        pandas.DataFrame: the cleaned data with the two added coordinate
        columns.
    """
    # Loading data
    data = pd.read_csv('Nuisance_Complaints_20241130.csv')

    # Dropping rows with missing File Number. The explicit copy makes the
    # column assignments below act on an independent frame.
    data = data.dropna(subset=['File Number']).copy()

    # Handling Date Notice Mailed or Given
    notice_col = 'Date Notice Mailed or Given'
    data[notice_col] = pd.to_datetime(data[notice_col])
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    median_delay = (data[notice_col] - data['Date Reported']).dt.days.median()
    estimated_notice = data['Date Reported'] + pd.to_timedelta(median_delay, unit='D')
    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection: the chained in-place form is deprecated and does not
    # update the frame when pandas Copy-on-Write is enabled.
    data[notice_col] = data[notice_col].fillna(estimated_notice)

    # Handling Type of Complaint
    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    # Handling Disposition
    def _mode_or_pending(values):
        """Return the most frequent non-missing value, or 'Pending' if none."""
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else 'Pending'

    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
        _mode_or_pending)
    # Look up each row's per-type default and use it only where the
    # disposition is missing.
    data['Disposition'] = data['Disposition'].fillna(
        data['Type of Complaint'].map(most_common_disposition))

    # Handling File Close Date
    data['File Close Date'] = data['File Close Date'].fillna('Unresolved')

    # Handling Method Submitted
    # A missing 'Submitted Online?' value is NaN, which is truthy in Python;
    # treat it explicitly as "not online" so such rows are not labelled
    # 'Online' (and do not skew the mode computed below).
    is_online = data['Submitted Online?'].map(
        lambda flag: bool(pd.notna(flag) and flag)).astype(bool)
    method_missing = data['Method Submitted'].isna()
    data.loc[is_online & method_missing, 'Method Submitted'] = 'Online'
    method_modes = data['Method Submitted'].mode()
    # mode() is empty when every value is missing; leave the column as-is then.
    if not method_modes.empty:
        data['Method Submitted'] = data['Method Submitted'].fillna(method_modes.iloc[0])

    # Dropping rows with missing Submitted Online? or missing Mapped Location
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location']).copy()

    # Extracting latitude and longitude
    # expand=False makes both extractions return a Series, so the two
    # columns are built the same way.
    data['Latitude'] = data['Mapped Location'].str.extract(
        r'\(([^,]+),', expand=False).astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(
        r', ([^,]+)\)', expand=False).astype(float)

    return data
61
+
62
# Loading the data; on any failure, report it in the page and stop the script
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as load_error:
    st.error("Error loading data: " + str(load_error))
    st.stop()
69
+
70
# Creating sidebar
st.sidebar.header("Dashboard Controls")

# Year selector: every distinct 'Year Reported' value, in ascending order
year_options = sorted(data['Year Reported'].unique())
selected_year = st.sidebar.selectbox("Select Year", options=year_options)

# Adding visualization type selector
visualization_names = [
    "Complaint Types",
    "Geographic Distribution",
    "Resolution Status",
    "Submission Methods",
    "Processing Time",
]
viz_type = st.sidebar.selectbox("Select Visualization", visualization_names)

# Filter data based on selected year
year_mask = data['Year Reported'] == selected_year
filtered_data = data[year_mask]

# Main content
st.header(f"Analysis for Year {selected_year}")
89
+
90
# Create metrics: total complaints, mean processing time, most common type
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # load_and_clean_data fills missing 'File Close Date' values with the
    # string 'Unresolved', which pd.to_datetime rejects by default. Coerce
    # unparseable entries to NaT so open files are simply left out of the mean.
    close_dates = pd.to_datetime(filtered_data['File Close Date'], errors='coerce')
    avg_process_time = (close_dates - filtered_data['Date Reported']).dt.days.mean()
    if pd.isna(avg_process_time):
        # No closed file in the selected year: there is nothing to average.
        st.metric("Average Processing Time", "N/A")
    else:
        st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
with col3:
    st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
100
+
101
# Create visualizations based on selection
def _show_count_barplot(counts, title):
    """Draw a bar chart of ``counts`` and render it in the page.

    Args:
        counts: a pandas Series as returned by ``value_counts()``; its values
            go on the x-axis and its index labels on the y-axis.
        title: text shown as the plot title.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=counts.values, y=counts.index, ax=ax)
    ax.set_title(title)
    st.pyplot(fig)
    # Close the figure once rendered so figures do not pile up in
    # matplotlib's global state each time the script runs.
    plt.close(fig)


if viz_type == "Complaint Types":
    st.subheader("Distribution of Complaint Types")
    _show_count_barplot(
        filtered_data['Type of Complaint'].value_counts(),
        f'Complaint Types Distribution in {selected_year}')

elif viz_type == "Geographic Distribution":
    st.subheader("Geographic Distribution of Complaints")
    # Rename to lowercase 'latitude'/'longitude', the spellings st.map
    # recognises when locating coordinate columns, and drop rows whose
    # coordinates could not be parsed.
    map_points = filtered_data[['Latitude', 'Longitude']].rename(
        columns={'Latitude': 'latitude', 'Longitude': 'longitude'}).dropna()
    st.map(map_points)

elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    _show_count_barplot(
        filtered_data['Disposition'].value_counts(),
        f'Resolution Status Distribution in {selected_year}')

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    _show_count_barplot(
        filtered_data['Method Submitted'].value_counts(),
        f'Submission Methods in {selected_year}')

else:  # Processing Time
    st.subheader("Processing Time Analysis")
    # 'File Close Date' contains the placeholder 'Unresolved' for open files
    # (see load_and_clean_data); coerce those to NaT instead of raising, then
    # keep only complaints that have a computable processing time.
    close_dates = pd.to_datetime(filtered_data['File Close Date'], errors='coerce')
    processing_days = (close_dates - filtered_data['Date Reported']).dt.days.dropna()
    if processing_days.empty:
        st.info("No closed complaints are available for the selected year.")
    else:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(x=processing_days, bins=30, ax=ax)
        ax.set_title(f'Distribution of Processing Times in {selected_year}')
        ax.set_xlabel('Processing Time (Days)')
        st.pyplot(fig)
        plt.close(fig)
140
+
141
# Additional insights, shown side by side in two columns
st.header("Key Insights")
left_col, right_col = st.columns(2)

with left_col:
    st.subheader("Top 3 Complaint Types")
    # value_counts() sorts by frequency, so the first three rows are the top three
    complaint_counts = filtered_data['Type of Complaint'].value_counts()
    top_complaints = complaint_counts.iloc[:3]
    st.write(top_complaints)

with right_col:
    st.subheader("Resolution Efficiency")
    # Share of the year's complaints per disposition, as a percentage (2 decimals)
    disposition_counts = filtered_data['Disposition'].value_counts()
    total_complaints = len(filtered_data)
    resolution_rate = (disposition_counts / total_complaints * 100).round(2)
    st.write(resolution_rate)

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")