ralate2 committed on
Commit
b7feb2b
·
verified ·
1 Parent(s): 0adda8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -161
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
 
6
  import folium
7
  from folium.plugins import HeatMap
8
  from streamlit_folium import st_folium
@@ -21,118 +22,66 @@ st.markdown("""
21
  * Ruchita Alate (ralate2@illinois.edu)
22
  * Shreyas Kulkarni (ssk16@illinois.edu)
23
  * Vishal Devulapalli (nsd3@illinois.edu)
 
 
24
  """)
25
- st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
26
 
27
  # Load and clean data
28
  @st.cache_data
29
  def load_and_clean_data():
30
- try:
31
- # Load data
32
- data = pd.read_csv('Nuisance_Complaints_20241130.csv')
33
-
34
- # Drop rows with missing 'File Number'
35
- data = data.dropna(subset=['File Number'])
36
-
37
- # Convert dates and handle date-related columns
38
- data['Date Reported'] = pd.to_datetime(data['Date Reported'])
39
- data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
40
- data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')
41
-
42
- # Handle 'Date Notice Mailed or Given'
43
- median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
44
- data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Notice Mailed or Given'] = \
45
- data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Reported'] + pd.Timedelta(days=median_delay)
46
-
47
- # Handle 'Type of Complaint'
48
- data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')
49
-
50
- # Handle 'Disposition'
51
- most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
52
- lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
53
- )
54
- data['Disposition'] = data.apply(
55
- lambda row: most_common_disposition[row['Type of Complaint']]
56
- if pd.isna(row['Disposition']) else row['Disposition'],
57
- axis=1
58
- )
59
-
60
- # Calculate processing time for resolved cases
61
- data['Processing Time'] = np.where(
62
- data['File Close Date'].notna(),
63
- (data['File Close Date'] - data['Date Reported']).dt.days,
64
- np.nan
65
- )
66
-
67
- # Handle 'Method Submitted'
68
- data.loc[
69
- (data['Submitted Online?']) & (data['Method Submitted'].isna()),
70
- 'Method Submitted'
71
- ] = 'Online'
72
- data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])
73
-
74
- # Drop rows with missing critical values
75
- data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])
76
-
77
- # Extract and clean location data
78
- data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
79
- data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)
80
-
81
- # Ensure Year Reported is integer
82
- data['Year Reported'] = data['Year Reported'].astype(int)
83
-
84
- return data
85
-
86
- except Exception as e:
87
- st.error(f"Error in data preprocessing: {str(e)}")
88
- raise e
89
 
90
- # Load the data
91
- try:
92
- data = load_and_clean_data()
93
- st.success("Data successfully loaded and cleaned!")
94
- except Exception as e:
95
- st.error(f"Error loading data: {str(e)}")
96
- st.stop()
 
 
97
 
98
- # Create sidebar
 
 
 
 
 
 
 
99
 
 
100
  st.sidebar.header("Dashboard Controls")
 
 
 
 
 
 
101
 
102
- # Get unique years and convert to list for selectbox
103
- year_list = sorted(data['Year Reported'].unique().tolist())
104
- year_options = ['All Time'] + [int(year) for year in year_list] # Convert years to integers
105
-
106
- selected_year = st.sidebar.selectbox(
107
- "Select Year",
108
- options=year_options,
109
- )
110
- # Add visualization type selector
111
- viz_type = st.sidebar.selectbox(
112
- "Select Visualization",
113
- ["Complaint Types", "Geographic Distribution", "Resolution Status",
114
- "Submission Methods", "Complaints by Disposition"]
115
- )
116
-
117
-
118
- # Filter data based on selected year
119
- if selected_year == 'All Time':
120
- filtered_data = data # Use complete dataset when 'All Time' is selected
121
- else:
122
- filtered_data = data[data['Year Reported'] == selected_year]
123
-
124
- # Update header text
125
- if selected_year == 'All Time':
126
- st.header("Analysis for All Time")
127
- else:
128
- st.header(f"Analysis for Year {selected_year}")
129
- # Main content
130
-
131
- # Create metrics
132
- # Create metrics
133
- # Create metrics
134
- # Create metrics
135
- # Create metrics
136
  col1, col2, col3 = st.columns(3)
137
  with col1:
138
  st.metric("Total Complaints", len(filtered_data))
@@ -140,85 +89,90 @@ with col2:
140
  avg_time = filtered_data['Processing Time'].mean()
141
  st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
142
  with col3:
143
- if not filtered_data.empty:
144
- most_common = filtered_data['Type of Complaint'].value_counts().index[0]
145
- st.metric("Most Common Type", most_common)
146
- else:
147
- st.metric("Most Common Type", "N/A")
148
  if viz_type == "Complaint Types":
149
- # Interactive Pie Chart
150
  st.subheader("Interactive Complaint Types Pie Chart")
151
  complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
152
  complaint_counts.columns = ['Complaint Type', 'Count']
153
-
154
- fig = px.pie(
155
- complaint_counts,
156
- names='Complaint Type',
157
- values='Count',
158
- title=f'Complaint Types Distribution in {selected_year}',
159
- hole=0.4 # Donut style
160
- )
161
- fig.update_traces(textinfo='percent+label')
162
  st.plotly_chart(fig, use_container_width=True)
 
 
 
 
163
 
164
  elif viz_type == "Geographic Distribution":
165
- # Clustered Heatmap
166
  st.subheader("Clustered Heatmap of Complaints")
167
  map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
168
  m = folium.Map(location=map_center, zoom_start=12)
169
-
170
  heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
171
  HeatMap(heat_data).add_to(m)
172
-
173
- st_data = st_folium(m, width=700, height=500)
174
-
 
175
 
176
  elif viz_type == "Resolution Status":
177
- st.subheader("Complaint Resolution Status")
178
- fig, ax = plt.subplots(figsize=(10, 6))
179
- resolution_counts = filtered_data['Disposition'].value_counts()
180
- sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
181
- plt.title(f'Resolution Status Distribution in {selected_year}')
182
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  elif viz_type == "Submission Methods":
185
  st.subheader("Submission Methods Analysis")
186
- fig, ax = plt.subplots(figsize=(10, 6))
187
  submission_counts = filtered_data['Method Submitted'].value_counts()
188
- sns.barplot(x=submission_counts.values, y=submission_counts.index)
189
- plt.title(f'Submission Methods in {selected_year}')
 
190
  st.pyplot(fig)
191
-
 
 
192
 
193
  elif viz_type == "Complaints by Disposition":
194
  st.subheader("Complaints by Disposition")
195
  disposition_counts = filtered_data['Disposition'].value_counts()
196
-
197
- if not disposition_counts.empty:
198
- fig, ax = plt.subplots(figsize=(10, 6))
199
- sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette="viridis", ax=ax)
200
- ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
201
- ax.set_xlabel('Number of Complaints', fontsize=12)
202
- ax.set_ylabel('Disposition', fontsize=12)
203
- st.pyplot(fig)
204
- else:
205
- st.write("No data available for the selected year.")
206
-
207
- # Additional insights
208
- st.header("Key Insights")
209
- col1, col2 = st.columns(2)
210
-
211
- with col1:
212
- st.subheader("Top 3 Complaint Types")
213
- top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
214
- st.write(top_complaints)
215
-
216
- with col2:
217
- st.subheader("Resolution Efficiency")
218
- resolution_rate = (filtered_data['Disposition'].value_counts() /
219
- len(filtered_data) * 100).round(2)
220
- st.write(resolution_rate)
221
-
222
- # Footer
223
- st.markdown("---")
224
- st.markdown("Dataset provided by the City of Urbana Open Data Portal")
 
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
+ import altair as alt
7
  import folium
8
  from folium.plugins import HeatMap
9
  from streamlit_folium import st_folium
 
22
  * Ruchita Alate (ralate2@illinois.edu)
23
  * Shreyas Kulkarni (ssk16@illinois.edu)
24
  * Vishal Devulapalli (nsd3@illinois.edu)
25
+
26
+ This dashboard analyzes nuisance complaints data from the City of Urbana. The visualizations aim to explore complaint trends, resolution efficiency, and geographic patterns to provide actionable insights for urban planning and management.
27
  """)
 
28
 
29
# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load the Urbana nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows without a 'File Number'
      * parse the three date columns; impute missing notice dates using the
        median report-to-notice delay
      * fill missing 'Type of Complaint' / 'Disposition' / 'Method Submitted'
      * derive 'Processing Time' (days, closed files only), 'Latitude' /
        'Longitude' from 'Mapped Location', and 'Month Reported'
    """
    data = pd.read_csv('Nuisance_Complaints_20241204.csv')
    data = data.dropna(subset=['File Number'])

    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # Fill missing notice dates with the median report-to-notice delay.
    # Guard: if every notice date is missing, the median is NaN and
    # pd.Timedelta(days=NaN) would raise — skip imputation in that case.
    delay_days = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days
    median_delay = delay_days.median()
    if pd.notna(median_delay):
        missing_notice = data['Date Notice Mailed or Given'].isna()
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    # Fill missing dispositions with the most common disposition for the
    # row's complaint type (vectorized .map instead of a row-wise apply).
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
        lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
    )
    missing_disposition = data['Disposition'].isna()
    data.loc[missing_disposition, 'Disposition'] = (
        data.loc[missing_disposition, 'Type of Complaint'].map(most_common_disposition)
    )

    # Processing time is only defined for files that have been closed.
    data['Processing Time'] = np.where(
        data['File Close Date'].notna(),
        (data['File Close Date'] - data['Date Reported']).dt.days,
        np.nan
    )

    # Drop rows missing the critical fields BEFORE using 'Submitted Online?'
    # as a boolean mask — NaN entries would otherwise poison the mask.
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

    data.loc[(data['Submitted Online?']) & (data['Method Submitted'].isna()),
             'Method Submitted'] = 'Online'
    data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

    # 'Mapped Location' holds "(lat, lon)" strings; split into numeric columns.
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

    data['Year Reported'] = data['Year Reported'].astype(int)
    data['Month Reported'] = data['Date Reported'].dt.month
    return data
66
+
67
# Load the cleaned dataset once (cached by Streamlit).
data = load_and_clean_data()

# Sidebar controls: year filter and visualization picker.
st.sidebar.header("Dashboard Controls")
year_options = ['All Time'] + sorted(data['Year Reported'].unique().tolist())
selected_year = st.sidebar.selectbox("Select Year", options=year_options)
viz_type = st.sidebar.selectbox("Select Visualization", [
    "Complaint Types", "Geographic Distribution", "Resolution Status",
    "Submission Methods", "Complaints by Disposition", "Monthly Trends by Complaint Type"
])

# Restrict the data to the chosen year and title the page accordingly.
if selected_year == 'All Time':
    filtered_data = data
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for {selected_year}")

# Headline metrics for the current selection.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    avg_time = filtered_data['Processing Time'].mean()
    if pd.notna(avg_time):
        st.metric("Average Processing Time", f"{avg_time:.1f} days")
    else:
        st.metric("Average Processing Time", "N/A")
with col3:
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
    else:
        most_common = "N/A"
    st.metric("Most Common Type", most_common)
95
# Visualizations — one branch per sidebar selection, each with a short write-up.
if viz_type == "Complaint Types":
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']
    fig = px.pie(complaint_counts, names='Complaint Type', values='Count', hole=0.4)
    st.plotly_chart(fig, use_container_width=True)
    st.write("""
    **Write-up:** This visualization shows the distribution of complaint types as a donut chart.
    It provides a quick overview of the most common complaints. The donut layout makes it
    easy to compare the relative share of each complaint category.""")

elif viz_type == "Geographic Distribution":
    st.subheader("Clustered Heatmap of Complaints")
    # Guard: with no usable coordinates, mean() is NaN and folium would fail.
    coords = filtered_data[['Latitude', 'Longitude']].dropna()
    if coords.empty:
        st.write("No location data available for the selected year.")
    else:
        map_center = [coords['Latitude'].mean(), coords['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(coords.values.tolist()).add_to(m)
        st_folium(m, width=700, height=500)
    st.write("""
    **Write-up:** This heatmap visualizes complaint hotspots geographically. Areas with
    higher complaint density are highlighted, helping policymakers focus resources effectively.""")

elif viz_type == "Resolution Status":
    st.subheader("Interactive Complaint Resolution Status")
    resolution_counts = filtered_data['Disposition'].value_counts().reset_index()
    resolution_counts.columns = ['Disposition', 'Count']
    resolution_counts['Percentage'] = (resolution_counts['Count'] / resolution_counts['Count'].sum()) * 100
    chart = alt.Chart(resolution_counts).mark_arc(innerRadius=50).encode(
        theta=alt.Theta(field="Count", type="quantitative"),
        color=alt.Color(field="Disposition", type="nominal"),
        tooltip=[
            alt.Tooltip("Disposition", title="Resolution"),
            alt.Tooltip("Count", title="Count"),
            alt.Tooltip("Percentage", title="Percentage", format=".2f")
        ]
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
    **Write-up:** This chart visualizes resolution status using a donut chart.
    It provides insights into the efficiency of complaint resolutions.""")

elif viz_type == "Monthly Trends by Complaint Type":
    st.subheader("Monthly Trends Grouped by Complaint Types")
    monthly_trends = (
        filtered_data.groupby(['Month Reported', 'Type of Complaint'])
        .size()
        .reset_index(name='Count')
    )
    # Month names via pd.Timestamp: avoids `datetime(2023, x, 1)`, which
    # relied on a `datetime` import that is never made in this file
    # (latent NameError). The year is arbitrary — only the name is used.
    monthly_trends['Month'] = monthly_trends['Month Reported'].apply(
        lambda x: pd.Timestamp(year=2023, month=int(x), day=1).strftime('%B')
    )
    chart = alt.Chart(monthly_trends).mark_line(point=True).encode(
        x=alt.X('Month Reported:O', title='Month'),
        y=alt.Y('Count:Q', title='Number of Complaints'),
        color=alt.Color('Type of Complaint:N', title='Complaint Type'),
        tooltip=["Type of Complaint:N", "Month:N", "Count:Q"]
    )
    st.altair_chart(chart, use_container_width=True)
    st.write("""
    **Write-up:** This line chart visualizes monthly trends in complaints grouped by type.
    The use of vibrant colors helps distinguish trends across different complaint types.""")

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    submission_counts = filtered_data['Method Submitted'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=submission_counts.values, y=submission_counts.index, palette='inferno', ax=ax)
    ax.set_title(f"Submission Methods in {selected_year}")
    st.pyplot(fig)
    st.write("""
    **Write-up:** This bar chart illustrates the preferred methods for complaint submission.
    The `inferno` color palette highlights differences across submission types.""")

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=disposition_counts.values, y=disposition_counts.index, palette='viridis', ax=ax)
    ax.set_title(f"Complaints by Disposition in {selected_year}")
    st.pyplot(fig)
    st.write("""
    **Write-up:** This bar chart shows the distribution of complaints across various dispositions.
    The `viridis` color palette effectively highlights disposition categories.""")