ralate2 committed on
Commit
5d0c96a
·
verified ·
1 Parent(s): a94ed92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -67
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
 
 
4
  from datetime import datetime
5
 
6
  # Set page config
@@ -8,73 +11,99 @@ st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
8
 
9
  # Title and introduction
10
  st.title("Nuisance Complaints Analysis Dashboard")
11
- st.markdown("**Team Members:** Shreyas Kulkarni (ssk16@illinois.edu), Vishal Devulapalli (nsd3@illinois.edu), Lu Chang (luchang2@illinois.edu), Li Qiming (qimingl4@illinois.edu), Ruchita Alate (ralate2@illinois.edu)")
 
 
 
 
 
 
 
12
  st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
13
 
14
  # Load and clean data
15
  @st.cache_data
16
- def load_and_clean_data(file_path):
17
- # Load data
18
- data = pd.read_csv(file_path)
19
-
20
- # Drop rows with missing 'File Number'
21
- data = data.dropna(subset=['File Number'])
22
-
23
- # Handle 'Date Notice Mailed or Given': Impute using median time from 'Date Reported'
24
- data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
25
- data['Date Reported'] = pd.to_datetime(data['Date Reported'])
26
- median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
27
- data['Date Notice Mailed or Given'].fillna(data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'), inplace=True)
28
-
29
- # Handle 'Type of Complaint': Fill missing with 'Unknown'
30
- data['Type of Complaint'].fillna('Unknown', inplace=True)
31
-
32
- # Handle 'Disposition': Impute based on the most common value for the same complaint type
33
- most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
34
- lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
35
- data['Disposition'] = data.apply(
36
- lambda row: most_common_disposition[row['Type of Complaint']]
37
- if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)
38
-
39
- # Handle 'File Close Date': Fill missing with 'Unresolved'
40
- data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')
41
- data['File Close Date'].fillna('Unresolved', inplace=True)
42
-
43
- # Calculate processing time only for resolved cases
44
- data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days
45
-
46
- # Handle 'Method Submitted': Infer based on 'Submitted Online?'
47
- data['Method Submitted'] = data.apply(
48
- lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
49
- else row['Method Submitted'], axis=1)
50
- mode_method = data['Method Submitted'].mode()[0]
51
- data['Method Submitted'].fillna(mode_method, inplace=True)
52
-
53
- # Drop rows with missing 'Submitted Online?'
54
- data = data.dropna(subset=['Submitted Online?'])
55
-
56
- # Handle 'Mapped Location': Extract latitude and longitude
57
- data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
58
- data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)').astype(float)
59
-
60
- return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  # Load the data
63
- file_path = "Nuisance_Complaints_20241130.csv"
64
  try:
65
- data = load_and_clean_data(file_path)
66
  st.success("Data successfully loaded and cleaned!")
67
  except Exception as e:
68
  st.error(f"Error loading data: {str(e)}")
69
  st.stop()
70
 
71
  # Create sidebar
 
72
  st.sidebar.header("Dashboard Controls")
 
 
 
 
 
73
  selected_year = st.sidebar.selectbox(
74
  "Select Year",
75
- options=sorted(data['Year Reported'].unique()),
76
  )
77
-
78
  # Add visualization type selector
79
  viz_type = st.sidebar.selectbox(
80
  "Select Visualization",
@@ -82,35 +111,110 @@ viz_type = st.sidebar.selectbox(
82
  "Submission Methods", "Processing Time"]
83
  )
84
 
 
85
  # Filter data based on selected year
86
- filtered_data = data[data['Year Reported'] == selected_year]
 
 
 
87
 
 
 
 
 
 
88
  # Main content
89
- st.header(f"Analysis for Year {selected_year}")
90
 
 
 
 
 
91
  # Create metrics
92
  col1, col2, col3 = st.columns(3)
93
  with col1:
94
  st.metric("Total Complaints", len(filtered_data))
95
  with col2:
96
- # Calculate average processing time only for resolved cases
97
- resolved_cases = filtered_data[filtered_data['File Close Date'] != 'Unresolved']
98
- if len(resolved_cases) > 0:
99
- avg_process_time = resolved_cases['Processing Time'].mean()
100
- st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
101
- else:
102
- st.metric("Average Processing Time", "N/A")
103
  with col3:
104
- st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
105
-
106
- # Add additional visualizations or tables based on `viz_type` here
 
 
107
  if viz_type == "Complaint Types":
108
- st.write("Visualization for Complaint Types will go here.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  elif viz_type == "Geographic Distribution":
110
- st.write("Visualization for Geographic Distribution will go here.")
 
 
 
 
 
 
 
 
 
 
111
  elif viz_type == "Resolution Status":
112
- st.write("Visualization for Resolution Status will go here.")
 
 
 
 
 
 
113
  elif viz_type == "Submission Methods":
114
- st.write("Visualization for Submission Methods will go here.")
 
 
 
 
 
 
 
115
  elif viz_type == "Processing Time":
116
- st.write("Visualization for Processing Time will go here.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import plotly.express as px
7
  from datetime import datetime
8
 
9
  # Set page config
 
11
 
12
# Title and introduction
st.title("Nuisance Complaints Analysis Dashboard")
# Team roster, rendered as a Markdown bullet list on the page.
st.markdown("""
**Team Members:**
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
""")
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
23
 
24
# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load the nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows missing 'File Number'
      * parse the three date columns; invalid close dates become NaT
      * impute 'Date Notice Mailed or Given' as report date + median delay
      * fill missing 'Type of Complaint' with 'Unknown'
      * impute 'Disposition' from the per-complaint-type mode ('Pending'
        when a type has no observed disposition)
      * compute 'Processing Time' in days for closed files only
      * infer 'Method Submitted' = 'Online' from 'Submitted Online?' and
        fill the remainder with the overall mode
      * drop rows missing 'Submitted Online?' or 'Mapped Location'
      * extract Latitude/Longitude from the "(lat, lon)" location string

    Returns:
        pd.DataFrame: the cleaned complaints data.

    Raises:
        Exception: re-raised after reporting any preprocessing failure.
    """
    try:
        # Load data
        data = pd.read_csv('Nuisance_Complaints_20241204.csv')

        # Drop rows with missing 'File Number'
        data = data.dropna(subset=['File Number'])

        # Convert dates; a close date that fails to parse means "still open"
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates: report date + median observed delay
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        missing_notice = data['Date Notice Mailed or Given'].isna()
        data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
            data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
        )

        # Handle 'Type of Complaint'
        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        # Impute 'Disposition' with the mode for the same complaint type
        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
            lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
        )
        data['Disposition'] = data.apply(
            lambda row: most_common_disposition[row['Type of Complaint']]
            if pd.isna(row['Disposition']) else row['Disposition'],
            axis=1
        )

        # Processing time is only meaningful for closed files
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # BUG FIX: 'Submitted Online?' may still contain NaN here (its dropna
        # happens below), so compare with .eq(True) to get a well-defined
        # boolean mask instead of masking with missing values.
        online_missing_method = data['Submitted Online?'].eq(True) & data['Method Submitted'].isna()
        data.loc[online_missing_method, 'Method Submitted'] = 'Online'
        # BUG FIX: guard mode()[0] — it raises IndexError on an all-NaN column.
        method_mode = data['Method Submitted'].mode()
        if not method_mode.empty:
            data['Method Submitted'] = data['Method Submitted'].fillna(method_mode[0])

        # Drop rows with missing critical values
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like "(lat, lon)" — extract both coordinates
        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

        # Ensure Year Reported is integer (selectbox options depend on this)
        data['Year Reported'] = data['Year Reported'].astype(int)

        return data

    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        # Bare raise preserves the original traceback for the caller
        raise
86
 
87
# Load the cleaned dataset; stop the app run entirely if preprocessing fails.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as e:
    st.error(f"Error loading data: {str(e)}")
    st.stop()

# Sidebar controls

st.sidebar.header("Dashboard Controls")

# Year filter: 'All Time' first, then each reported year in ascending order.
year_options = ['All Time'] + sorted(int(y) for y in data['Year Reported'].unique())

selected_year = st.sidebar.selectbox(
    "Select Year",
    options=year_options,
)
# Visualization selector
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Processing Time"]
)


# Restrict rows to the chosen year ('All Time' keeps the full dataset).
filtered_data = (
    data if selected_year == 'All Time'
    else data[data['Year Reported'] == selected_year]
)

# Page header reflecting the active filter.
st.header(
    "Analysis for All Time" if selected_year == 'All Time'
    else f"Analysis for Year {selected_year}"
)
126
  # Main content
 
127
 
128
# Summary metrics for the filtered selection
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Mean is NaN when no case in the selection has a close date.
    avg_time = filtered_data['Processing Time'].mean()
    avg_label = "N/A" if pd.isna(avg_time) else f"{avg_time:.1f} days"
    st.metric("Average Processing Time", avg_label)
with col3:
    if filtered_data.empty:
        st.metric("Most Common Type", "N/A")
    else:
        top_type = filtered_data['Type of Complaint'].value_counts().index[0]
        st.metric("Most Common Type", top_type)
145
if viz_type == "Complaint Types":
    # Interactive donut chart of complaint-type frequencies.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']

    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Geographic Distribution":
    # BUG FIX: the original branch called folium.Map, HeatMap and st_folium,
    # none of which are imported anywhere in this file, so selecting this
    # view raised NameError. Rebuilt as a density map with plotly.express,
    # which the file already imports as px.
    st.subheader("Geographic Distribution of Complaints")
    geo_data = filtered_data[['Latitude', 'Longitude']].dropna()
    if geo_data.empty:
        st.write("No location data available for this period")
    else:
        fig = px.density_mapbox(
            geo_data,
            lat='Latitude',
            lon='Longitude',
            radius=10,
            zoom=12,
            center={'lat': geo_data['Latitude'].mean(),
                    'lon': geo_data['Longitude'].mean()},
            mapbox_style='open-street-map',  # no Mapbox token required
            title=f'Complaint Density in {selected_year}',
        )
        st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Resolution Status":
    # Horizontal bar chart of disposition counts.
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw on the explicit axes rather than relying on implicit pyplot state.
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Submission Methods":
    # Horizontal bar chart of how complaints were submitted.
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Processing Time":
    # Histogram of days-to-close, restricted to resolved (closed) cases.
    st.subheader("Processing Time Analysis")
    resolved_data = filtered_data[filtered_data['File Close Date'].notna()]
    if len(resolved_data) > 0:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data=resolved_data, x='Processing Time', bins=30, ax=ax)
        ax.set_title(f'Distribution of Processing Times in {selected_year}')
        ax.set_xlabel('Processing Time (Days)')
        st.pyplot(fig)
    else:
        st.write("No resolved cases in this period")
202
+
203
# Additional insights
st.header("Key Insights")
insight_left, insight_right = st.columns(2)

with insight_left:
    st.subheader("Top 3 Complaint Types")
    st.write(filtered_data['Type of Complaint'].value_counts().head(3))

with insight_right:
    st.subheader("Resolution Efficiency")
    # Share of each disposition as a percentage of all filtered complaints.
    disposition_counts = filtered_data['Disposition'].value_counts()
    resolution_rate = (disposition_counts / len(filtered_data) * 100).round(2)
    st.write(resolution_rate)

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")