ralate2 committed on
Commit
a94ed92
·
verified ·
1 Parent(s): 68f7786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -102
app.py CHANGED
@@ -1,9 +1,6 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- import plotly.express as px
7
  from datetime import datetime
8
 
9
  # Set page config
@@ -11,62 +8,61 @@ st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
11
 
12
  # Title and introduction
13
  st.title("Nuisance Complaints Analysis Dashboard")
14
- st.markdown("**Team Members:** Lu Chang (luchang2@illinois.edu), Qiming Li (qimingl4@illinois.edu), Ruchita Alate (ralate2@illinois.edu), Shreyas Kulkarni (ssk16@illinois.edu), Vishal Devulapalli (nsd3@illinois.edu)")
15
  st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
16
 
17
  # Load and clean data
18
  @st.cache_data
19
- def load_and_clean_data():
20
  # Load data
21
- data = pd.read_csv('Nuisance_Complaints_20241130.csv')
22
-
23
- # Drop rows with missing File Number
24
  data = data.dropna(subset=['File Number'])
25
-
26
- # Handle Date Notice Mailed or Given
27
  data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
28
  data['Date Reported'] = pd.to_datetime(data['Date Reported'])
29
  median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
30
  data['Date Notice Mailed or Given'].fillna(data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'), inplace=True)
31
-
32
- # Handle Type of Complaint
33
  data['Type of Complaint'].fillna('Unknown', inplace=True)
34
-
35
- # Handle Disposition
36
  most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
37
  lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
38
  data['Disposition'] = data.apply(
39
- lambda row: most_common_disposition[row['Type of Complaint']]
40
  if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)
41
-
42
- # Handle File Close Date
43
  data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')
44
-
 
45
  # Calculate processing time only for resolved cases
46
  data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days
47
-
48
- # Handle Method Submitted
49
  data['Method Submitted'] = data.apply(
50
  lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
51
  else row['Method Submitted'], axis=1)
52
  mode_method = data['Method Submitted'].mode()[0]
53
  data['Method Submitted'].fillna(mode_method, inplace=True)
54
-
55
- # Drop rows with missing Submitted Online?
56
  data = data.dropna(subset=['Submitted Online?'])
57
-
58
- # Handle Mapped Location
59
- data = data.dropna(subset=['Mapped Location'])
60
-
61
- # Extract latitude and longitude
62
  data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
63
  data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)').astype(float)
64
-
65
  return data
66
 
67
  # Load the data
 
68
  try:
69
- data = load_and_clean_data()
70
  st.success("Data successfully loaded and cleaned!")
71
  except Exception as e:
72
  st.error(f"Error loading data: {str(e)}")
@@ -98,7 +94,7 @@ with col1:
98
  st.metric("Total Complaints", len(filtered_data))
99
  with col2:
100
  # Calculate average processing time only for resolved cases
101
- resolved_cases = filtered_data[filtered_data['File Close Date'].notna()]
102
  if len(resolved_cases) > 0:
103
  avg_process_time = resolved_cases['Processing Time'].mean()
104
  st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
@@ -107,80 +103,14 @@ with col2:
107
  with col3:
108
  st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
109
 
110
-
111
  if viz_type == "Complaint Types":
112
- # Interactive Pie Chart
113
- st.subheader("Interactive Complaint Types Pie Chart")
114
- complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
115
- complaint_counts.columns = ['Complaint Type', 'Count']
116
-
117
- fig = px.pie(
118
- complaint_counts,
119
- names='Complaint Type',
120
- values='Count',
121
- title=f'Complaint Types Distribution in {selected_year}',
122
- hole=0.4 # Donut style
123
- )
124
- fig.update_traces(textinfo='percent+label')
125
- st.plotly_chart(fig, use_container_width=True)
126
-
127
  elif viz_type == "Geographic Distribution":
128
- # Clustered Heatmap
129
- st.subheader("Clustered Heatmap of Complaints")
130
- map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
131
- m = folium.Map(location=map_center, zoom_start=12)
132
-
133
- heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
134
- HeatMap(heat_data).add_to(m)
135
-
136
- st_data = st_folium(m, width=700, height=500)
137
-
138
-
139
  elif viz_type == "Resolution Status":
140
- st.subheader("Complaint Resolution Status")
141
- fig, ax = plt.subplots(figsize=(10, 6))
142
- resolution_counts = filtered_data['Disposition'].value_counts()
143
- sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
144
- plt.title(f'Resolution Status Distribution in {selected_year}')
145
- st.pyplot(fig)
146
-
147
  elif viz_type == "Submission Methods":
148
- st.subheader("Submission Methods Analysis")
149
- fig, ax = plt.subplots(figsize=(10, 6))
150
- submission_counts = filtered_data['Method Submitted'].value_counts()
151
- sns.barplot(x=submission_counts.values, y=submission_counts.index)
152
- plt.title(f'Submission Methods in {selected_year}')
153
- st.pyplot(fig)
154
-
155
-
156
  elif viz_type == "Processing Time":
157
- st.subheader("Processing Time Analysis")
158
- # Filter for resolved cases only
159
- resolved_data = filtered_data[filtered_data['File Close Date'].notna()]
160
- if len(resolved_data) > 0:
161
- fig, ax = plt.subplots(figsize=(10, 6))
162
- sns.histplot(data=resolved_data, x='Processing Time', bins=30)
163
- plt.title(f'Distribution of Processing Times in {selected_year}')
164
- plt.xlabel('Processing Time (Days)')
165
- st.pyplot(fig)
166
- else:
167
- st.write("No resolved cases in this period")
168
-
169
- # Additional insights
170
- st.header("Key Insights")
171
- col1, col2 = st.columns(2)
172
-
173
- with col1:
174
- st.subheader("Top 3 Complaint Types")
175
- top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
176
- st.write(top_complaints)
177
-
178
- with col2:
179
- st.subheader("Resolution Efficiency")
180
- resolution_rate = (filtered_data['Disposition'].value_counts() /
181
- len(filtered_data) * 100).round(2)
182
- st.write(resolution_rate)
183
-
184
- # Footer
185
- st.markdown("---")
186
- st.markdown("Dataset provided by the City of Urbana Open Data Portal")
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
 
 
4
  from datetime import datetime
5
 
6
  # Set page config
 
8
 
9
  # Title and introduction
10
  st.title("Nuisance Complaints Analysis Dashboard")
11
+ st.markdown("**Team Members:** Shreyas Kulkarni (ssk16@illinois.edu), Vishal Devulapalli (nsd3@illinois.edu), Lu Chang (luchang2@illinois.edu), Li Qiming (qimingl4@illinois.edu), Ruchita Alate (ralate2@illinois.edu)")
12
  st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
13
 
14
  # Load and clean data
15
@st.cache_data
def load_and_clean_data(file_path):
    """Load the nuisance-complaints CSV and clean/impute its columns.

    Parameters
    ----------
    file_path : str
        Path to the complaints CSV file.

    Returns
    -------
    pandas.DataFrame
        Cleaned data with added 'Processing Time', 'Latitude' and
        'Longitude' columns. Unresolved cases carry the string
        'Unresolved' in 'File Close Date' (downstream filters rely on
        this sentinel) and NaN in 'Processing Time'.
    """
    data = pd.read_csv(file_path)

    # A complaint without a file number cannot be tracked; drop it.
    data = data.dropna(subset=['File Number'])

    # Impute 'Date Notice Mailed or Given' using the median
    # report-to-notice delay. Use assignment instead of
    # `inplace=True` on a column selection: chained in-place fillna is
    # deprecated in modern pandas and can silently fail to update.
    data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
    data['Date Reported'] = pd.to_datetime(data['Date Reported'])
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    data['Date Notice Mailed or Given'] = data['Date Notice Mailed or Given'].fillna(
        data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'))

    # Keep missing complaint types as their own 'Unknown' category.
    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    # Impute 'Disposition' with the most common value for the same
    # complaint type; fall back to 'Pending' when a type has no mode.
    most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
        lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
    data['Disposition'] = data.apply(
        lambda row: most_common_disposition[row['Type of Complaint']]
        if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)

    # Parse 'File Close Date'; unparseable/missing values become NaT for now.
    data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

    # BUG FIX: compute the processing time while 'File Close Date' is
    # still datetime-typed. The previous order filled the column with
    # the string 'Unresolved' first, which made the subtraction below
    # raise and broke the entire load step. Open cases get NaN here.
    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days

    # Only now mark open cases with the 'Unresolved' sentinel that the
    # dashboard's resolved/unresolved filters expect.
    data['File Close Date'] = data['File Close Date'].fillna('Unresolved')

    # Infer 'Method Submitted' from the 'Submitted Online?' flag, then
    # fall back to the overall mode for anything still missing.
    data['Method Submitted'] = data.apply(
        lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
        else row['Method Submitted'], axis=1)
    mode_method = data['Method Submitted'].mode()[0]
    data['Method Submitted'] = data['Method Submitted'].fillna(mode_method)

    # Drop rows where the online flag itself is missing.
    data = data.dropna(subset=['Submitted Online?'])

    # Extract coordinates from the '(lat, lon)' text in 'Mapped Location'.
    # Select column 0 of the extract result in both cases so a Series
    # (not a one-column DataFrame) is assigned.
    data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
    data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

    return data
61
 
62
  # Load the data
63
+ file_path = "Nuisance_Complaints_20241130.csv"
64
  try:
65
+ data = load_and_clean_data(file_path)
66
  st.success("Data successfully loaded and cleaned!")
67
  except Exception as e:
68
  st.error(f"Error loading data: {str(e)}")
 
94
  st.metric("Total Complaints", len(filtered_data))
95
  with col2:
96
  # Calculate average processing time only for resolved cases
97
+ resolved_cases = filtered_data[filtered_data['File Close Date'] != 'Unresolved']
98
  if len(resolved_cases) > 0:
99
  avg_process_time = resolved_cases['Processing Time'].mean()
100
  st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
 
103
  with col3:
104
  st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
105
 
106
# Placeholder section: each visualization type currently just announces
# where its chart will eventually be rendered. A lookup table keeps the
# selection logic in one place instead of an if/elif chain.
_viz_placeholder_messages = {
    "Complaint Types": "Visualization for Complaint Types will go here.",
    "Geographic Distribution": "Visualization for Geographic Distribution will go here.",
    "Resolution Status": "Visualization for Resolution Status will go here.",
    "Submission Methods": "Visualization for Submission Methods will go here.",
    "Processing Time": "Visualization for Processing Time will go here.",
}
if viz_type in _viz_placeholder_messages:
    st.write(_viz_placeholder_messages[viz_type])