ralate2 committed on
Commit
56b8432
·
verified ·
1 Parent(s): 342f639

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -70
app.py CHANGED
@@ -1,91 +1,185 @@
 
1
  import pandas as pd
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
- import plotly.express as px
6
  from datetime import datetime
7
 
 
 
 
 
 
 
 
 
8
  # Load and clean data
 
9
  def load_and_clean_data():
 
10
  data = pd.read_csv('Nuisance_Complaints_20241130.csv')
11
 
12
- # Convert date columns
13
- date_columns = ['Date Reported', 'Date Notice Mailed or Given', 'File Close Date']
14
- for col in date_columns:
15
- data[col] = pd.to_datetime(data[col], errors='coerce')
16
 
17
- # Handle missing values
 
 
 
 
 
 
18
  data['Type of Complaint'].fillna('Unknown', inplace=True)
19
- data['Disposition'].fillna('Pending', inplace=True)
20
- data['Method Submitted'].fillna('Not Specified', inplace=True)
21
 
22
- # Calculate processing time
 
 
 
 
 
 
 
 
 
 
23
  data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days
24
 
25
- # Remove duplicates
26
- data.drop_duplicates(subset=['File Number'], keep='first', inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  return data
29
 
30
- # Create visualizations
31
- def create_visualizations(data):
32
- # 1. Complaint Types Over Time
33
- plt.figure(figsize=(12, 6))
34
- complaints_over_time = data.groupby(['Year Reported', 'Type of Complaint']).size().unstack()
35
- complaints_over_time.plot(kind='line', marker='o')
36
- plt.title('Trends in Complaint Types Over Years')
37
- plt.xlabel('Year')
38
- plt.ylabel('Number of Complaints')
39
- plt.legend(title='Complaint Type', bbox_to_anchor=(1.05, 1))
40
- plt.tight_layout()
41
- plt.show()
42
-
43
- # 2. Resolution Distribution
44
- plt.figure(figsize=(10, 6))
45
- sns.countplot(data=data, y='Disposition', order=data['Disposition'].value_counts().index)
46
- plt.title('Distribution of Complaint Resolutions')
47
- plt.xlabel('Count')
48
- plt.ylabel('Resolution Type')
49
- plt.tight_layout()
50
- plt.show()
51
-
52
- # 3. Average Processing Time by Submission Method
53
- plt.figure(figsize=(10, 6))
54
- avg_processing_time = data.groupby('Method Submitted')['Processing Time'].mean().sort_values()
55
- sns.barplot(x=avg_processing_time.values, y=avg_processing_time.index)
56
- plt.title('Average Processing Time by Submission Method')
57
- plt.xlabel('Average Processing Time (Days)')
58
- plt.ylabel('Submission Method')
59
- plt.tight_layout()
60
- plt.show()
61
-
62
- # 4. Monthly Distribution of Complaints
63
- plt.figure(figsize=(10, 6))
64
- monthly_complaints = data.groupby('Month Reported').size()
65
- sns.barplot(x=monthly_complaints.index, y=monthly_complaints.values)
66
- plt.title('Monthly Distribution of Complaints')
67
- plt.xlabel('Month')
68
- plt.ylabel('Number of Complaints')
69
- plt.tight_layout()
70
- plt.show()
71
-
72
- # 5. Complaint Type Distribution
73
- plt.figure(figsize=(10, 6))
74
- sns.countplot(data=data, y='Type of Complaint',
75
- order=data['Type of Complaint'].value_counts().index)
76
- plt.title('Distribution of Complaint Types')
77
- plt.xlabel('Count')
78
- plt.ylabel('Complaint Type')
79
- plt.tight_layout()
80
- plt.show()
81
-
82
- # Main execution
83
- def main():
84
- # Load and clean data
85
  data = load_and_clean_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- # Create visualizations
88
- create_visualizations(data)
 
 
 
89
 
90
- if __name__ == "__main__":
91
- main()
 
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
 
6
  from datetime import datetime
7
 
8
# Page-level configuration: page title and wide layout
st.set_page_config(layout="wide", page_title="Nuisance Complaints Dashboard")

# Text shown at the top of the dashboard
DASHBOARD_TITLE = "Nuisance Complaints Analysis Dashboard"
TEAM_CREDITS = "**Team Members:** Shreyas Kulkarni (ssk16@illinois.edu) Vishal Devulapalli (nsd3@illinois.edu) Lu Chang (luchang2@illinois.edu) Li Qiming (qimingl4@illinois.edu) Ruchita Alate (ralate2@illinois.edu) "
DASHBOARD_BLURB = "This dashboard analyzes nuisance complaints data from the City of Urbana."

# Title, team credits, and a one-line introduction, in that order
st.title(DASHBOARD_TITLE)
st.markdown(TEAM_CREDITS)
st.write(DASHBOARD_BLURB)
15
+
16
  # Load and clean data
17
@st.cache_data
def load_and_clean_data(csv_path='Nuisance_Complaints_20241130.csv'):
    """Load the nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps, in order:

    1. Drop rows with no ``File Number``.
    2. Parse the three date columns; unparseable values become ``NaT``.
    3. Fill missing ``Date Notice Mailed or Given`` with ``Date Reported``
       plus the median reported-to-notice delay (in days).
    4. Fill missing ``Type of Complaint`` with ``'Unknown'``.
    5. Fill missing ``Disposition`` with the most common disposition of the
       same complaint type (``'Pending'`` if that type has none recorded).
    6. Add ``Processing Time`` (days between report and file close; NaN for
       files without a close date).
    7. Fill missing ``Method Submitted`` with ``'Online'`` where
       ``Submitted Online?`` is present and truthy, otherwise with the
       column's most common value.
    8. Drop rows missing ``Submitted Online?`` or ``Mapped Location``.
    9. Add numeric ``Latitude`` / ``Longitude`` parsed from
       ``Mapped Location``.

    Args:
        csv_path: Path of the CSV file to read. Defaults to the dataset
            file name the dashboard has always used.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    data = pd.read_csv(csv_path)

    # Copy so the column assignments below write to an independent frame
    # rather than to a possible view of the raw data.
    data = data.dropna(subset=['File Number']).copy()

    # Parse every date column the same way; a malformed value becomes NaT
    # instead of aborting the whole load.
    for date_col in ('Date Notice Mailed or Given', 'Date Reported', 'File Close Date'):
        data[date_col] = pd.to_datetime(data[date_col], errors='coerce')

    # Estimate missing notice dates from the typical (median) delay.
    # Assign the result back: in-place fillna on a column selection is a
    # chained mutation that newer pandas versions no longer propagate.
    median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
    estimated_notice = data['Date Reported'] + pd.to_timedelta(median_delay, unit='D')
    data['Date Notice Mailed or Given'] = data['Date Notice Mailed or Given'].fillna(estimated_notice)

    data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

    def _most_common(values):
        # mode() ignores NaN, so it is empty when a group has no disposition at all
        modes = values.mode()
        return modes.iloc[0] if not modes.empty else 'Pending'

    # Per-type lookup, then a vectorised fill instead of a row-wise apply
    disposition_by_type = data.groupby('Type of Complaint')['Disposition'].apply(_most_common)
    data['Disposition'] = data['Disposition'].fillna(
        data['Type of Complaint'].map(disposition_by_type))

    # NaN wherever File Close Date (or Date Reported) is missing
    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days

    # A missing 'Submitted Online?' must not count as "online": NaN is truthy
    # in Python, so it is excluded explicitly before the truthiness test.
    submitted_online = data['Submitted Online?']
    is_online = submitted_online.notna() & submitted_online.astype(bool)
    method_missing = data['Method Submitted'].isna()
    data['Method Submitted'] = data['Method Submitted'].mask(is_online & method_missing, 'Online')

    # Remaining gaps take the most common method, if the column has any value
    method_modes = data['Method Submitted'].mode()
    if not method_modes.empty:
        data['Method Submitted'] = data['Method Submitted'].fillna(method_modes.iloc[0])

    # Rows missing either column are removed (dropna defaults to how='any')
    data = data.dropna(subset=['Submitted Online?', 'Mapped Location']).copy()

    # Latitude is the text between '(' and the first following comma;
    # longitude is the text between ', ' and ')'. Values that do not match
    # or are not numeric become NaN.
    location = data['Mapped Location']
    data['Latitude'] = pd.to_numeric(
        location.str.extract(r'\(([^,]+),', expand=False), errors='coerce')
    data['Longitude'] = pd.to_numeric(
        location.str.extract(r', ([^,]+)\)', expand=False), errors='coerce')

    return data
65
 
66
# Load the data; any failure is shown in the UI and the script run is halted
try:
    data = load_and_clean_data()
except Exception as e:
    st.error(f"Error loading data: {str(e)}")
    st.stop()
else:
    # Only the load itself sits inside the try body
    st.success("Data successfully loaded and cleaned!")

# Create sidebar
st.sidebar.header("Dashboard Controls")
# Missing years are left out of the options: NaN never equals anything, so
# selecting it would always produce an empty selection below.
available_years = sorted(data['Year Reported'].dropna().unique())
selected_year = st.sidebar.selectbox(
    "Select Year",
    options=available_years,
)

# Add visualization type selector
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Processing Time"]
)

# Filter data based on selected year
filtered_data = data[data['Year Reported'] == selected_year]

# Main content
st.header(f"Analysis for Year {selected_year}")

# Create metrics
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Average over closed files only; rows whose Processing Time is NaN are
    # dropped so the metric never renders as "nan days".
    is_closed = filtered_data['File Close Date'].notna()
    processing_times = filtered_data.loc[is_closed, 'Processing Time'].dropna()
    if not processing_times.empty:
        st.metric("Average Processing Time", f"{processing_times.mean():.1f} days")
    else:
        st.metric("Average Processing Time", "N/A")
with col3:
    # mode() is empty when the selection has no rows; indexing it would raise
    common_types = filtered_data['Type of Complaint'].mode()
    st.metric("Most Common Type", common_types.iloc[0] if not common_types.empty else "N/A")
108
+
109
+
110
# Render the visualization chosen in the sidebar
if viz_type == "Complaint Types":
    # `px` is not provided by the file's import block, so it is imported here
    import plotly.express as px

    # Interactive Pie Chart
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']

    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Geographic Distribution":
    # None of these names are provided by the file's import block either
    import folium
    from folium.plugins import HeatMap
    from streamlit_folium import st_folium

    # Clustered Heatmap
    st.subheader("Clustered Heatmap of Complaints")
    heat_data = filtered_data[['Latitude', 'Longitude']].dropna().values.tolist()
    if heat_data:
        map_center = [filtered_data['Latitude'].mean(), filtered_data['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(heat_data).add_to(m)
        st_folium(m, width=700, height=500)
    else:
        # With no coordinates the centre would be NaN, which is not a usable map location
        st.write("No mapped locations in this period")

elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw on the axes created above rather than on the implicit current axes
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)
    # Figures made through pyplot stay registered until closed; close after rendering
    plt.close(fig)

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)
    plt.close(fig)

elif viz_type == "Processing Time":
    st.subheader("Processing Time Analysis")
    # Filter for resolved cases only
    resolved_data = filtered_data[filtered_data['File Close Date'].notna()]
    if len(resolved_data) > 0:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data=resolved_data, x='Processing Time', bins=30, ax=ax)
        ax.set_title(f'Distribution of Processing Times in {selected_year}')
        ax.set_xlabel('Processing Time (Days)')
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.write("No resolved cases in this period")
167
+
168
# Key insights: two side-by-side panels for the selected year
st.header("Key Insights")
left_panel, right_panel = st.columns(2)

with left_panel:
    # The three most frequent complaint types with their counts
    st.subheader("Top 3 Complaint Types")
    st.write(filtered_data['Type of Complaint'].value_counts().head(3))

with right_panel:
    # Each disposition as a percentage of all complaints in the selection
    st.subheader("Resolution Efficiency")
    disposition_counts = filtered_data['Disposition'].value_counts()
    disposition_share = (disposition_counts / len(filtered_data) * 100).round(2)
    st.write(disposition_share)

# Footer: horizontal rule followed by the data-source credit
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")