ralate2 committed on
Commit
934f02a
·
verified ·
1 Parent(s): 5d0c96a

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -220
app.py DELETED
@@ -1,220 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
- import plotly.express as px
7
- from datetime import datetime
8
-
9
# Set page config — this must be the first Streamlit call in the script.
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Title and introduction: dashboard heading, team roster, and a one-line
# description of the data source.
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown("""
**Team Members:**
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
""")
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
23
-
24
# Load and clean data (cached across reruns by Streamlit).
@st.cache_data
def load_and_clean_data():
    """Load the Urbana nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning pipeline (order matters — later steps read columns produced by
    earlier ones):
      1. Drop rows lacking a 'File Number' (the record key).
      2. Parse the three date columns; coerce bad close dates to NaT.
      3. Impute missing notice dates as report date + median report->notice delay.
      4. Fill missing complaint types with 'Unknown'.
      5. Fill missing dispositions with the modal disposition for the same
         complaint type (fallback 'Pending').
      6. Derive 'Processing Time' (days report->close; NaN for open cases).
      7. Impute 'Method Submitted' ('Online' where flagged, else overall mode).
      8. Drop rows missing 'Submitted Online?' or 'Mapped Location'.
      9. Extract float 'Latitude'/'Longitude' from the "(lat, lon)" string.
     10. Force 'Year Reported' to int.

    Returns:
        pd.DataFrame: the cleaned complaints table.

    Raises:
        Exception: any preprocessing error, re-raised after being surfaced
        in the UI via ``st.error``.
    """
    try:
        data = pd.read_csv('Nuisance_Complaints_20241204.csv')

        # Rows without a file number are unusable records.
        data = data.dropna(subset=['File Number'])

        # Close date may legitimately be absent (open cases), so coerce
        # unparseable values to NaT there instead of raising.
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates from the median report->notice lag.
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Notice Mailed or Given'] = \
            data.loc[data['Date Notice Mailed or Given'].isna(), 'Date Reported'] + pd.Timedelta(days=median_delay)

        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        # Most common disposition per complaint type; 'Pending' when a
        # type has no non-null dispositions at all.
        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(
            lambda x: x.mode().iloc[0] if len(x.mode()) > 0 else 'Pending'
        )
        data['Disposition'] = data.apply(
            lambda row: most_common_disposition[row['Type of Complaint']]
            if pd.isna(row['Disposition']) else row['Disposition'],
            axis=1
        )

        # Days from report to closure; NaN while the case is still open.
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # Online submissions with no recorded method are 'Online'; any
        # remaining gaps get the overall modal method.
        data.loc[
            (data['Submitted Online?']) & (data['Method Submitted'].isna()),
            'Method Submitted'
        ] = 'Online'
        data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

        # Both fields are required downstream (mapping + filtering).
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like "(lat, lon)" — TODO confirm against
        # the raw CSV; extract each coordinate as a float.
        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

        # Year is used for exact-match filtering, so force integer dtype.
        data['Year Reported'] = data['Year Reported'].astype(int)

        return data

    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        # Bare `raise` preserves the original traceback; the original
        # `raise e` re-raised through an extra, redundant frame.
        raise
86
-
87
# Load the data once at the top of the script; on failure, show the error
# and halt so no later code runs against an undefined `data`.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as e:
    st.error(f"Error loading data: {str(e)}")
    st.stop()  # stops Streamlit script execution here
94
-
95
# Sidebar: year filter and visualization picker.
st.sidebar.header("Dashboard Controls")

# Year choices: 'All Time' followed by each reported year in ascending order.
year_options = ['All Time'] + sorted(int(y) for y in data['Year Reported'].unique())

selected_year = st.sidebar.selectbox(
    "Select Year",
    options=year_options,
)

# Which chart to render in the main panel.
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Processing Time"]
)
113
-
114
-
115
# Filter the dataset and set the page header in a single conditional — the
# original tested `selected_year == 'All Time'` in two back-to-back
# if/else blocks doing related work.
if selected_year == 'All Time':
    filtered_data = data  # complete dataset when 'All Time' is selected
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for Year {selected_year}")
127
-
128
# Headline metrics for the current filter: complaint volume, average
# days-to-close, and the dominant complaint type.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Mean is NaN when no case in the current filter has closed yet.
    avg_time = filtered_data['Processing Time'].mean()
    st.metric("Average Processing Time", f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
with col3:
    # Guard against an empty filter result before taking the top type.
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
        st.metric("Most Common Type", most_common)
    else:
        st.metric("Most Common Type", "N/A")
145
# Render the visualization selected in the sidebar.
if viz_type == "Complaint Types":
    # Interactive donut chart of complaint-type frequencies.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']

    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Geographic Distribution":
    # BUG FIX: the original branch called folium, HeatMap and st_folium,
    # none of which are imported anywhere in this file, so selecting this
    # view always raised NameError. Rebuilt as a density map with plotly
    # express, which the file already imports as `px`.
    st.subheader("Geographic Distribution of Complaints")
    geo_data = filtered_data.dropna(subset=['Latitude', 'Longitude'])
    if geo_data.empty:
        st.write("No location data available for this period")
    else:
        fig = px.density_mapbox(
            geo_data,
            lat='Latitude',
            lon='Longitude',
            radius=10,
            center={'lat': geo_data['Latitude'].mean(),
                    'lon': geo_data['Longitude'].mean()},
            zoom=12,
            mapbox_style='open-street-map',  # no API token required
            title=f'Complaint Density in {selected_year}',
        )
        st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Resolution Status":
    # Horizontal bar chart of disposition counts.
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw on the created axes explicitly rather than relying on the
    # implicit "current axes" pyplot state.
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Submission Methods":
    # Horizontal bar chart of how complaints were submitted.
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Processing Time":
    # Histogram of days-to-close; only resolved cases have a close date.
    st.subheader("Processing Time Analysis")
    resolved_data = filtered_data[filtered_data['File Close Date'].notna()]
    if len(resolved_data) > 0:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(data=resolved_data, x='Processing Time', bins=30, ax=ax)
        ax.set_title(f'Distribution of Processing Times in {selected_year}')
        ax.set_xlabel('Processing Time (Days)')
        st.pyplot(fig)
    else:
        st.write("No resolved cases in this period")
202
-
203
# Key-insight summaries (two columns) followed by the page footer.
st.header("Key Insights")
col1, col2 = st.columns(2)

with col1:
    st.subheader("Top 3 Complaint Types")
    # Three most frequent complaint types in the current filter.
    st.write(filtered_data['Type of Complaint'].value_counts().head(3))

with col2:
    st.subheader("Resolution Efficiency")
    # Share of each disposition as a percentage of all filtered complaints.
    disposition_pct = filtered_data['Disposition'].value_counts() / len(filtered_data) * 100
    st.write(disposition_pct.round(2))

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")