ralate2 committed on
Commit
46f241a
·
verified ·
1 Parent(s): 90bb107

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -143
app.py CHANGED
@@ -1,158 +1,91 @@
1
- import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
 
6
  from datetime import datetime
7
 
8
- # Setting page config
9
- st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")
10
-
11
- # Project Title and introduction
12
- st.title("Nuisance Complaints Analysis Dashboard")
13
- st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")
14
-
15
- # Loading and cleaning data
16
- @st.cache_data
17
  def load_and_clean_data():
18
- # Loading data
19
- data = pd.read_csv('Nuisance_Complaints_20241130.csv')
20
-
21
- # Dropping rows with missing File Number
22
- data = data.dropna(subset=['File Number'])
23
-
24
- # Handling Date Notice Mailed or Given
25
- data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
26
- data['Date Reported'] = pd.to_datetime(data['Date Reported'])
27
- median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
28
- data['Date Notice Mailed or Given'].fillna(data['Date Reported'] + pd.to_timedelta(median_delay, unit='D'), inplace=True)
29
-
30
- # Handling Type of Complaint
31
- data['Type of Complaint'].fillna('Unknown', inplace=True)
32
-
33
- # Handling Disposition
34
- most_common_disposition = data.groupby('Type of Complaint')['Disposition'].apply(
35
- lambda x: x.mode()[0] if not x.mode().empty else 'Pending')
36
- data['Disposition'] = data.apply(
37
- lambda row: most_common_disposition[row['Type of Complaint']]
38
- if pd.isnull(row['Disposition']) else row['Disposition'], axis=1)
39
-
40
- # Handling File Close Date
41
- data['File Close Date'] = data['File Close Date'].fillna('Unresolved')
42
-
43
- # Handling Method Submitted
44
- data['Method Submitted'] = data.apply(
45
- lambda row: 'Online' if row['Submitted Online?'] and pd.isnull(row['Method Submitted'])
46
- else row['Method Submitted'], axis=1)
47
- mode_method = data['Method Submitted'].mode()[0]
48
- data['Method Submitted'].fillna(mode_method, inplace=True)
49
-
50
- # Dropping rows with missing Submitted Online?
51
- data = data.dropna(subset=['Submitted Online?'])
52
-
53
- # Handling rows with missing Mapped Location
54
- data = data.dropna(subset=['Mapped Location'])
55
-
56
- # Extracting latitude and longitude
57
- data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
58
- data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)').astype(float)
59
-
60
- return data
61
-
62
- # Loading the data
63
- try:
64
- data = load_and_clean_data()
65
- st.success("Data successfully loaded and cleaned!")
66
- except Exception as e:
67
- st.error(f"Error loading data: {str(e)}")
68
- st.stop()
69
-
70
- # Creating sidebar
71
- st.sidebar.header("Dashboard Controls")
72
- selected_year = st.sidebar.selectbox(
73
- "Select Year",
74
- options=sorted(data['Year Reported'].unique()),
75
- )
76
-
77
- # Adding visualization type selector
78
- viz_type = st.sidebar.selectbox(
79
- "Select Visualization",
80
- ["Complaint Types", "Geographic Distribution", "Resolution Status",
81
- "Submission Methods", "Processing Time"]
82
- )
83
 
84
- # Filter data based on selected year
85
- filtered_data = data[data['Year Reported'] == selected_year]
 
 
86
 
87
- # Main content
88
- st.header(f"Analysis for Year {selected_year}")
89
-
90
- # Create metrics
91
- col1, col2, col3 = st.columns(3)
92
- with col1:
93
- st.metric("Total Complaints", len(filtered_data))
94
- with col2:
95
- avg_process_time = (pd.to_datetime(filtered_data['File Close Date']) -
96
- filtered_data['Date Reported']).dt.days.mean()
97
- st.metric("Average Processing Time", f"{avg_process_time:.1f} days")
98
- with col3:
99
- st.metric("Most Common Type", filtered_data['Type of Complaint'].mode()[0])
100
-
101
- # Create visualizations based on selection
102
- if viz_type == "Complaint Types":
103
- st.subheader("Distribution of Complaint Types")
104
- fig, ax = plt.subplots(figsize=(10, 6))
105
- complaint_counts = filtered_data['Type of Complaint'].value_counts()
106
- sns.barplot(x=complaint_counts.values, y=complaint_counts.index)
107
- plt.title(f'Complaint Types Distribution in {selected_year}')
108
- st.pyplot(fig)
109
-
110
- elif viz_type == "Geographic Distribution":
111
- st.subheader("Geographic Distribution of Complaints")
112
- st.map(filtered_data[['Latitude', 'Longitude']])
113
-
114
- elif viz_type == "Resolution Status":
115
- st.subheader("Complaint Resolution Status")
116
- fig, ax = plt.subplots(figsize=(10, 6))
117
- resolution_counts = filtered_data['Disposition'].value_counts()
118
- sns.barplot(x=resolution_counts.values, y=resolution_counts.index)
119
- plt.title(f'Resolution Status Distribution in {selected_year}')
120
- st.pyplot(fig)
121
 
122
- elif viz_type == "Submission Methods":
123
- st.subheader("Submission Methods Analysis")
124
- fig, ax = plt.subplots(figsize=(10, 6))
125
- submission_counts = filtered_data['Method Submitted'].value_counts()
126
- sns.barplot(x=submission_counts.values, y=submission_counts.index)
127
- plt.title(f'Submission Methods in {selected_year}')
128
- st.pyplot(fig)
129
 
130
- else: # Processing Time
131
- st.subheader("Processing Time Analysis")
132
- fig, ax = plt.subplots(figsize=(10, 6))
133
- sns.histplot(data=filtered_data,
134
- x=(pd.to_datetime(filtered_data['File Close Date']) -
135
- filtered_data['Date Reported']).dt.days,
136
- bins=30)
137
- plt.title(f'Distribution of Processing Times in {selected_year}')
138
- plt.xlabel('Processing Time (Days)')
139
- st.pyplot(fig)
140
 
141
- # Additional insights
142
- st.header("Key Insights")
143
- col1, col2 = st.columns(2)
144
 
145
- with col1:
146
- st.subheader("Top 3 Complaint Types")
147
- top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
148
- st.write(top_complaints)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
- with col2:
151
- st.subheader("Resolution Efficiency")
152
- resolution_rate = (filtered_data['Disposition'].value_counts() /
153
- len(filtered_data) * 100).round(2)
154
- st.write(resolution_rate)
155
 
156
- # Footer
157
- st.markdown("---")
158
- st.markdown("Dataset provided by the City of Urbana Open Data Portal")
 
 
1
  import pandas as pd
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
+ import plotly.express as px
6
  from datetime import datetime
7
 
8
+ # Load and clean data
 
 
 
 
 
 
 
 
9
def load_and_clean_data(csv_path='/content/Nuisance_Complaints_20241204.csv'):
    """Read the nuisance-complaints CSV and return a cleaned DataFrame.

    Cleaning steps, in order:
      1. Parse the three date columns; unparseable values become NaT.
      2. Fill missing categorical values with fixed placeholders.
      3. Add 'Processing Time' (whole days from 'Date Reported' to
         'File Close Date'; NaN when either date is missing).
      4. Keep only the first row for each 'File Number'.

    Args:
        csv_path: Location of the CSV file. Defaults to the path the
            script has always used, so existing zero-argument calls
            behave as before.

    Returns:
        The cleaned pandas DataFrame (original row index is retained).

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If an expected column is missing from the file.
    """
    data = pd.read_csv(csv_path)

    # Convert date columns; errors='coerce' turns bad values into NaT
    # instead of aborting the whole load.
    date_columns = ('Date Reported', 'Date Notice Mailed or Given', 'File Close Date')
    for col in date_columns:
        data[col] = pd.to_datetime(data[col], errors='coerce')

    # Handle missing values. Assign the result back rather than calling
    # fillna(inplace=True) on a column selection: the chained in-place form
    # operates on a temporary under pandas Copy-on-Write and leaves the
    # DataFrame unchanged.
    placeholders = {
        'Type of Complaint': 'Unknown',
        'Disposition': 'Pending',
        'Method Submitted': 'Not Specified',
    }
    for col, placeholder in placeholders.items():
        data[col] = data[col].fillna(placeholder)

    # Calculate processing time in whole days.
    data['Processing Time'] = (data['File Close Date'] - data['Date Reported']).dt.days

    # Remove duplicates, keeping the first occurrence of each file number.
    data = data.drop_duplicates(subset=['File Number'], keep='first')

    return data
 
 
29
 
30
+ # Create visualizations
31
def create_visualizations(data):
    """Display five summary charts for the complaints data.

    Each chart is drawn on its own figure and shown with ``plt.show()``.

    Args:
        data: DataFrame providing the columns read below:
            'Year Reported', 'Type of Complaint', 'Disposition',
            'Method Submitted', 'Processing Time' and 'Month Reported'.

    Returns:
        None.
    """
    # 1. Complaint Types Over Time
    # DataFrame.plot opens a brand-new figure unless it is handed an Axes,
    # which would leave an empty 12x6 figure behind and draw the real chart
    # at the default size. Create the axes explicitly and pass them in.
    _, trend_ax = plt.subplots(figsize=(12, 6))
    # fill_value=0: a year with no complaints of a given type is a count of
    # zero, not a missing point in the line.
    complaints_over_time = (
        data.groupby(['Year Reported', 'Type of Complaint'])
        .size()
        .unstack(fill_value=0)
    )
    complaints_over_time.plot(kind='line', marker='o', ax=trend_ax)
    plt.title('Trends in Complaint Types Over Years')
    plt.xlabel('Year')
    plt.ylabel('Number of Complaints')
    # Anchor the legend outside the axes so it does not cover the lines.
    plt.legend(title='Complaint Type', bbox_to_anchor=(1.05, 1))
    plt.tight_layout()
    plt.show()

    # 2. Resolution Distribution (most frequent disposition first)
    plt.figure(figsize=(10, 6))
    sns.countplot(data=data, y='Disposition', order=data['Disposition'].value_counts().index)
    plt.title('Distribution of Complaint Resolutions')
    plt.xlabel('Count')
    plt.ylabel('Resolution Type')
    plt.tight_layout()
    plt.show()

    # 3. Average Processing Time by Submission Method (ascending)
    plt.figure(figsize=(10, 6))
    avg_processing_time = data.groupby('Method Submitted')['Processing Time'].mean().sort_values()
    sns.barplot(x=avg_processing_time.values, y=avg_processing_time.index)
    plt.title('Average Processing Time by Submission Method')
    plt.xlabel('Average Processing Time (Days)')
    plt.ylabel('Submission Method')
    plt.tight_layout()
    plt.show()

    # 4. Monthly Distribution of Complaints
    plt.figure(figsize=(10, 6))
    monthly_complaints = data.groupby('Month Reported').size()
    sns.barplot(x=monthly_complaints.index, y=monthly_complaints.values)
    plt.title('Monthly Distribution of Complaints')
    plt.xlabel('Month')
    plt.ylabel('Number of Complaints')
    plt.tight_layout()
    plt.show()

    # 5. Complaint Type Distribution (most frequent type first)
    plt.figure(figsize=(10, 6))
    sns.countplot(data=data, y='Type of Complaint',
                  order=data['Type of Complaint'].value_counts().index)
    plt.title('Distribution of Complaint Types')
    plt.xlabel('Count')
    plt.ylabel('Complaint Type')
    plt.tight_layout()
    plt.show()
81
+
82
+ # Main execution
83
def main():
    """Script entry point: load the cleaned data, then render the charts."""
    create_visualizations(load_and_clean_data())


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()