File size: 8,070 Bytes
ba61270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from streamlit_folium import st_folium
import plotly.express as px
from datetime import datetime

# Set page config — must be the first Streamlit call in the script.
st.set_page_config(page_title="Nuisance Complaints Dashboard", layout="wide")

# Title and introduction (team roster rendered as a markdown bullet list).
st.title("Nuisance Complaints Analysis Dashboard")
st.markdown("""
**Team Members:** 
* Lu Chang (luchang2@illinois.edu)
* Qiming Li (qimingl4@illinois.edu)
* Ruchita Alate (ralate2@illinois.edu)
* Shreyas Kulkarni (ssk16@illinois.edu)
* Vishal Devulapalli (nsd3@illinois.edu)
""")
st.write("This dashboard analyzes nuisance complaints data from the City of Urbana.")

# Load and clean data
@st.cache_data
def load_and_clean_data():
    """Load ``Nuisance_Complaints.csv`` and return a cleaned DataFrame.

    Cleaning steps:
      * drop rows missing 'File Number' (untrackable complaints)
      * parse the three date columns; impute missing notice dates as
        report date + median report-to-notice delay
      * fill missing 'Type of Complaint' with 'Unknown' and missing
        'Disposition' with the modal disposition for that type
      * derive 'Processing Time' (days) for closed files only
      * normalize 'Method Submitted' and split 'Mapped Location'
        into float 'Latitude'/'Longitude' columns

    Returns:
        pd.DataFrame: the cleaned complaints data.

    Raises:
        Exception: any preprocessing failure is surfaced in the UI
            via ``st.error`` and then re-raised for the caller.
    """
    try:
        data = pd.read_csv('Nuisance_Complaints.csv')

        # Rows without a file number cannot be tracked — drop them.
        data = data.dropna(subset=['File Number'])

        # Parse dates. Close date may legitimately be missing (open
        # cases), so coerce unparseable values to NaT there.
        data['Date Reported'] = pd.to_datetime(data['Date Reported'])
        data['Date Notice Mailed or Given'] = pd.to_datetime(data['Date Notice Mailed or Given'])
        data['File Close Date'] = pd.to_datetime(data['File Close Date'], errors='coerce')

        # Impute missing notice dates as report date + median delay.
        # Guard: median is NaN when every notice date is missing, and
        # pd.Timedelta(days=NaN) would raise.
        median_delay = (data['Date Notice Mailed or Given'] - data['Date Reported']).dt.days.median()
        if pd.notna(median_delay):
            missing_notice = data['Date Notice Mailed or Given'].isna()
            data.loc[missing_notice, 'Date Notice Mailed or Given'] = (
                data.loc[missing_notice, 'Date Reported'] + pd.Timedelta(days=median_delay)
            )

        data['Type of Complaint'] = data['Type of Complaint'].fillna('Unknown')

        def _modal_or_pending(dispositions):
            # Most frequent disposition for a complaint type; 'Pending'
            # when a type has no recorded dispositions at all.
            modes = dispositions.mode()
            return modes.iloc[0] if len(modes) > 0 else 'Pending'

        # Vectorized fill of missing dispositions by complaint type
        # (replaces a slow row-wise DataFrame.apply).
        most_common_disposition = data.groupby('Type of Complaint')['Disposition'].agg(_modal_or_pending)
        data['Disposition'] = data['Disposition'].fillna(
            data['Type of Complaint'].map(most_common_disposition)
        )

        # Processing time is only meaningful for closed files.
        data['Processing Time'] = np.where(
            data['File Close Date'].notna(),
            (data['File Close Date'] - data['Date Reported']).dt.days,
            np.nan
        )

        # 'Submitted Online?' implies the submission method; fill the
        # remainder with the overall modal method.
        data.loc[
            (data['Submitted Online?']) & (data['Method Submitted'].isna()),
            'Method Submitted'
        ] = 'Online'
        data['Method Submitted'] = data['Method Submitted'].fillna(data['Method Submitted'].mode()[0])

        # These two columns are required downstream (map + filters).
        data = data.dropna(subset=['Submitted Online?', 'Mapped Location'])

        # 'Mapped Location' looks like "(lat, lon)" — split into floats.
        data['Latitude'] = data['Mapped Location'].str.extract(r'\(([^,]+),')[0].astype(float)
        data['Longitude'] = data['Mapped Location'].str.extract(r', ([^,]+)\)')[0].astype(float)

        # Year filter in the sidebar expects plain integers.
        data['Year Reported'] = data['Year Reported'].astype(int)

        return data

    except Exception as e:
        st.error(f"Error in data preprocessing: {str(e)}")
        raise  # bare raise preserves the original traceback

# Load the data once at startup; load_and_clean_data() already shows a
# detailed st.error before re-raising, so this boundary just confirms
# success or halts the whole app on failure.
try:
    data = load_and_clean_data()
    st.success("Data successfully loaded and cleaned!")
except Exception as e:
    st.error(f"Error loading data: {str(e)}")
    st.stop()

# Sidebar: year filter and visualization picker.
st.sidebar.header("Dashboard Controls")

# Year options: 'All Time' first, then every reported year ascending.
available_years = sorted(int(y) for y in data['Year Reported'].unique())

selected_year = st.sidebar.selectbox(
    "Select Year",
    options=['All Time'] + available_years,
)

# Which chart the main panel should render.
viz_type = st.sidebar.selectbox(
    "Select Visualization",
    ["Complaint Types", "Geographic Distribution", "Resolution Status",
     "Submission Methods", "Complaints by Disposition"]
)


# Filter to the chosen year ('All Time' keeps the full dataset) and show
# a matching section header. Single branch replaces two back-to-back
# checks of the same condition.
if selected_year == 'All Time':
    filtered_data = data
    st.header("Analysis for All Time")
else:
    filtered_data = data[data['Year Reported'] == selected_year]
    st.header(f"Analysis for Year {selected_year}")

# Headline metrics for the filtered selection.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Total Complaints", len(filtered_data))
with col2:
    # Mean is NaN when the selection has no closed cases.
    avg_time = filtered_data['Processing Time'].mean()
    st.metric("Average Processing Time",
              f"{avg_time:.1f} days" if pd.notna(avg_time) else "N/A")
with col3:
    if not filtered_data.empty:
        most_common = filtered_data['Type of Complaint'].value_counts().index[0]
        st.metric("Most Common Type", most_common)
    else:
        st.metric("Most Common Type", "N/A")
if viz_type == "Complaint Types":
    # Interactive donut chart of complaint-type frequencies.
    st.subheader("Interactive Complaint Types Pie Chart")
    complaint_counts = filtered_data['Type of Complaint'].value_counts().reset_index()
    complaint_counts.columns = ['Complaint Type', 'Count']

    fig = px.pie(
        complaint_counts,
        names='Complaint Type',
        values='Count',
        title=f'Complaint Types Distribution in {selected_year}',
        hole=0.4  # Donut style
    )
    fig.update_traces(textinfo='percent+label')
    st.plotly_chart(fig, use_container_width=True)

elif viz_type == "Geographic Distribution":
    # Heatmap of complaint locations.
    st.subheader("Clustered Heatmap of Complaints")
    coords = filtered_data[['Latitude', 'Longitude']].dropna()
    if coords.empty:
        # Guard: mean of an empty column is NaN, and a NaN map center
        # breaks folium rendering.
        st.write("No location data available for the selected year.")
    else:
        map_center = [coords['Latitude'].mean(), coords['Longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=12)
        HeatMap(coords.values.tolist()).add_to(m)
        st_data = st_folium(m, width=700, height=500)

elif viz_type == "Resolution Status":
    st.subheader("Complaint Resolution Status")
    fig, ax = plt.subplots(figsize=(10, 6))
    resolution_counts = filtered_data['Disposition'].value_counts()
    # Draw on the explicit axes instead of implicit pyplot state, like
    # the "Complaints by Disposition" branch below.
    sns.barplot(x=resolution_counts.values, y=resolution_counts.index, ax=ax)
    ax.set_title(f'Resolution Status Distribution in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Submission Methods":
    st.subheader("Submission Methods Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    submission_counts = filtered_data['Method Submitted'].value_counts()
    sns.barplot(x=submission_counts.values, y=submission_counts.index, ax=ax)
    ax.set_title(f'Submission Methods in {selected_year}')
    st.pyplot(fig)

elif viz_type == "Complaints by Disposition":
    st.subheader("Complaints by Disposition")
    disposition_counts = filtered_data['Disposition'].value_counts()

    if not disposition_counts.empty:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x=disposition_counts.values, y=disposition_counts.index,
                    palette="viridis", ax=ax)
        ax.set_title(f'Complaints by Disposition in {selected_year}', fontsize=14)
        ax.set_xlabel('Number of Complaints', fontsize=12)
        ax.set_ylabel('Disposition', fontsize=12)
        st.pyplot(fig)
    else:
        st.write("No data available for the selected year.")

# Additional insights
st.header("Key Insights")
col1, col2 = st.columns(2)

with col1:
    st.subheader("Top 3 Complaint Types")
    top_complaints = filtered_data['Type of Complaint'].value_counts().head(3)
    st.write(top_complaints)

with col2:
    st.subheader("Resolution Efficiency")
    if len(filtered_data) > 0:
        # Share of complaints per disposition, as a percentage.
        resolution_rate = (filtered_data['Disposition'].value_counts() /
                           len(filtered_data) * 100).round(2)
        st.write(resolution_rate)
    else:
        # Guard: dividing by a zero-length frame yields inf/NaN noise.
        st.write("No data available for the selected year.")

# Footer
st.markdown("---")
st.markdown("Dataset provided by the City of Urbana Open Data Portal")