spatel54 committed on
Commit
13eaffe
·
1 Parent(s): 9f97e1f
Files changed (1) hide show
  1. src/streamlit_app.py +210 -1
src/streamlit_app.py CHANGED
@@ -13,4 +13,213 @@ forums](https://discuss.streamlit.io).
13
  In the meantime, below is an example of what you can do with just a few lines of code:
14
  """
15
 
16
- st.title("Streamlit Demo")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  In the meantime, below is an example of what you can do with just a few lines of code:
14
  """
15
 
16
+ st.title("Crime Data Analysis")
17
+
18
+
19
+ # Load the dataset.
20
+ df = pd.read_csv("crime_data.csv")
21
+
22
+ # Check NaN values and types.
23
+ # df.isna().sum() # No NaN value in our dataframe.
24
+ # df.dtypes # Only "crm_cd_desc" is categorical variable(object).
25
+
26
+ # Test code.
27
+ df.head(5)
28
+
29
+ # Plot 1: Pie chart.
30
+ # Data filtering.
31
+ crm_tot = df["crm_cd_desc"].value_counts()
32
+
33
+ # Calculate the mean of crime cases.
34
+ mean_crm = crm_tot.mean()
35
+
36
+ # Filter out the crime cases that are below the mean of the crime cases.
37
+ crm_tot_filtered = crm_tot[crm_tot > mean_crm]
38
+
39
+ # Method comes from: https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_features.html.
40
+ plt.figure(figsize=(12, 12))
41
+ fig, ax = plt.subplots()
42
+ ax.pie(crm_tot_filtered, labels=crm_tot_filtered.index, autopct='%1.1f%%', labeldistance=1.5, pctdistance=1.2)
43
+
44
+ #-----
45
+
46
### Use this one!!!
# Donut chart of the ten most frequent crime types.
# The columns are named explicitly instead of renamed afterwards: the column
# names produced by `value_counts().reset_index()` changed in pandas 2.0
# ("index"/<series name> became <series name>/"count"), so a rename keyed on
# the old names leaves no "Crime Type" column on newer pandas.
top_crimes = (
    df["crm_cd_desc"]
    .value_counts()
    .nlargest(10)
    .rename_axis("Crime Type")
    .reset_index(name="Count")
)

# Share of each crime type within the top 10, stored as a fraction so the
# tooltip's ".1%" format can render it as a percentage.
top_crimes["Percentage"] = top_crimes["Count"] / top_crimes["Count"].sum()

# Create the donut chart (an arc mark with a non-zero inner radius).
chart = alt.Chart(top_crimes).mark_arc(innerRadius=50).encode(
    theta=alt.Theta(field="Count", type="quantitative"),
    color=alt.Color(field="Crime Type", type="nominal", legend=alt.Legend(title="Crime Type")),
    tooltip=["Crime Type", "Count", alt.Tooltip("Percentage:Q", format=".1%")],
).properties(
    title="Top 10 Crime Types Distribution"
)

# Display the plot.
st.altair_chart(chart, theme="streamlit", use_container_width=True)
72
+
73
+
74
+ #------
75
+
76
### Use this one!!!
# Heat map of yearly incident counts for the ten most frequent crime types.
top_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index
df_top = df[df['crm_cd_desc'].isin(top_crimes)]

# One row per crime type, one column per year; combinations with no
# incidents are filled with 0 so the integer annotation format applies.
heatmap1_data = df_top.groupby(['crm_cd_desc', 'year']).size().unstack(fill_value=0)

# Draw on an explicit figure/axes pair so the figure can be handed to Streamlit.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(heatmap1_data, annot=True, fmt="d", cmap="YlOrRd", ax=ax)
ax.set_title("Top 10 Crime Types by Year")
ax.set_xlabel("Year")
ax.set_ylabel("Crime Type")
fig.tight_layout()

# `plt.show()` renders nothing inside a Streamlit app, and `st.altair_chart`
# expects an Altair chart rather than a DataFrame, so the Matplotlib figure is
# rendered with `st.pyplot` instead.
st.pyplot(fig)
94
+
95
+
96
+ #------
97
+
98
+ ### Use this one!!!
99
+ # Count incidents per crime type and keep the 10 types with the most cases.
100
+ top_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index
101
+ df = df[df['year'] != 2025]
102
+ df_top = df[df['crm_cd_desc'].isin(top_crimes)]
103
+
104
+ # Group by crime type and year.
105
+ stacked_year_df = df_top.groupby(['year', 'crm_cd_desc']).size().reset_index(name='count')
106
+
107
+ # Create the stacked bar chart.
108
+ bar_chart = alt.Chart(stacked_year_df).mark_bar().encode(
109
+ x=alt.X('year:O', title='Year'),
110
+ y=alt.Y('count:Q', stack='zero', title='Number of Incidents'),
111
+ color=alt.Color('crm_cd_desc:N', title='Crime Type'),
112
+ tooltip=['year', 'crm_cd_desc', 'count']
113
+ ).properties(
114
+ width=600,
115
+ height=400,
116
+ title='Stacked Crime Composition by Year (Top 10 Crime Types)'
117
+ )
118
+
119
+ st.altair_chart(bar_chart, theme="streamlit", use_container_width=True)
120
+
121
+
122
+ #----
123
+
124
+ ### Use this one!!!
125
+ # Plot 3: Line chart.
126
+ df = df[df['year'] != 2025] # 2025 is not over yet, so its trend cannot be seen.
127
+
128
+ # Group each crime type by year.
129
+ yearly_crime_counts = (
130
+ df.groupby(["year", "crm_cd_desc"])
131
+ .size()
132
+ .reset_index(name="Count")
133
+ )
134
+
135
+ # Keep only the 5 crime types with the most cases.
136
+ top5_crimes = df["crm_cd_desc"].value_counts().nlargest(5).index
137
+ filtered_crimes = yearly_crime_counts[yearly_crime_counts["crm_cd_desc"].isin(top5_crimes)]
138
+
139
+ # Plot the line plot.
140
+ line_chart = alt.Chart(filtered_crimes).mark_line(point=True).encode(
141
+ x=alt.X("year:O", title="Year"),
142
+ y=alt.Y("Count:Q", title="Number of Incidents"),
143
+ color=alt.Color("crm_cd_desc:N", title="Crime Type"),
144
+ tooltip=["year", "crm_cd_desc", "Count"]
145
+ ).properties(
146
+ title="Yearly Trends of Top 5 Crime Types",
147
+ width=700,
148
+ height=400
149
+ )
150
+
151
+ # Display the plot.
152
+ st.altair_chart(line_chart, theme="streamlit", use_container_width=True)
153
+
154
+ #----
155
+ # Plot 4: Map.
156
+ # Load geojson file.
157
+ gdf_counties = gpd.read_file("County_Boundary.geojson")
158
+
159
+ # Create the dropdown menu.
160
+ year_dropdown = ipywidgets.Dropdown(
161
+ options= sorted(df['year'].unique()),
162
+ description='Year:'
163
+ )
164
+
165
+ # Create the map.
166
def crime_map(year):
    """Plot a random sample of one year's crime locations over the county map.

    Reads the module-level ``df`` (crime records with ``year``, ``lon`` and
    ``lat`` columns) and ``gdf_counties`` (county boundaries), and renders
    the resulting Matplotlib figure in the Streamlit page.

    Args:
        year: Value compared against the ``year`` column to select rows.
    """
    rows_for_year = df[df['year'] == year]
    # Cap the sample at the number of matching rows: ``sample(n=300)`` raises
    # ValueError when fewer than 300 rows are available. The fixed seed keeps
    # the plotted sample the same on every rerun.
    df_filtered = rows_for_year.sample(n=min(300, len(rows_for_year)), random_state=1)
    gdf_points = gpd.GeoDataFrame(
        df_filtered,
        geometry=gpd.points_from_xy(df_filtered['lon'], df_filtered['lat']),
        crs="EPSG:4326",
    )

    fig, ax = plt.subplots(figsize=(10, 10))
    gdf_counties.plot(ax=ax, color='lightgray', edgecolor='white')
    gdf_points.plot(ax=ax, color='red', markersize=10, alpha=0.6)
    ax.set_title(f"Crime Map - {year}")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.grid(True)
    # plt.show() displays nothing in a Streamlit app; hand the figure to
    # Streamlit explicitly.
    st.pyplot(fig)
184
+
185
+ # Display the plot.
186
+ ipywidgets.interact(crime_map, year=year_dropdown)
187
+
188
+
189
+
190
+
191
+ ### Use this one!!!
192
+ # Loading in the map.
193
+ gdf_counties = gpd.read_file("County_Boundary.geojson")
194
+
195
+ # Identify top 10 crime types
196
+ top_10_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index.tolist()
197
+
198
+ # Filter the main DataFrame to include only top 10 crimes
199
+ df_top = df[df['crm_cd_desc'].isin(top_10_crimes)]
200
+
201
+ # Create the dropdown.
202
+ crime_dropdown = ipywidgets.Dropdown(
203
+ options= sorted(top_10_crimes),
204
+ description="Crime Type:")
205
+
206
+ # Create the map.
207
def crime_map(year, crime):
    """Plot a random sample of one crime type's locations for a given year.

    Reads the module-level ``df`` (crime records with ``year``,
    ``crm_cd_desc``, ``lon`` and ``lat`` columns) and ``gdf_counties``
    (county boundaries), and renders the resulting Matplotlib figure in the
    Streamlit page.

    Args:
        year: Value compared against the ``year`` column to select rows.
        crime: Value compared against the ``crm_cd_desc`` column.
    """
    matching_rows = df[(df['year'] == year) & (df['crm_cd_desc'] == crime)]
    # Cap the sample at the number of matching rows: ``sample(n=300)`` raises
    # ValueError when a year/crime combination has fewer than 300 records.
    # The fixed seed keeps the plotted sample the same on every rerun.
    df_filtered = matching_rows.sample(n=min(300, len(matching_rows)), random_state=1)
    gdf_points = gpd.GeoDataFrame(
        df_filtered,
        geometry=gpd.points_from_xy(df_filtered['lon'], df_filtered['lat']),
        crs="EPSG:4326",
    )

    fig, ax = plt.subplots(figsize=(10, 10))
    gdf_counties.plot(ax=ax, color='lightgray', edgecolor='white')
    gdf_points.plot(ax=ax, color='red', markersize=10, alpha=0.6)
    ax.set_title(f"{crime} - {year}")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.grid(True)
    # plt.show() displays nothing in a Streamlit app; hand the figure to
    # Streamlit explicitly.
    st.pyplot(fig)
223
+
224
+ # Display the plot.
225
+ ipywidgets.interact(crime_map, year=year_dropdown, crime=crime_dropdown)