# Crime_Dataset / src / streamlit_app.py
# spatel54
# test
# 13eaffe
import json

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
"""
# Welcome to Streamlit!
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).
In the meantime, below is an example of what you can do with just a few lines of code:
"""
# Page heading shown at the top of the app.
st.title("Crime Data Analysis")

# Read the crime records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="crime_data.csv")

# Earlier inspection (kept for reference, not executed):
#   df.isna().sum()  -> no NaN values in the dataframe.
#   df.dtypes        -> only "crm_cd_desc" is a categorical (object) column.

# Test code: preview the first five rows.
df.head(n=5)
# Plot 1: Pie chart.
# Count how many incidents were recorded for each crime type.
crm_tot = df["crm_cd_desc"].value_counts()
# Calculate the mean number of cases per crime type.
mean_crm = crm_tot.mean()
# Keep only the crime types whose case count is above that mean.
crm_tot_filtered = crm_tot[crm_tot > mean_crm]

# Reshape the counts into a two-column frame for Altair. rename_axis() plus
# reset_index(name=...) yields the same column names on every pandas version.
pie_data = crm_tot_filtered.rename_axis("Crime Type").reset_index(name="Count")
# Share of each crime type among the displayed slices (0-1 fraction).
pie_data["Percentage"] = pie_data["Count"] / pie_data["Count"].sum()

# Draw the pie with Altair and hand it to Streamlit. The previous version
# called matplotlib through a name (`plt`) that this file never imports and
# never passed the figure to Streamlit, so nothing could be displayed.
pie_chart = (
    alt.Chart(pie_data)
    .mark_arc()
    .encode(
        theta=alt.Theta(field="Count", type="quantitative"),
        color=alt.Color(
            field="Crime Type",
            type="nominal",
            legend=alt.Legend(title="Crime Type"),
        ),
        tooltip=[
            "Crime Type",
            "Count",
            alt.Tooltip("Percentage:Q", format=".1%"),
        ],
    )
)
# Display the plot.
st.altair_chart(pie_chart, theme="streamlit", use_container_width=True)
#-----
### Use this one!!!
# A more detailed version of the pie chart above.
# Keep the 10 crime types with the most cases as a two-column frame.
# rename_axis() + reset_index(name=...) is used instead of renaming the
# "index"/"crm_cd_desc" columns: value_counts().reset_index() produces
# different column names on pandas < 2.0 and pandas >= 2.0, and the old
# rename put the crime names into "Count" on newer versions.
top_crimes = (
    df["crm_cd_desc"]
    .value_counts()
    .nlargest(10)
    .rename_axis("Crime Type")
    .reset_index(name="Count")
)
# Calculate the share of each kind of crime among the top 10 (0-1 fraction).
top_crimes["Percentage"] = top_crimes["Count"] / top_crimes["Count"].sum()
# Create the pie (donut) chart.
chart = (
    alt.Chart(top_crimes)
    .mark_arc(innerRadius=50)
    .encode(
        theta=alt.Theta(field="Count", type="quantitative"),
        color=alt.Color(
            field="Crime Type",
            type="nominal",
            legend=alt.Legend(title="Crime Type"),
        ),
        tooltip=[
            "Crime Type",
            "Count",
            alt.Tooltip("Percentage:Q", format=".1%"),
        ],
    )
    .properties(title="Top 10 Crime Types Distribution")
)
# Display the plot.
st.altair_chart(chart, theme="streamlit", use_container_width=True)
#------
### Use this one!!!
# Count the crime types and keep the 10 with the most cases.
top_crimes = df["crm_cd_desc"].value_counts().nlargest(10).index
df_top = df[df["crm_cd_desc"].isin(top_crimes)]
# Group by crime type and year (rows: crime type, columns: year).
heatmap1_data = df_top.groupby(["crm_cd_desc", "year"]).size().unstack(fill_value=0)

# Altair wants long-form data: one row per (crime type, year) cell.
heatmap1_long = heatmap1_data.reset_index().melt(
    id_vars="crm_cd_desc", var_name="year", value_name="count"
)

# Create the heat map with Altair. The previous version drew it through
# `plt`/`sns`, which this file never imports, and then passed the pivot
# DataFrame itself to st.altair_chart, which expects an Altair chart.
heatmap_base = alt.Chart(heatmap1_long).encode(
    x=alt.X("year:O", title="Year"),
    # labelLimit keeps long crime descriptions from being cut off.
    y=alt.Y("crm_cd_desc:N", title="Crime Type", axis=alt.Axis(labelLimit=300)),
)
heatmap_cells = heatmap_base.mark_rect().encode(
    color=alt.Color(
        "count:Q",
        title="Incidents",
        scale=alt.Scale(scheme="yelloworangered"),
    ),
    tooltip=["crm_cd_desc", "year", "count"],
)
# Print the count inside every cell (the seaborn `annot=True` equivalent).
heatmap_labels = heatmap_base.mark_text(baseline="middle").encode(
    text=alt.Text("count:Q", format="d")
)
heatmap_chart = (heatmap_cells + heatmap_labels).properties(
    title="Top 10 Crime Types by Year"
)
# Display the plot.
st.altair_chart(heatmap_chart, theme="streamlit", use_container_width=True)
#------
### Use this one!!!
# Rank the crime types by number of cases and keep the ten largest.
top_crimes = df["crm_cd_desc"].value_counts().nlargest(10).index
# Drop every record from 2025 (this rebinds the module-level `df`).
df = df.loc[df["year"].ne(2025)]
df_top = df.loc[df["crm_cd_desc"].isin(top_crimes)]
# One row per (year, crime type) pair with its incident count.
stacked_year_df = (
    df_top.groupby(["year", "crm_cd_desc"])
    .size()
    .rename("count")
    .reset_index()
)
# Build the stacked bar chart step by step: mark, encodings, then sizing.
bar_chart = alt.Chart(stacked_year_df).mark_bar()
bar_chart = bar_chart.encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("count:Q", stack="zero", title="Number of Incidents"),
    color=alt.Color("crm_cd_desc:N", title="Crime Type"),
    tooltip=["year", "crm_cd_desc", "count"],
)
bar_chart = bar_chart.properties(
    title="Stacked Crime Composition by Year (Top 10 Crime Types)",
    width=600,
    height=400,
)
# Display the plot.
st.altair_chart(bar_chart, theme="streamlit", use_container_width=True)
#----
### Use this one!!!
# Plot 3: Line chart.
# 2025 has not ended yet, so its partial data would distort the trend.
df = df.loc[df["year"].ne(2025)]
# Number of incidents for every (year, crime type) pair.
yearly_crime_counts = (
    df.groupby(["year", "crm_cd_desc"])
    .size()
    .rename("Count")
    .reset_index()
)
# The five crime types with the most cases overall.
top5_crimes = df["crm_cd_desc"].value_counts().nlargest(5).index
# Restrict the yearly counts to those five crime types.
filtered_crimes = yearly_crime_counts.loc[
    yearly_crime_counts["crm_cd_desc"].isin(top5_crimes)
]
# Build the line plot step by step: mark, encodings, then sizing.
line_chart = alt.Chart(filtered_crimes).mark_line(point=True)
line_chart = line_chart.encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("Count:Q", title="Number of Incidents"),
    color=alt.Color("crm_cd_desc:N", title="Crime Type"),
    tooltip=["year", "crm_cd_desc", "Count"],
)
line_chart = line_chart.properties(
    height=400,
    width=700,
    title="Yearly Trends of Top 5 Crime Types",
)
# Display the plot.
st.altair_chart(line_chart, theme="streamlit", use_container_width=True)
#----
# Plot 4: Map.
# Load the county boundaries as plain GeoJSON so Altair can draw them.
# NOTE(review): assumes the file is a GeoJSON FeatureCollection whose
# coordinates are longitude/latitude, like the `lon`/`lat` columns -- confirm.
with open("County_Boundary.geojson", encoding="utf-8") as geojson_file:
    gdf_counties = json.load(geojson_file)

# Create the dropdown menu. A Streamlit selectbox replaces the ipywidgets
# dropdown, which is not a Streamlit widget; it returns the selected year
# directly. The explicit key keeps it distinct from other "Year:" selectors.
year_dropdown = st.selectbox(
    "Year:",
    options=sorted(df["year"].unique()),
    key="crime_map_year_only",
)


# Create the map.
def crime_map(year):
    """Render sampled crime locations for one year over the county outlines.

    Args:
        year: Value matched against the ``year`` column of the module-level
            ``df``.

    Draws at most 300 incidents (fixed ``random_state`` so the same points
    are shown on every rerun) and displays the chart with Streamlit.
    """
    df_year = df[df["year"] == year]
    # sample() raises ValueError when n exceeds the available rows, so cap it.
    df_filtered = df_year.sample(n=min(300, len(df_year)), random_state=1)

    # County polygons as the background layer. The identity projection plots
    # raw lon/lat on a flat plane, like the original matplotlib figure, and
    # reflectY puts north at the top.
    counties = (
        alt.Chart(alt.Data(values=gdf_counties["features"]))
        .mark_geoshape(fill="lightgray", stroke="white")
        .project(type="identity", reflectY=True)
    )
    # Incident locations drawn on top of the counties.
    points = (
        alt.Chart(df_filtered[["lon", "lat"]])
        .mark_circle(color="red", size=10, opacity=0.6)
        .encode(longitude="lon:Q", latitude="lat:Q")
    )
    st.altair_chart(
        (counties + points).properties(title=f"Crime Map - {year}"),
        use_container_width=True,
    )


# Display the plot for the selected year.
crime_map(year_dropdown)
### Use this one!!!
# Load the county boundaries as plain GeoJSON so Altair can draw them.
# NOTE(review): assumes the file is a GeoJSON FeatureCollection whose
# coordinates are longitude/latitude, like the `lon`/`lat` columns -- confirm.
with open("County_Boundary.geojson", encoding="utf-8") as geojson_file:
    gdf_counties = json.load(geojson_file)

# Identify the top 10 crime types.
top_10_crimes = df["crm_cd_desc"].value_counts().nlargest(10).index.tolist()
# Filter the main DataFrame to include only the top 10 crimes.
df_top = df[df["crm_cd_desc"].isin(top_10_crimes)]

# Create the dropdowns. Streamlit selectboxes replace the ipywidgets
# dropdowns, which are not Streamlit widgets. This section builds its own
# year selector so it does not depend on a widget created elsewhere; the
# explicit keys keep both widgets unique on the page.
crime_map_year = st.selectbox(
    "Year:",
    options=sorted(df["year"].unique()),
    key="crime_type_map_year",
)
crime_dropdown = st.selectbox(
    "Crime Type:",
    options=sorted(top_10_crimes),
    key="crime_type_map_crime",
)


# Create the map.
def crime_map(year, crime):
    """Render sampled locations of one crime type in one year.

    Args:
        year: Value matched against the ``year`` column of the module-level
            ``df``.
        crime: Value matched against the ``crm_cd_desc`` column.

    Draws at most 300 incidents (fixed ``random_state`` so the same points
    are shown on every rerun) and displays the chart with Streamlit.
    """
    df_match = df[(df["year"] == year) & (df["crm_cd_desc"] == crime)]
    # A single crime type in a single year can easily have fewer than 300
    # rows; sample() raises ValueError in that case, so cap the sample size.
    df_filtered = df_match.sample(n=min(300, len(df_match)), random_state=1)

    # County polygons as the background layer. The identity projection plots
    # raw lon/lat on a flat plane, like the original matplotlib figure, and
    # reflectY puts north at the top.
    counties = (
        alt.Chart(alt.Data(values=gdf_counties["features"]))
        .mark_geoshape(fill="lightgray", stroke="white")
        .project(type="identity", reflectY=True)
    )
    # Incident locations drawn on top of the counties.
    points = (
        alt.Chart(df_filtered[["lon", "lat"]])
        .mark_circle(color="red", size=10, opacity=0.6)
        .encode(longitude="lon:Q", latitude="lat:Q")
    )
    st.altair_chart(
        (counties + points).properties(title=f"{crime} - {year}"),
        use_container_width=True,
    )


# Display the plot for the current selections.
crime_map(crime_map_year, crime_dropdown)