import altair as alt
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st

# NOTE: this bare string is not a module docstring (it follows the imports),
# so Streamlit "magic" renders it as markdown at the top of the app.
"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Upper bound on the number of incidents drawn on a map, to keep it readable.
MAP_SAMPLE_SIZE = 300

st.title("Crime Data Analysis")

# Load the dataset.
df = pd.read_csv("crime_data.csv")

# Findings from the exploratory checks (df.isna().sum() / df.dtypes), per the
# original notes: the dataframe has no NaN values, and "crm_cd_desc" is the
# only categorical (object) column.

# Preview of the raw data (test output).
st.write(df.head(5))

# ---------------------------------------------------------------------------
# Plot 1a: Pie chart (matplotlib draft).
# ---------------------------------------------------------------------------
# Keep only the crime types whose case count is above the mean case count.
crm_tot = df["crm_cd_desc"].value_counts()
mean_crm = crm_tot.mean()
crm_tot_filtered = crm_tot[crm_tot > mean_crm]

# Method comes from:
# https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_features.html.
fig, ax = plt.subplots(figsize=(12, 12))
ax.pie(
    crm_tot_filtered,
    labels=crm_tot_filtered.index,
    autopct='%1.1f%%',
    labeldistance=1.5,
    pctdistance=1.2,
)
st.pyplot(fig)
plt.close(fig)  # Streamlit reruns the script; release the figure each time.

# ---------------------------------------------------------------------------
# Plot 1b: Donut chart of the top 10 crime types (preferred version).
# ---------------------------------------------------------------------------
# rename_axis + reset_index(name=...) yields the same two columns on every
# pandas version; the default column names of value_counts().reset_index()
# changed in pandas 2.0.
top_crimes = (
    df["crm_cd_desc"]
    .value_counts()
    .nlargest(10)
    .rename_axis("Crime Type")
    .reset_index(name="Count")
)

# Share of each crime type within the top 10.
top_crimes["Percentage"] = top_crimes["Count"] / top_crimes["Count"].sum()

chart = alt.Chart(top_crimes).mark_arc(innerRadius=50).encode(
    theta=alt.Theta(field="Count", type="quantitative"),
    color=alt.Color(
        field="Crime Type",
        type="nominal",
        legend=alt.Legend(title="Crime Type"),
    ),
    tooltip=["Crime Type", "Count", alt.Tooltip("Percentage:Q", format=".1%")],
).properties(
    title="Top 10 Crime Types Distribution"
)

st.altair_chart(chart, theme="streamlit", use_container_width=True)

# ---------------------------------------------------------------------------
# Plot 2a: Heat map of the top 10 crime types by year.
# ---------------------------------------------------------------------------
# The ten crime types with the most cases, computed on the full dataset
# (2025 included). Reused by the stacked bar chart below.
top10_names = df["crm_cd_desc"].value_counts().nlargest(10).index
df_top = df[df["crm_cd_desc"].isin(top10_names)]

# Rows: crime type, columns: year, values: number of incidents.
heatmap1_data = (
    df_top.groupby(["crm_cd_desc", "year"]).size().unstack(fill_value=0)
)

# This is a seaborn/matplotlib figure, so it is rendered with st.pyplot
# (st.altair_chart only accepts Altair charts).
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(heatmap1_data, annot=True, fmt="d", cmap="YlOrRd", ax=ax)
ax.set_title("Top 10 Crime Types by Year")
ax.set_xlabel("Year")
ax.set_ylabel("Crime Type")
fig.tight_layout()
st.pyplot(fig)
plt.close(fig)

# ---------------------------------------------------------------------------
# Plot 2b: Stacked bar chart of the top 10 crime types by year.
# ---------------------------------------------------------------------------
# 2025 is not complete yet, so it would distort year-over-year comparisons.
# Every chart from here on uses the data without 2025.
df = df[df["year"] != 2025]
df_top = df[df["crm_cd_desc"].isin(top10_names)]

stacked_year_df = (
    df_top.groupby(["year", "crm_cd_desc"]).size().reset_index(name="count")
)

bar_chart = alt.Chart(stacked_year_df).mark_bar().encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('count:Q', stack='zero', title='Number of Incidents'),
    color=alt.Color('crm_cd_desc:N', title='Crime Type'),
    tooltip=['year', 'crm_cd_desc', 'count'],
).properties(
    width=600,
    height=400,
    title='Stacked Crime Composition by Year (Top 10 Crime Types)',
)

st.altair_chart(bar_chart, theme="streamlit", use_container_width=True)

# ---------------------------------------------------------------------------
# Plot 3: Line chart of yearly trends for the top 5 crime types.
# ---------------------------------------------------------------------------
yearly_crime_counts = (
    df.groupby(["year", "crm_cd_desc"])
    .size()
    .reset_index(name="Count")
)

# Keep only the five crime types with the most cases (2025 excluded).
top5_crimes = df["crm_cd_desc"].value_counts().nlargest(5).index
filtered_crimes = yearly_crime_counts[
    yearly_crime_counts["crm_cd_desc"].isin(top5_crimes)
]

line_chart = alt.Chart(filtered_crimes).mark_line(point=True).encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("Count:Q", title="Number of Incidents"),
    color=alt.Color("crm_cd_desc:N", title="Crime Type"),
    tooltip=["year", "crm_cd_desc", "Count"],
).properties(
    title="Yearly Trends of Top 5 Crime Types",
    width=700,
    height=400,
)

st.altair_chart(line_chart, theme="streamlit", use_container_width=True)

# ---------------------------------------------------------------------------
# Plot 4: Maps.
# ---------------------------------------------------------------------------
# County outlines used as the base layer of every map.
gdf_counties = gpd.read_file("County_Boundary.geojson")


def crime_map(year, crime=None):
    """Render a map of sampled incidents for one year in the Streamlit app.

    Reads the module-level ``df`` (incident rows with ``year``,
    ``crm_cd_desc``, ``lon`` and ``lat`` columns) and ``gdf_counties``
    (the county boundary layer).

    Args:
        year: Value matched against ``df["year"]``.
        crime: Optional value matched against ``df["crm_cd_desc"]``. When
            ``None``, incidents of every crime type are eligible.

    Returns:
        None. The figure is written to the app with ``st.pyplot``; if the
        selection has no rows, an informational message is shown instead.
    """
    mask = df["year"] == year
    title = f"Crime Map - {year}"
    if crime is not None:
        mask = mask & (df["crm_cd_desc"] == crime)
        title = f"{crime} - {year}"

    incidents = df[mask]
    if incidents.empty:
        st.info(f"No incidents to plot for {title}.")
        return

    # Cap the sample at the number of available rows: DataFrame.sample
    # raises ValueError when n exceeds the population (replace=False).
    df_filtered = incidents.sample(
        n=min(MAP_SAMPLE_SIZE, len(incidents)),
        random_state=1,  # Fixed seed so the same points show on every rerun.
    )
    gdf_points = gpd.GeoDataFrame(
        df_filtered,
        geometry=gpd.points_from_xy(df_filtered["lon"], df_filtered["lat"]),
        crs="EPSG:4326",
    )

    fig, ax = plt.subplots(figsize=(10, 10))
    gdf_counties.plot(ax=ax, color='lightgray', edgecolor='white')
    gdf_points.plot(ax=ax, color='red', markersize=10, alpha=0.6)
    ax.set_title(title)
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.grid(True)
    st.pyplot(fig)
    plt.close(fig)


# Plot 4a: all crime types for the selected year.
# The same year selection drives both maps.
selected_year = st.selectbox("Year:", sorted(df["year"].unique()))
crime_map(selected_year)

# Plot 4b: a single crime type (one of the top 10) for the selected year.
top_10_crimes = df["crm_cd_desc"].value_counts().nlargest(10).index.tolist()
selected_crime = st.selectbox("Crime Type:", sorted(top_10_crimes))
crime_map(selected_year, selected_crime)