Spaces:

spatel54
/

Crime_Dataset

Sleeping

App Files Files Community

spatel54 committed on May 6, 2025

Commit

13eaffe

1 Parent(s): 9f97e1f

test

Browse files

Files changed (1) hide show

src/streamlit_app.py +210 -1

src/streamlit_app.py CHANGED Viewed

@@ -13,4 +13,213 @@ forums](https://discuss.streamlit.io).
 In the meantime, below is an example of what you can do with just a few lines of code:
 """
-st.title("Streamlit Demo")

 In the meantime, below is an example of what you can do with just a few lines of code:
 """
+st.title("Crime Data Analysis")
+# Load the dataset.
+df = pd.read_csv("crime_data.csv")
+# Check NaN values and types.
+# df.isna().sum() # No NaN value in our dataframe.
+# df.dtypes # Only "crm_cd_desc" is categorical variable(object).
+# Test code.
+df.head(5)
+# Plot 1: Pie chart.
+# Data filteration.
+crm_tot = df["crm_cd_desc"].value_counts()
+# Calculate the mean of crime cases.
+mean_crm = crm_tot.mean()
+# Filter out the crime cases that are below the mean of the crime cases.
+crm_tot_filtered = crm_tot[crm_tot > mean_crm]
+# Method comes from: https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_features.html.
+plt.figure(figsize=(12, 12))
+fig, ax = plt.subplots()
+ax.pie(crm_tot_filtered, labels=crm_tot_filtered.index, autopct='%1.1f%%', labeldistance=1.5, pctdistance=1.2)
+#-----
+### Use this one!!!
+# A more detailed version pie chart based on the previous one.
+# Filter the top 10 crime type.
+top_crimes = (
+    df["crm_cd_desc"]
+    .value_counts()
+    .nlargest(10)
+    .reset_index()
+    .rename(columns={"index": "Crime Type", "crm_cd_desc": "Count"})
+)
+# Calculate the percentage of ecah kind of crime.
+top_crimes["Percentage"] = top_crimes["Count"] / top_crimes["Count"].sum()
+# Create the pie chart.
+chart = alt.Chart(top_crimes).mark_arc(innerRadius=50).encode(
+    theta=alt.Theta(field="Count", type="quantitative"),
+    color=alt.Color(field="Crime Type", type="nominal", legend=alt.Legend(title="Crime Type")),
+    tooltip=["Crime Type", "Count", alt.Tooltip("Percentage:Q", format=".1%")]
+).properties(
+    title="Top 10 Crime Types Distribution"
+)
+# Display the plot.
+st.altair_chart(chart, theme="streamlit", use_container_width=True)
+#------
+### Use this one!!!
+# Count the crime type and list out the top 10 crime type that have the most cases.
+top_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index
+df_top = df[df['crm_cd_desc'].isin(top_crimes)]
+# Group by crime type and year.
+heatmap1_data = df_top.groupby(['crm_cd_desc', 'year']).size().unstack(fill_value=0)
+# Create the heat map.
+plt.figure(figsize=(10, 6))
+sns.heatmap(heatmap1_data, annot=True, fmt="d", cmap="YlOrRd")
+plt.title("Top 10 Crime Types by Year")
+plt.xlabel("Year")
+plt.ylabel("Crime Type")
+plt.tight_layout()
+plt.show()
+st.altair_chart(heatmap1_data, theme="streamlit", use_container_width=True)
+#------
+### Use this one!!! (stacked bar chart: yearly composition of the top 10 crime types)
+# Rank crime types by case count and keep the 10 most frequent; the ranking is computed before the 2025 rows are dropped below.
+top_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index
+df = df[df['year'] != 2025]  # rebinds the module-level df, so all code after this line works without the 2025 rows
+df_top = df[df['crm_cd_desc'].isin(top_crimes)]
+# Count incidents per (year, crime type) pair, as a long-format frame with a 'count' column.
+stacked_year_df = df_top.groupby(['year', 'crm_cd_desc']).size().reset_index(name='count')
+# Create the stacked bar chart: one bar per year, one coloured segment per crime type.
+bar_chart = alt.Chart(stacked_year_df).mark_bar().encode(
+    x=alt.X('year:O', title='Year'),
+    y=alt.Y('count:Q', stack='zero', title='Number of Incidents'),
+    color=alt.Color('crm_cd_desc:N', title='Crime Type'),
+    tooltip=['year', 'crm_cd_desc', 'count']
+).properties(
+    width=600,  # NOTE(review): presumably superseded by use_container_width=True below - confirm
+    height=400,
+    title='Stacked Crime Composition by Year (Top 10 Crime Types)'
+)
+st.altair_chart(bar_chart, theme="streamlit", use_container_width=True)
+#----
+### Use this one!!!
+# Plot 3: Line chart of yearly incident counts for the 5 most frequent crime types.
+df = df[df['year'] != 2025] # 2025 is excluded: the year is not over yet, so its trend cannot be seen (the stacked-bar section above already applies the same filter)
+# Count incidents per (year, crime type) pair.
+yearly_crime_counts = (
+    df.groupby(["year", "crm_cd_desc"])
+    .size()
+    .reset_index(name="Count")
+)
+# Keep only the 5 crime types with the most cases overall.
+top5_crimes = df["crm_cd_desc"].value_counts().nlargest(5).index
+filtered_crimes = yearly_crime_counts[yearly_crime_counts["crm_cd_desc"].isin(top5_crimes)]
+# Build the line chart: one line (with point markers) per crime type.
+line_chart = alt.Chart(filtered_crimes).mark_line(point=True).encode(
+    x=alt.X("year:O", title="Year"),
+    y=alt.Y("Count:Q", title="Number of Incidents"),
+    color=alt.Color("crm_cd_desc:N", title="Crime Type"),
+    tooltip=["year", "crm_cd_desc", "Count"]
+).properties(
+    title="Yearly Trends of Top 5 Crime Types",
+    width=700,  # NOTE(review): presumably superseded by use_container_width=True below - confirm
+    height=400
+)
+# Display the plot.
+st.altair_chart(line_chart, theme="streamlit", use_container_width=True)
+#----
+# Plot 4: Map.
+# Load geojson file.
+gdf_counties = gpd.read_file("County_Boundary.geojson")
+# Creat dropdown menu.
+year_dropdown = ipywidgets.Dropdown(
+    options= sorted(df['year'].unique()),
+    description='Year:'
+)
+# Create the map.
+def crime_map(year):
+    # df_filtered = df[df['year'] == year].sample(n=500, random_state=1)
+    # df_filtered = df[df['year'] == year].sample(n=100, random_state=1)
+    df_filtered = df[df['year'] == year].sample(n=300, random_state=1)
+    gdf_points = gpd.GeoDataFrame(
+        df_filtered,
+        geometry=gpd.points_from_xy(df_filtered['lon'], df_filtered['lat']),
+        crs="EPSG:4326"
+    )
+    fig, ax = plt.subplots(figsize=(10, 10))
+    gdf_counties.plot(ax=ax, color='lightgray', edgecolor='white')
+    gdf_points.plot(ax=ax, color='red', markersize=10, alpha=0.6)
+    ax.set_title(f"Crime Map - {year}")
+    ax.set_xlabel("Longitude")
+    ax.set_ylabel("Latitude")
+    plt.grid(True)
+    plt.show()
+# Displat the plot.
+ipywidgets.interact(crime_map, year=year_dropdown)
+### Use this one!!!
+# Loading in the map.
+gdf_counties = gpd.read_file("County_Boundary.geojson")
+# Identify top 10 crime types
+top_10_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index.tolist()
+# Filter the main DataFrame to include only top 10 crimes
+df_top = df[df['crm_cd_desc'].isin(top_10_crimes)]
+# Create the dropdown.
+crime_dropdown = ipywidgets.Dropdown(
+    options= sorted(top_10_crimes),
+    description="Crime Type:")
+# Create the map.
+def crime_map(year, crime):
+    df_filtered = df[(df['year'] == year) & (df['crm_cd_desc'] == crime)].sample(n=300, random_state=1)
+    gdf_points = gpd.GeoDataFrame(
+        df_filtered,
+        geometry=gpd.points_from_xy(df_filtered['lon'], df_filtered['lat']),
+        crs="EPSG:4326"
+    )
+    fig, ax = plt.subplots(figsize=(10, 10))
+    gdf_counties.plot(ax=ax, color='lightgray', edgecolor='white')
+    gdf_points.plot(ax=ax, color='red', markersize=10, alpha=0.6)
+    ax.set_title(f"{crime} - {year}")
+    ax.set_xlabel("Longitude")
+    ax.set_ylabel("Latitude")
+    plt.grid(True)
+    plt.show()
+# Displat the plot.
+ipywidgets.interact(crime_map, year=year_dropdown, crime=crime_dropdown)