wanwanlin0521 committed on
Commit
7e2295c
·
verified ·
1 Parent(s): 288379c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +40 -34
src/streamlit_app.py CHANGED
@@ -41,6 +41,42 @@ DATA_PATH = Path(__file__).parent / "crime_data.csv" # /app/src/crime_dat
41
  def load_data():
42
  return pd.read_csv(DATA_PATH)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # ── 0. Page configuration ──
45
  if page == "Pie Chart":
46
  st.set_page_config(
@@ -95,10 +131,10 @@ if page == "Pie Chart":
95
  st.cache_data.clear() # Clear the cache
96
  st.toast("Data is refreshed",icon="✅") # Reload the data
97
 
98
- # 2. Load and early‐exit if missing
99
- df = load_data()
100
- if df.empty:
101
- st.stop()
102
 
103
  # 3. Data preview
104
  st.header("Data Preview")
@@ -187,36 +223,6 @@ if page == "Pie Chart":
187
  # Description.
188
  st.markdown(""" The donut chart shows the share of the ten most frequent crime categories in the selected year. At the center, you can see that Vehicle ­– Stolen is the single largest slice, accounting for roughly 18.7% of all incidents, The remaining five categories each represent between 3%–5% of total incidents—these include miscellaneous crimes, criminal threats, assault with a deadly weapon, burglary, and minor vandalism. By displaying both slice size and percentage labels, the chart makes it easy to compare how dominant property‐related offenses are, versus violent or lesser‐common crimes, in that year’s LAPD data. """)
189
 
190
- # Data processing.
191
- # Count incidents per crime type and keep the 10 types with the most cases.
192
- top_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index
193
- df_top = df[df['crm_cd_desc'].isin(top_crimes)]
194
-
195
- # Exclude 2025: the year is not over yet, so its partial counts would hide the real trend.
196
- df = df[df['year'] != 2025]
197
-
198
- # Count incidents for each (year, crime type) pair.
199
- yearly_crime_counts = (
200
- df.groupby(["year", "crm_cd_desc"])
201
- .size()
202
- .reset_index(name="Count")
203
- )
204
-
205
- # Keep only the 5 crime types with the most cases.
206
- top5_crimes = df["crm_cd_desc"].value_counts().nlargest(5).index
207
- filtered_crimes = yearly_crime_counts[yearly_crime_counts["crm_cd_desc"].isin(top5_crimes)]
208
-
209
- # Identify top 10 crime types
210
- top_10_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index.tolist()
211
-
212
- # Filter the main DataFrame to include only top 10 crimes
213
- df_top = df[df['crm_cd_desc'].isin(top_10_crimes)]
214
-
215
- # Group by crime type and year.
216
- stacked_year_df = df_top.groupby(['year', 'crm_cd_desc']).size().reset_index(name='count')
217
-
218
- # Group by crime type and year.
219
- heatmap1_df = df_top.groupby(['crm_cd_desc', 'year']).size().reset_index(name='count')
220
 
221
  # -------------------------------- Plot 2: Heat Map --------------------------------
222
  elif page == "Heat Map":
 
41
  def load_data():
42
  return pd.read_csv(DATA_PATH)
43
 
44
+ # 2. Load and early‐exit if missing
45
+ df = load_data()
46
+ if df.empty:
47
+ st.stop()
48
+
49
+ # Data processing.
50
+ # Count incidents per crime type and keep the 10 types with the most cases.
51
+ top_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index
52
+ df_top = df[df['crm_cd_desc'].isin(top_crimes)]
53
+
54
+ # Exclude 2025: the year is not over yet, so its partial counts would hide the real trend.
55
+ df = df[df['year'] != 2025]
56
+
57
+ # Count incidents for each (year, crime type) pair.
58
+ yearly_crime_counts = (
59
+ df.groupby(["year", "crm_cd_desc"])
60
+ .size()
61
+ .reset_index(name="Count")
62
+ )
63
+
64
+ # Keep only the 5 crime types with the most cases.
65
+ top5_crimes = df["crm_cd_desc"].value_counts().nlargest(5).index
66
+ filtered_crimes = yearly_crime_counts[yearly_crime_counts["crm_cd_desc"].isin(top5_crimes)]
67
+
68
+ # Identify top 10 crime types
69
+ top_10_crimes = df['crm_cd_desc'].value_counts().nlargest(10).index.tolist()
70
+
71
+ # Filter the main DataFrame to include only top 10 crimes
72
+ df_top = df[df['crm_cd_desc'].isin(top_10_crimes)]
73
+
74
+ # Group by crime type and year.
75
+ stacked_year_df = df_top.groupby(['year', 'crm_cd_desc']).size().reset_index(name='count')
76
+
77
+ # Group by crime type and year.
78
+ heatmap1_df = df_top.groupby(['crm_cd_desc', 'year']).size().reset_index(name='count')
79
+
80
  # ── 0. Page configuration ──
81
  if page == "Pie Chart":
82
  st.set_page_config(
 
131
  st.cache_data.clear() # Clear the cache
132
  st.toast("Data is refreshed",icon="✅") # Reload the data
133
 
134
+ # # 2. Load and early‐exit if missing
135
+ # df = load_data()
136
+ # if df.empty:
137
+ # st.stop()
138
 
139
  # 3. Data preview
140
  st.header("Data Preview")
 
223
  # Description.
224
  st.markdown(""" The donut chart shows the share of the ten most frequent crime categories in the selected year. At the center, you can see that Vehicle ­– Stolen is the single largest slice, accounting for roughly 18.7% of all incidents, The remaining five categories each represent between 3%–5% of total incidents—these include miscellaneous crimes, criminal threats, assault with a deadly weapon, burglary, and minor vandalism. By displaying both slice size and percentage labels, the chart makes it easy to compare how dominant property‐related offenses are, versus violent or lesser‐common crimes, in that year’s LAPD data. """)
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  # -------------------------------- Plot 2: Heat Map --------------------------------
228
  elif page == "Heat Map":