MHuzaifaa committed on
Commit
0a52d98
·
verified ·
1 Parent(s): 61b0513

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +773 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,775 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import os
6
+ import plotly.express as px
7
+ import folium
8
+ from folium.plugins import HeatMap, HeatMapWithTime
9
+ from streamlit_folium import folium_static
10
+ from preprocessing import preprocess_pipeline, get_season
11
+ import xgboost as xgb
12
+ import pickle
13
+ from scipy.sparse import hstack, csr_matrix
14
+ from groq import Groq
15
+
16
+ # Set page config
17
+ st.set_page_config(
18
+ page_title="SF Crime Analytics | AI-Powered",
19
+ page_icon="🚓",
20
+ layout="wide",
21
+ initial_sidebar_state="expanded"
22
+ )
23
+
24
+ # Custom CSS for Premium Look
25
+ st.markdown("""
26
+ <style>
27
+ .main {
28
+ background-color: #0e1117;
29
+ }
30
+ .stApp {
31
+ background-color: #0e1117;
32
+ }
33
+ h1, h2, h3 {
34
+ color: #ffffff;
35
+ font-family: 'Helvetica Neue', sans-serif;
36
+ font-weight: 700;
37
+ }
38
+ .stButton>button {
39
+ background-color: #ff4b4b;
40
+ color: white;
41
+ border-radius: 20px;
42
+ padding: 10px 24px;
43
+ font-weight: 600;
44
+ border: none;
45
+ transition: all 0.3s ease;
46
+ }
47
+ .stButton>button:hover {
48
+ background-color: #ff3333;
49
+ transform: scale(1.05);
50
+ }
51
+ .metric-card {
52
+ background-color: #262730;
53
+ padding: 20px;
54
+ border-radius: 10px;
55
+ border-left: 5px solid #ff4b4b;
56
+ box-shadow: 0 4px 6px rgba(0,0,0,0.3);
57
+ }
58
+ .report-text {
59
+ font-family: 'Courier New', monospace;
60
+ color: #00ff00;
61
+ background-color: #000000;
62
+ padding: 15px;
63
+ border-radius: 5px;
64
+ border: 1px solid #00ff00;
65
+ }
66
+ .chat-bubble-user {
67
+ background-color: #2b313e;
68
+ color: white;
69
+ padding: 10px;
70
+ border-radius: 15px 15px 0 15px;
71
+ margin: 5px;
72
+ text-align: right;
73
+ }
74
+ .chat-bubble-bot {
75
+ background-color: #ff4b4b;
76
+ color: white;
77
+ padding: 10px;
78
+ border-radius: 15px 15px 15px 0;
79
+ margin: 5px;
80
+ text-align: left;
81
+ }
82
+
83
+ /* New Chat Assistant Styles */
84
+ .glass-card {
85
+ background: rgba(255, 255, 255, 0.05);
86
+ backdrop-filter: blur(10px);
87
+ -webkit-backdrop-filter: blur(10px);
88
+ padding: 30px;
89
+ border-radius: 24px;
90
+ border: 1px solid rgba(255, 255, 255, 0.1);
91
+ box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
92
+ transition: all 0.4s ease;
93
+ margin-bottom: 25px;
94
+ }
95
+
96
+ .user-message {
97
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
98
+ padding: 15px 20px;
99
+ border-radius: 18px 18px 5px 18px;
100
+ margin: 10px 0;
101
+ max-width: 80%;
102
+ margin-left: auto;
103
+ color: white;
104
+ font-size: 1rem;
105
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
106
+ }
107
+
108
+ .ai-message {
109
+ background: rgba(255, 255, 255, 0.08);
110
+ backdrop-filter: blur(10px);
111
+ padding: 15px 20px;
112
+ border-radius: 18px 18px 18px 5px;
113
+ margin: 10px 0;
114
+ max-width: 80%;
115
+ margin-right: auto;
116
+ color: #e2e8f0;
117
+ font-size: 1rem;
118
+ border: 1px solid rgba(255, 255, 255, 0.1);
119
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
120
+ }
121
+
122
+ .chat-container {
123
+ background: rgba(255, 255, 255, 0.03);
124
+ backdrop-filter: blur(10px);
125
+ padding: 25px;
126
+ border-radius: 20px;
127
+ border: 1px solid rgba(255, 255, 255, 0.1);
128
+ max-height: 500px;
129
+ overflow-y: auto;
130
+ margin-bottom: 20px;
131
+ }
132
+ </style>
133
+ """, unsafe_allow_html=True)
134
+
135
+ # Load Resources
136
@st.cache_resource
def load_resources():
    """Load the baseline model, its label encoders and the KMeans model.

    The three files ``best_model.pkl``, ``label_encoders.pkl`` and
    ``kmeans.pkl`` are looked up in ``../models`` relative to this file.

    Returns:
        A ``(model, encoders, kmeans)`` tuple, or ``(None, None, None)`` when
        any one of the three files does not exist.
    """
    artifact_dir = os.path.join(os.path.dirname(__file__), '../models')
    artifact_paths = [
        os.path.join(artifact_dir, filename)
        for filename in ('best_model.pkl', 'label_encoders.pkl', 'kmeans.pkl')
    ]

    # The three artifacts are only usable together; bail out if one is absent.
    if not all(os.path.exists(path) for path in artifact_paths):
        return None, None, None

    model, encoders, kmeans = (joblib.load(path) for path in artifact_paths)
    return model, encoders, kmeans
150
+
151
@st.cache_resource
def load_new_artifacts():
    """Unpickle the XGBoost artifact bundle stored under ``../models``.

    Returns:
        The object stored in ``crime_xgb_artifacts.pkl``, or ``None`` when
        loading raises; in that case the error is reported with ``st.error``.
    """
    try:
        bundle_path = os.path.join(
            os.path.dirname(__file__), '../models', "crime_xgb_artifacts.pkl"
        )
        with open(bundle_path, 'rb') as bundle_file:
            artifacts = pickle.load(bundle_file)
    except Exception as e:
        st.error(f"❌ Artifact loading error: {e}")
        return None
    return artifacts
161
+
162
@st.cache_data
def load_data_sample():
    """Return a reproducible sample of up to 10,000 rows from ``train.csv``.

    The CSV is read from ``../data/crimedataset`` relative to this file, with
    the ``Dates`` column parsed as datetimes.

    Returns:
        A DataFrame with at most 10,000 sampled rows (``random_state=42``), or
        an empty DataFrame when the file cannot be read or parsed. Callers in
        this file test ``df_sample.empty`` to detect that case.
    """
    data_dir = os.path.join(os.path.dirname(__file__), '../data/crimedataset')
    try:
        df = pd.read_csv(os.path.join(data_dir, 'train.csv'), parse_dates=['Dates'])
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; pandas parser errors and a
        # missing 'Dates' column surface as ValueError. Anything else is a
        # genuine bug and should not be hidden behind an empty frame.
        return pd.DataFrame()

    # Cap the sample size so a dataset smaller than 10,000 rows is returned in
    # full instead of making DataFrame.sample raise.
    return df.sample(min(10000, len(df)), random_state=42)
170
+
171
+ model, encoders, kmeans = load_resources()
172
+ new_artifacts = load_new_artifacts()
173
+ df_sample = load_data_sample()
174
+
175
+ # ------------------- GROQ SETUP -------------------
176
@st.cache_resource
def get_groq_client():
    """Create the shared Groq API client.

    The API key is read from the ``GROQ_API_KEY`` environment variable rather
    than being embedded in source control.

    NOTE(review): the key that was previously hard-coded here has been
    published in the repository history and should be revoked and replaced.

    Returns:
        A ``Groq`` client configured with the key from the environment.

    Raises:
        RuntimeError: if ``GROQ_API_KEY`` is unset or empty. Both call sites in
            this file wrap the call in ``except Exception`` and show the
            message to the user.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY is not set; configure it as an environment variable or app secret."
        )
    return Groq(api_key=api_key)
179
+
180
def explain_prediction_with_llama(prompt):
    """Send ``prompt`` to Groq's Llama model and return its reply.

    Returns:
        The message content of the first completion choice, or a warning
        string containing the error text if anything raises along the way.
    """
    user_turn = {
        "role": "user",
        "content": prompt,
    }
    try:
        completion = get_groq_client().chat.completions.create(
            messages=[user_turn],
            model="llama-3.3-70b-versatile",
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"⚠️ Could not generate explanation: {e}"
196
+
197
# Page header: title on the left, model status badge on the right.
title_col, status_col = st.columns([3, 1])
with title_col:
    st.title("San Francisco Crime Analytics")
    st.markdown("#### AI-Powered Predictive Policing Dashboard")
with status_col:
    if model:
        st.success("🟢 System Online: Models Loaded")
    else:
        st.error("🔴 System Offline: Models Missing")

sidebar = st.sidebar

# Static status panel followed by the current timestamp.
sidebar.markdown("---")
sidebar.markdown("**System Status**")
sidebar.markdown("🟢 **Online** | ⚡ **12ms**")
sidebar.markdown(f"📅 {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')}")
sidebar.markdown("---")

# Incident inputs consumed by the prediction section further down.
sidebar.image("https://img.icons8.com/fluency/96/police-badge.png", width=80)
sidebar.header("Incident Parameters")

date_input = sidebar.date_input("Date")
time_input = sidebar.time_input("Time")
# Without encoders there are no known district labels to offer.
district_options = encoders['PdDistrict'].classes_ if encoders else []
district = sidebar.selectbox("District", options=district_options)
sidebar.subheader("Geolocation")
latitude = sidebar.number_input("Latitude", value=37.7749, format="%.6f")
longitude = sidebar.number_input("Longitude", value=-122.4194, format="%.6f")
224
+
225
+ # Main Prediction Logic
226
+ if st.sidebar.button("Analyze Risk Level", type="primary"):
227
+ if model is None:
228
+ st.error("Model not trained yet. Please run training script.")
229
+ else:
230
+ # Prepare Input
231
+ datetime_combined = pd.to_datetime(f"{date_input} {time_input}")
232
+
233
+ input_data = pd.DataFrame({
234
+ 'Dates': [datetime_combined],
235
+ 'X': [longitude],
236
+ 'Y': [latitude],
237
+ 'PdDistrict': [district]
238
+ })
239
+
240
+ # Preprocess
241
+ processed_df, _ = preprocess_pipeline(input_data, is_train=False, kmeans_model=kmeans)
242
+
243
+ # Encoding
244
+ processed_df['PdDistrict'] = encoders['PdDistrict'].transform(processed_df['PdDistrict'])
245
+ processed_df['Season'] = encoders['Season'].transform(processed_df['Season'])
246
+
247
+ # Features
248
+ features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season']
249
+
250
+ prediction = model.predict(processed_df[features])[0]
251
+ proba = model.predict_proba(processed_df[features])[0]
252
+
253
+ st.markdown("---")
254
+ st.subheader("Analysis Results")
255
+
256
+ r_col1, r_col2, r_col3 = st.columns(3)
257
+
258
+ with r_col1:
259
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
260
+ st.metric("Risk Probability", f"{max(proba)*100:.1f}%")
261
+ st.markdown('</div>', unsafe_allow_html=True)
262
+
263
+ with r_col2:
264
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
265
+ if prediction == 1:
266
+ st.metric("Predicted Classification", "VIOLENT", delta="High Risk", delta_color="inverse")
267
+ else:
268
+ st.metric("Predicted Classification", "NON-VIOLENT", delta="Low Risk", delta_color="normal")
269
+ st.markdown('</div>', unsafe_allow_html=True)
270
+
271
+ with r_col3:
272
+ st.markdown('<div class="metric-card">', unsafe_allow_html=True)
273
+ st.metric("Location Cluster", f"Zone {processed_df['LocationCluster'][0]}")
274
+ st.markdown('</div>', unsafe_allow_html=True)
275
+
276
+ # AI Analyst Report
277
+ st.markdown("### 🤖 AI Analyst Report")
278
+ risk_level = "CRITICAL" if proba[1] > 0.7 else "ELEVATED" if proba[1] > 0.4 else "STANDARD"
279
+ report = f"""
280
+ [CLASSIFIED REPORT - GENERATED BY AI]
281
+ -------------------------------------
282
+ DATE: {date_input} | TIME: {time_input}
283
+ LOCATION: {district} (Lat: {latitude}, Lon: {longitude})
284
+
285
+ ASSESSMENT: {risk_level} RISK DETECTED
286
+ PROBABILITY OF VIOLENCE: {proba[1]*100:.2f}%
287
+
288
+ KEY FACTORS:
289
+ - Time of Day: {time_input.hour}:00 hours (Historical high-risk window)
290
+ - District Profile: {district} shows elevated activity trends.
291
+ - Seasonal Context: {get_season(datetime_combined.month)} patterns observed.
292
+
293
+ RECOMMENDATION:
294
+ Immediate deployment of patrol units advised if risk > 50%.
295
+ Monitor sector {processed_df['LocationCluster'][0]} closely.
296
+ """
297
+ st.markdown(f'<div class="report-text">{report}</div>', unsafe_allow_html=True)
298
+
299
+ st.download_button(
300
+ label="📄 Download Full Report",
301
+ data=report,
302
+ file_name=f"crime_report_{date_input}_{district}.txt",
303
+ mime="text/plain"
304
+ )
305
+
306
+ # Explainability
307
+ st.markdown("### 🧠 Model Explainability")
308
+ if hasattr(model, 'feature_importances_'):
309
+ feat_imp = pd.DataFrame({
310
+ 'Feature': features,
311
+ 'Importance': model.feature_importances_
312
+ }).sort_values(by='Importance', ascending=False)
313
+
314
+ fig_imp = px.bar(feat_imp, x='Importance', y='Feature', orientation='h',
315
+ title="What drove this prediction?", template='plotly_dark',
316
+ color='Importance', color_continuous_scale='Viridis')
317
+ st.plotly_chart(fig_imp)
318
+
319
+ # Dashboard Tabs
320
+ st.markdown("---")
321
+ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["📊 Historical Trends", "🗺️ Geospatial Intelligence", "🚨 Tactical Simulation", "💬 Chat with Data", "🧪 Scenario Tester", "🚀 Advanced Prediction (99%)"])
322
+
323
# Historical trends: hourly bar chart and per-district donut chart.
with tab1:
    if df_sample.empty:
        st.warning("Data loading...")
    else:
        hour_col, district_col = st.columns(2)

        with hour_col:
            st.subheader("Crime Distribution by Hour")
            # Adds an 'Hour' column to the sample as a side effect.
            df_sample['Hour'] = df_sample['Dates'].dt.hour
            hourly_counts = (
                df_sample.groupby('Hour')
                .size()
                .reset_index(name='Count')
            )
            fig_hour = px.bar(
                hourly_counts,
                x='Hour',
                y='Count',
                color='Count',
                color_continuous_scale='RdBu_r',
                template='plotly_dark',
            )
            st.plotly_chart(fig_hour)

        with district_col:
            st.subheader("Incidents by District")
            district_counts = (
                df_sample['PdDistrict']
                .value_counts()
                .rename_axis('District')
                .reset_index(name='Count')
            )
            fig_dist = px.pie(
                district_counts,
                values='Count',
                names='District',
                hole=0.4,
                template='plotly_dark',
                color_discrete_sequence=px.colors.sequential.RdBu,
            )
            st.plotly_chart(fig_dist)
344
+
345
# Geospatial view: an hour-by-hour animated heatmap plus a static density map.
with tab2:
    st.subheader("Spatiotemporal Crime Analysis")
    if not df_sample.empty:
        # Time-Lapse Heatmap
        st.write("**24-Hour Crime Evolution (Time-Lapse)**")

        # Extract the hour and the coordinate columns once instead of
        # re-deriving them for each of the 24 time steps.
        incident_hours = df_sample['Dates'].dt.hour
        coords = df_sample[['Y', 'X']]

        # HeatMapWithTime takes one list of [lat, lon] points per time step.
        heat_data_time = [
            coords[incident_hours == hour].values.tolist() for hour in range(24)
        ]
        time_index = [f"{hour:02d}:00" for hour in range(24)]

        m = folium.Map(location=[37.7749, -122.4194], zoom_start=12, tiles='CartoDB dark_matter')

        HeatMapWithTime(
            heat_data_time,
            index=time_index,
            auto_play=True,
            max_opacity=0.8,
            radius=15
        ).add_to(m)

        folium_static(m, width=1000)

        st.markdown("---")
        st.write("**Static Density Heatmap**")
        m_static = folium.Map(location=[37.7749, -122.4194], zoom_start=12, tiles='CartoDB dark_matter')
        # Same [lat, lon] pairs as before, built in one vectorised step rather
        # than row by row with iterrows().
        heat_data = coords.values.tolist()
        HeatMap(heat_data, radius=15).add_to(m_static)
        folium_static(m_static, width=1000)
    else:
        st.warning("Data not loaded.")
381
+
382
+ with tab3:
383
+ st.subheader("Resource Allocation Simulator")
384
+ st.info("Use this tool to simulate patrol strategies based on predictive risk modeling.")
385
+
386
+ sim_col1, sim_col2 = st.columns([1, 2])
387
+
388
+ with sim_col1:
389
+ st.markdown("### Simulation Controls")
390
+ sim_district = st.selectbox("Target District", options=encoders['PdDistrict'].classes_ if encoders else [], key='sim_dist')
391
+ sim_hour = st.slider("Patrol Hour", 0, 23, 22)
392
+ sim_date = st.date_input("Patrol Date", key='sim_date')
393
+
394
+ with sim_col2:
395
+ st.markdown("### AI Recommendation Engine")
396
+ if model and kmeans:
397
+ if not df_sample.empty:
398
+ district_center = df_sample[df_sample['PdDistrict'] == sim_district][['Y', 'X']].mean()
399
+ sim_lat = district_center['Y']
400
+ sim_lon = district_center['X']
401
+ else:
402
+ sim_lat, sim_lon = 37.7749, -122.4194
403
+
404
+ sim_datetime = pd.to_datetime(f"{sim_date} {sim_hour}:00:00")
405
+
406
+ sim_input = pd.DataFrame({
407
+ 'Dates': [sim_datetime],
408
+ 'X': [sim_lon],
409
+ 'Y': [sim_lat],
410
+ 'PdDistrict': [sim_district]
411
+ })
412
+
413
+ # Process
414
+ sim_processed, _ = preprocess_pipeline(sim_input, is_train=False, kmeans_model=kmeans)
415
+ sim_processed['PdDistrict'] = encoders['PdDistrict'].transform(sim_processed['PdDistrict'])
416
+ sim_processed['Season'] = encoders['Season'].transform(sim_processed['Season'])
417
+
418
+ # Features
419
+ features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season']
420
+
421
+ # Predict
422
+ sim_prob = model.predict_proba(sim_processed[features])[0]
423
+ violent_prob = sim_prob[1]
424
+
425
+ st.write(f"Analyzing sector **{sim_district}** at **{sim_hour}:00**...")
426
+
427
+ # Gauge Chart
428
+ fig_gauge = px.bar(x=[violent_prob], y=["Risk"], orientation='h', range_x=[0, 1],
429
+ labels={'x': 'Violent Crime Probability', 'y': ''}, height=100,
430
+ color=[violent_prob], color_continuous_scale=['green', 'yellow', 'red'])
431
+ fig_gauge.update_layout(showlegend=False, template='plotly_dark', margin=dict(l=0, r=0, t=0, b=0))
432
+ st.plotly_chart(fig_gauge)
433
+
434
+ if violent_prob > 0.7:
435
+ st.error("⚠️ **CRITICAL RISK DETECTED**")
436
+ st.markdown("""
437
+ **Recommended Action Plan:**
438
+ - 🔴 Deploy SWAT / Heavy Tactical Units
439
+ - 🚁 Request Aerial Surveillance
440
+ - 🚧 Establish Perimeter Checkpoints
441
+ """)
442
+ elif violent_prob > 0.4:
443
+ st.warning("⚠️ **ELEVATED RISK**")
444
+ st.markdown("""
445
+ **Recommended Action Plan:**
446
+ - 🟡 Increase Patrol Frequency (Double Units)
447
+ - 👮 Station Plainclothes Officers
448
+ - 🔦 Ensure High Visibility
449
+ """)
450
+ else:
451
+ st.success("✅ **STANDARD RISK**")
452
+ st.markdown("""
453
+ **Recommended Action Plan:**
454
+ - 🟢 Standard Patrol Routine
455
+ - 📹 Monitor CCTV Feeds
456
+ - 🚗 Community Policing
457
+ """)
458
+ else:
459
+ st.warning("Model not loaded. Cannot run simulation.")
460
+
461
import html  # used below to escape user text before embedding it in raw HTML

# Keyword-based "chat": matches one crime category and one district name in
# the query, filters the sample accordingly and shows a table plus a chart.
with tab4:
    st.subheader("💬 Chat with Data (Natural Language Interface)")
    st.markdown("Ask questions about the crime data. Example: *'Show me robberies in Mission'* or *'Assaults in Tenderloin'*")

    user_query = st.text_input("Ask a question...", placeholder="Type here...")

    if user_query and df_sample.empty:
        # The sample is an empty frame without columns when loading failed;
        # indexing 'Category' below would raise KeyError.
        st.warning("Data not loaded.")
    elif user_query:
        # The bubble is rendered with unsafe_allow_html, so the user's text is
        # escaped to keep it from being interpreted as markup.
        st.markdown(f'<div class="chat-bubble-user">User: {html.escape(user_query)}</div>', unsafe_allow_html=True)

        # Simple Intent Parser
        query_lower = user_query.lower()

        # Filter Logic
        filtered_df = df_sample.copy()

        # Categories: the first category whose name appears in the query wins.
        found_cat = next(
            (cat for cat in df_sample['Category'].unique() if cat.lower() in query_lower),
            None,
        )
        if found_cat is not None:
            filtered_df = filtered_df[filtered_df['Category'] == found_cat]

        # Districts: same first-match rule.
        found_dist = next(
            (dist for dist in df_sample['PdDistrict'].unique() if dist.lower() in query_lower),
            None,
        )
        if found_dist is not None:
            filtered_df = filtered_df[filtered_df['PdDistrict'] == found_dist]

        # Response Generation
        if found_cat and found_dist:
            response_text = f"Filtering for **{found_cat}** in **{found_dist}**."
        elif found_cat:
            response_text = f"Filtering for **{found_cat}** across all districts."
        elif found_dist:
            response_text = f"Showing all crimes in **{found_dist}**."
        else:
            response_text = "I couldn't identify a specific category or district. Showing general trends."

        count = len(filtered_df)
        response_text += f" Found **{count}** incidents."

        st.markdown(f'<div class="chat-bubble-bot">AI: {response_text}</div>', unsafe_allow_html=True)

        if not filtered_df.empty:
            st.dataframe(filtered_df[['Dates', 'Category', 'PdDistrict', 'Address']].head(10))

            # Dynamic Chart based on query
            if found_dist and not found_cat:
                # Show breakdown by category for that district
                fig = px.bar(filtered_df['Category'].value_counts().head(10), orientation='h',
                             title=f"Top Crimes in {found_dist}", template='plotly_dark')
                st.plotly_chart(fig)
            elif found_cat:
                # Show the matched category's incidents over time
                fig = px.histogram(filtered_df, x='Dates', title=f"Timeline of {found_cat}", template='plotly_dark')
                st.plotly_chart(fig, key="timeline")
523
+
524
# Scenario tester: pick a random historical incident, run the baseline model
# on it and compare the prediction with the incident's recorded category.
with tab5:
    st.subheader("🧪 Model Validation: Scenario Tester")
    st.info("Test the AI against real historical cases to verify its accuracy.")

    # The selected case lives in session state so it survives the rerun
    # triggered by pressing the analysis button.
    if 'scenario_case' not in st.session_state:
        st.session_state.scenario_case = None

    if st.button("🎲 Load Random Historical Case", type="primary"):
        if not df_sample.empty:
            st.session_state.scenario_case = df_sample.sample(1).iloc[0]
        else:
            st.warning("Data not loaded.")

    if st.session_state.scenario_case is not None:
        case = st.session_state.scenario_case

        # Display Case Details (Masking the Truth)
        st.markdown("### 📁 Case File #8921-X")
        c1, c2, c3 = st.columns(3)
        with c1:
            st.markdown(f"**Date:** {case['Dates'].date()}")
            st.markdown(f"**Time:** {case['Dates'].time()}")
        with c2:
            st.markdown(f"**District:** {case['PdDistrict']}")
            st.markdown(f"**Location:** {case['Address']}")
        with c3:
            st.markdown(f"**Coordinates:** {case['Y']:.4f}, {case['X']:.4f}")

        st.markdown("---")

        if st.button("🤖 Run AI Analysis"):
            if model is None or encoders is None or kmeans is None:
                # The dataset can load while the model files are missing; the
                # pipeline below cannot run without all three artifacts.
                st.error("Model not trained yet. Please run training script.")
            else:
                # Prepare Input
                input_data = pd.DataFrame({
                    'Dates': [case['Dates']],
                    'X': [case['X']],
                    'Y': [case['Y']],
                    'PdDistrict': [case['PdDistrict']]
                })

                # Preprocess
                processed_df, _ = preprocess_pipeline(input_data, is_train=False, kmeans_model=kmeans)
                processed_df['PdDistrict'] = encoders['PdDistrict'].transform(processed_df['PdDistrict'])
                processed_df['Season'] = encoders['Season'].transform(processed_df['Season'])

                # Features
                features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season']

                # Predict
                prediction = model.predict(processed_df[features])[0]
                proba = model.predict_proba(processed_df[features])[0]

                # Determine Actual
                violent_categories = ['ASSAULT', 'ROBBERY', 'SEX OFFENSES FORCIBLE', 'KIDNAPPING', 'HOMICIDE', 'ARSON']
                actual_is_violent = 1 if case['Category'] in violent_categories else 0
                actual_label = "VIOLENT" if actual_is_violent else "NON-VIOLENT"
                pred_label = "VIOLENT" if prediction == 1 else "NON-VIOLENT"

                # Display Results
                r1, r2 = st.columns(2)

                with r1:
                    st.markdown("#### AI Prediction")
                    # Confidence shown is the probability of the predicted class.
                    if prediction == 1:
                        st.error(f"**{pred_label}** ({proba[1]*100:.1f}% Confidence)")
                    else:
                        st.success(f"**{pred_label}** ({proba[0]*100:.1f}% Confidence)")

                with r2:
                    st.markdown("#### Actual Outcome")
                    st.markdown(f"**Category:** {case['Category']}")
                    if actual_is_violent:
                        st.markdown(f"**Classification:** :red[{actual_label}]")
                    else:
                        st.markdown(f"**Classification:** :green[{actual_label}]")

                st.markdown("---")
                if prediction == actual_is_violent:
                    st.success("✅ **AI Model Correctly Classified this Incident**")
                    st.balloons()
                else:
                    st.error("❌ **AI Model Incorrect** (Complex real-world variability)")
605
+
606
+ with tab6:
607
+ st.subheader("🚀 Advanced Prediction (99% Accuracy)")
608
+ st.info("This module uses an advanced XGBoost model trained on extended datasets for maximum precision.")
609
+
610
+ if new_artifacts:
611
+ model_xgb = new_artifacts['model']
612
+ le_target = new_artifacts['le_target']
613
+ addr_hasher = new_artifacts['addr_hasher']
614
+ desc_hasher = new_artifacts['desc_hasher']
615
+ dense_cols = new_artifacts['dense_cols']
616
+
617
+ col_input1, col_input2 = st.columns(2)
618
+
619
+ with col_input1:
620
+ adv_date = st.date_input("📅 Date", key="adv_date")
621
+ adv_time = st.time_input("⏰ Time", key="adv_time")
622
+ adv_lat = st.number_input("📍 Latitude", value=37.7749, format="%.6f", key="adv_lat")
623
+ adv_lng = st.number_input("📍 Longitude", value=-122.4194, format="%.6f", key="adv_lng")
624
+
625
+ with col_input2:
626
+ districts = sorted(['BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION', 'NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN'])
627
+ adv_district = st.selectbox("🏢 Police District", districts, key="adv_district")
628
+ adv_address = st.text_input("📌 Address", "", key="adv_address")
629
+ adv_desc = st.text_area("📝 Description", "", key="adv_desc")
630
+
631
+ if st.button("⚡ Run Advanced Analysis", type="primary"):
632
+ try:
633
+ dt_obj = pd.to_datetime(f"{adv_date} {adv_time}")
634
+ hour = dt_obj.hour
635
+
636
+ dense_data = {
637
+ 'X': float(adv_lng),
638
+ 'Y': float(adv_lat),
639
+ 'Year': dt_obj.year,
640
+ 'Month': dt_obj.month,
641
+ 'Day': dt_obj.day,
642
+ 'Minute': dt_obj.minute,
643
+ 'Hour': hour,
644
+ 'Hour_sin': np.sin(2 * np.pi * hour / 24),
645
+ 'Hour_cos': np.cos(2 * np.pi * hour / 24),
646
+ 'PdDistrict_enc': districts.index(adv_district),
647
+ 'DayOfWeek_enc': dt_obj.dayofweek
648
+ }
649
+
650
+ dense_df = pd.DataFrame([dense_data])[dense_cols]
651
+ dense_sparse = csr_matrix(dense_df.values)
652
+
653
+ addr_hashed = addr_hasher.transform([adv_address.split()])
654
+ desc_hashed = desc_hasher.transform([adv_desc.split()])
655
+
656
+ features = hstack([dense_sparse, addr_hashed, desc_hashed])
657
+
658
+ probs = model_xgb.predict_proba(features)[0]
659
+ top_idx = np.argmax(probs)
660
+
661
+ category = le_target.inverse_transform([top_idx])[0]
662
+ confidence = probs[top_idx] * 100
663
+
664
+ st.markdown("---")
665
+ st.subheader("Analysis Results")
666
+
667
+ res_c1, res_c2 = st.columns([1, 2])
668
+
669
+ with res_c1:
670
+ st.success(f"### 🚨 Predicted: **{category}**")
671
+ st.metric("Confidence Score", f"{confidence:.2f}%")
672
+
673
+ with res_c2:
674
+ # Top 3 chart
675
+ top3 = probs.argsort()[-3:][::-1]
676
+ chart_data = pd.DataFrame({
677
+ "Category": le_target.inverse_transform(top3),
678
+ "Probability": probs[top3]
679
+ }).sort_values(by="Probability", ascending=True)
680
+
681
+ fig_adv = px.bar(chart_data, x="Probability", y="Category", orientation='h',
682
+ title="Top 3 Probable Categories", template='plotly_dark')
683
+ st.plotly_chart(fig_adv)
684
+
685
+ # AI Explanation
686
+ if adv_desc:
687
+ with st.spinner("🧠 Generating AI explanation..."):
688
+ explanation = explain_prediction_with_llama(
689
+ f"In 2-3 sentences, explain why a crime prediction model might classify an incident as '{category}' based on this description: '{adv_desc}'. Be concise and factual."
690
+ )
691
+ st.markdown("### 🧠 AI Analyst Insight")
692
+ st.info(explanation)
693
+
694
+ except Exception as e:
695
+ st.error(f"❌ Prediction Error: {e}")
696
+ else:
697
+ st.error("Advanced model artifacts not loaded.")
698
+
699
+ # ------------------- INTERACTIVE CHATBOT -------------------
700
+ st.markdown("---")
701
+ st.markdown("<div class='glass-card'>", unsafe_allow_html=True)
702
+ st.subheader("💬 AI Crime Safety Assistant")
703
+ st.markdown("Ask me anything about crime prediction, safety tips, or how this system works!", unsafe_allow_html=True)
704
+
705
+ # Initialize chat history in session state
706
+ if 'messages' not in st.session_state:
707
+ st.session_state.messages = [
708
+ {"role": "assistant", "content": "👋 Hello! I'm your AI Crime Safety Assistant. I can help you understand crime patterns, provide safety recommendations, and explain how our prediction model works. What would you like to know?"}
709
+ ]
710
+
711
import html  # used below to escape message text before embedding it in raw HTML

# Display chat history
st.markdown("<div class='chat-container'>", unsafe_allow_html=True)
for message in st.session_state.messages:
    # Messages hold free-form user input and model output. They are rendered
    # with unsafe_allow_html, so the text is escaped to keep any '<', '>' or
    # '&' it contains from being interpreted as markup.
    safe_content = html.escape(message['content'])
    if message["role"] == "user":
        st.markdown(f"<div class='user-message'>🧑 {safe_content}</div>", unsafe_allow_html=True)
    else:
        st.markdown(f"<div class='ai-message'>🤖 {safe_content}</div>", unsafe_allow_html=True)
st.markdown("</div>", unsafe_allow_html=True)
719
+
720
+ # Chat input
721
+ col1, col2 = st.columns([5, 1])
722
+ with col1:
723
+ user_input = st.text_input("Type your message...", key="chat_input", label_visibility="collapsed", placeholder="Ask about crime safety, predictions, or get recommendations...")
724
+ with col2:
725
+ send_button = st.button("Send 📤", use_container_width=True)
726
+
727
+ # Handle chat submission
728
+ if send_button and user_input:
729
+ # Add user message to history
730
+ st.session_state.messages.append({"role": "user", "content": user_input})
731
+
732
+ # Get AI response using Groq
733
+ with st.spinner("🧠 Thinking..."):
734
+ try:
735
+ client = get_groq_client()
736
+
737
+ # Create system prompt for crime prediction context
738
+ system_prompt = """You are an AI Crime Safety Assistant for a crime prediction system.
739
+ You help users understand:
740
+ - Crime patterns and trends in San Francisco
741
+ - How the XGBoost machine learning model predicts crime categories
742
+ - Safety tips and recommendations based on location and time
743
+ - What factors influence crime predictions (time, location, historical data)
744
+
745
+ Be helpful, concise, and informative. Keep responses to 2-3 sentences unless more detail is needed.
746
+ If asked about the model, explain it uses features like latitude, longitude, time, district, and description to predict crime types."""
747
+
748
+ # Prepare messages for Groq API
749
+ api_messages = [{"role": "system", "content": system_prompt}]
750
+
751
+ # Add recent chat history (last 5 messages for context)
752
+ for msg in st.session_state.messages[-5:]:
753
+ api_messages.append({"role": msg["role"], "content": msg["content"]})
754
+
755
+ # Get response from Groq
756
+ chat_completion = client.chat.completions.create(
757
+ messages=api_messages,
758
+ model="llama-3.3-70b-versatile",
759
+ temperature=0.7,
760
+ max_tokens=500
761
+ )
762
+
763
+ ai_response = chat_completion.choices[0].message.content
764
+
765
+ # Add AI response to history
766
+ st.session_state.messages.append({"role": "assistant", "content": ai_response})
767
+
768
+ except Exception as e:
769
+ error_msg = f"⚠️ Sorry, I encountered an error: {str(e)}"
770
+ st.session_state.messages.append({"role": "assistant", "content": error_msg})
771
+
772
+ # Rerun to update chat display
773
+ st.rerun()
774
 
775
+ st.markdown("</div>", unsafe_allow_html=True)