|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import joblib |
|
|
import os |
|
|
import plotly.express as px |
|
|
import folium |
|
|
from folium.plugins import HeatMap, HeatMapWithTime |
|
|
from streamlit_folium import folium_static |
|
|
from preprocessing import preprocess_pipeline, get_season |
|
|
import xgboost as xgb |
|
|
import pickle |
|
|
from scipy.sparse import hstack, csr_matrix |
|
|
from groq import Groq |
|
|
|
|
|
|
|
|
st.set_page_config( |
|
|
page_title="SF Crime Analytics | AI-Powered", |
|
|
page_icon="🚓", |
|
|
layout="wide", |
|
|
initial_sidebar_state="expanded" |
|
|
) |
|
|
|
|
|
|
|
|
st.markdown(""" |
|
|
<style> |
|
|
.main { |
|
|
background-color: #0e1117; |
|
|
} |
|
|
.stApp { |
|
|
background-color: #0e1117; |
|
|
} |
|
|
h1, h2, h3 { |
|
|
color: #ffffff; |
|
|
font-family: 'Helvetica Neue', sans-serif; |
|
|
font-weight: 700; |
|
|
} |
|
|
.stButton>button { |
|
|
background-color: #ff4b4b; |
|
|
color: white; |
|
|
border-radius: 20px; |
|
|
padding: 10px 24px; |
|
|
font-weight: 600; |
|
|
border: none; |
|
|
transition: all 0.3s ease; |
|
|
} |
|
|
.stButton>button:hover { |
|
|
background-color: #ff3333; |
|
|
transform: scale(1.05); |
|
|
} |
|
|
.metric-card { |
|
|
background-color: #262730; |
|
|
padding: 20px; |
|
|
border-radius: 10px; |
|
|
border-left: 5px solid #ff4b4b; |
|
|
box-shadow: 0 4px 6px rgba(0,0,0,0.3); |
|
|
} |
|
|
.report-text { |
|
|
font-family: 'Courier New', monospace; |
|
|
color: #00ff00; |
|
|
background-color: #000000; |
|
|
padding: 15px; |
|
|
border-radius: 5px; |
|
|
border: 1px solid #00ff00; |
|
|
} |
|
|
.chat-bubble-user { |
|
|
background-color: #2b313e; |
|
|
color: white; |
|
|
padding: 10px; |
|
|
border-radius: 15px 15px 0 15px; |
|
|
margin: 5px; |
|
|
text-align: right; |
|
|
} |
|
|
.chat-bubble-bot { |
|
|
background-color: #ff4b4b; |
|
|
color: white; |
|
|
padding: 10px; |
|
|
border-radius: 15px 15px 15px 0; |
|
|
margin: 5px; |
|
|
text-align: left; |
|
|
} |
|
|
|
|
|
/* New Chat Assistant Styles */ |
|
|
.glass-card { |
|
|
background: rgba(255, 255, 255, 0.05); |
|
|
backdrop-filter: blur(10px); |
|
|
-webkit-backdrop-filter: blur(10px); |
|
|
padding: 30px; |
|
|
border-radius: 24px; |
|
|
border: 1px solid rgba(255, 255, 255, 0.1); |
|
|
box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37); |
|
|
transition: all 0.4s ease; |
|
|
margin-bottom: 25px; |
|
|
} |
|
|
|
|
|
.user-message { |
|
|
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); |
|
|
padding: 15px 20px; |
|
|
border-radius: 18px 18px 5px 18px; |
|
|
margin: 10px 0; |
|
|
max-width: 80%; |
|
|
margin-left: auto; |
|
|
color: white; |
|
|
font-size: 1rem; |
|
|
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3); |
|
|
} |
|
|
|
|
|
.ai-message { |
|
|
background: rgba(255, 255, 255, 0.08); |
|
|
backdrop-filter: blur(10px); |
|
|
padding: 15px 20px; |
|
|
border-radius: 18px 18px 18px 5px; |
|
|
margin: 10px 0; |
|
|
max-width: 80%; |
|
|
margin-right: auto; |
|
|
color: #e2e8f0; |
|
|
font-size: 1rem; |
|
|
border: 1px solid rgba(255, 255, 255, 0.1); |
|
|
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); |
|
|
} |
|
|
|
|
|
.chat-container { |
|
|
background: rgba(255, 255, 255, 0.03); |
|
|
backdrop-filter: blur(10px); |
|
|
padding: 25px; |
|
|
border-radius: 20px; |
|
|
border: 1px solid rgba(255, 255, 255, 0.1); |
|
|
max-height: 500px; |
|
|
overflow-y: auto; |
|
|
margin-bottom: 20px; |
|
|
} |
|
|
</style> |
|
|
""", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
@st.cache_resource |
|
|
def load_resources(): |
|
|
models_dir = os.path.join(os.path.dirname(__file__), '../models') |
|
|
model_path = os.path.join(models_dir, 'best_model.pkl') |
|
|
encoders_path = os.path.join(models_dir, 'label_encoders.pkl') |
|
|
kmeans_path = os.path.join(models_dir, 'kmeans.pkl') |
|
|
|
|
|
if not os.path.exists(model_path) or not os.path.exists(encoders_path) or not os.path.exists(kmeans_path): |
|
|
return None, None, None |
|
|
|
|
|
model = joblib.load(model_path) |
|
|
encoders = joblib.load(encoders_path) |
|
|
kmeans = joblib.load(kmeans_path) |
|
|
return model, encoders, kmeans |
|
|
|
|
|
@st.cache_resource |
|
|
def load_new_artifacts(): |
|
|
try: |
|
|
models_dir = os.path.join(os.path.dirname(__file__), '../models') |
|
|
pkl_path = os.path.join(models_dir, "crime_xgb_artifacts.pkl") |
|
|
with open(pkl_path, 'rb') as f: |
|
|
return pickle.load(f) |
|
|
except Exception as e: |
|
|
st.error(f"❌ Artifact loading error: {e}") |
|
|
return None |
|
|
|
|
|
@st.cache_data |
|
|
def load_data_sample(): |
|
|
data_dir = os.path.join(os.path.dirname(__file__), '../data/crimedataset') |
|
|
try: |
|
|
df = pd.read_csv(os.path.join(data_dir, 'train.csv'), parse_dates=['Dates']) |
|
|
return df.sample(10000, random_state=42) |
|
|
except: |
|
|
return pd.DataFrame() |
|
|
|
|
|
model, encoders, kmeans = load_resources() |
|
|
new_artifacts = load_new_artifacts() |
|
|
df_sample = load_data_sample() |
|
|
|
|
|
|
|
|
@st.cache_resource |
|
|
def get_groq_client(): |
|
|
return Groq(api_key="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") |
|
|
|
|
|
def explain_prediction_with_llama(prompt): |
|
|
"""Use Groq's Llama model to explain crime prediction""" |
|
|
try: |
|
|
client = get_groq_client() |
|
|
chat_completion = client.chat.completions.create( |
|
|
messages=[ |
|
|
{ |
|
|
"role": "user", |
|
|
"content": prompt, |
|
|
} |
|
|
], |
|
|
model="llama-3.3-70b-versatile", |
|
|
) |
|
|
return chat_completion.choices[0].message.content |
|
|
except Exception as e: |
|
|
return f"⚠️ Could not generate explanation: {e}" |
|
|
|
|
|
|
|
|
col1, col2 = st.columns([3, 1]) |
|
|
with col1: |
|
|
st.title("San Francisco Crime Analytics") |
|
|
st.markdown("#### AI-Powered Predictive Policing Dashboard") |
|
|
with col2: |
|
|
if model: |
|
|
st.success("🟢 System Online: Models Loaded") |
|
|
else: |
|
|
st.error("🔴 System Offline: Models Missing") |
|
|
|
|
|
st.sidebar.markdown("---") |
|
|
st.sidebar.markdown("**System Status**") |
|
|
st.sidebar.markdown("🟢 **Online** | ⚡ **12ms**") |
|
|
st.sidebar.markdown(f"📅 {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')}") |
|
|
st.sidebar.markdown("---") |
|
|
|
|
|
|
|
|
st.sidebar.image("https://img.icons8.com/fluency/96/police-badge.png", width=80) |
|
|
st.sidebar.header("Incident Parameters") |
|
|
|
|
|
date_input = st.sidebar.date_input("Date") |
|
|
time_input = st.sidebar.time_input("Time") |
|
|
district = st.sidebar.selectbox("District", options=encoders['PdDistrict'].classes_ if encoders else []) |
|
|
st.sidebar.subheader("Geolocation") |
|
|
latitude = st.sidebar.number_input("Latitude", value=37.7749, format="%.6f") |
|
|
longitude = st.sidebar.number_input("Longitude", value=-122.4194, format="%.6f") |
|
|
|
|
|
|
|
|
if st.sidebar.button("Analyze Risk Level", type="primary"): |
|
|
if model is None: |
|
|
st.error("Model not trained yet. Please run training script.") |
|
|
else: |
|
|
|
|
|
datetime_combined = pd.to_datetime(f"{date_input} {time_input}") |
|
|
|
|
|
input_data = pd.DataFrame({ |
|
|
'Dates': [datetime_combined], |
|
|
'X': [longitude], |
|
|
'Y': [latitude], |
|
|
'PdDistrict': [district] |
|
|
}) |
|
|
|
|
|
|
|
|
processed_df, _ = preprocess_pipeline(input_data, is_train=False, kmeans_model=kmeans) |
|
|
|
|
|
|
|
|
processed_df['PdDistrict'] = encoders['PdDistrict'].transform(processed_df['PdDistrict']) |
|
|
processed_df['Season'] = encoders['Season'].transform(processed_df['Season']) |
|
|
|
|
|
|
|
|
features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season'] |
|
|
|
|
|
prediction = model.predict(processed_df[features])[0] |
|
|
proba = model.predict_proba(processed_df[features])[0] |
|
|
|
|
|
st.markdown("---") |
|
|
st.subheader("Analysis Results") |
|
|
|
|
|
r_col1, r_col2, r_col3 = st.columns(3) |
|
|
|
|
|
with r_col1: |
|
|
st.markdown('<div class="metric-card">', unsafe_allow_html=True) |
|
|
st.metric("Risk Probability", f"{max(proba)*100:.1f}%") |
|
|
st.markdown('</div>', unsafe_allow_html=True) |
|
|
|
|
|
with r_col2: |
|
|
st.markdown('<div class="metric-card">', unsafe_allow_html=True) |
|
|
if prediction == 1: |
|
|
st.metric("Predicted Classification", "VIOLENT", delta="High Risk", delta_color="inverse") |
|
|
else: |
|
|
st.metric("Predicted Classification", "NON-VIOLENT", delta="Low Risk", delta_color="normal") |
|
|
st.markdown('</div>', unsafe_allow_html=True) |
|
|
|
|
|
with r_col3: |
|
|
st.markdown('<div class="metric-card">', unsafe_allow_html=True) |
|
|
st.metric("Location Cluster", f"Zone {processed_df['LocationCluster'][0]}") |
|
|
st.markdown('</div>', unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
st.markdown("### 🤖 AI Analyst Report") |
|
|
risk_level = "CRITICAL" if proba[1] > 0.7 else "ELEVATED" if proba[1] > 0.4 else "STANDARD" |
|
|
report = f""" |
|
|
[CLASSIFIED REPORT - GENERATED BY AI] |
|
|
------------------------------------- |
|
|
DATE: {date_input} | TIME: {time_input} |
|
|
LOCATION: {district} (Lat: {latitude}, Lon: {longitude}) |
|
|
|
|
|
ASSESSMENT: {risk_level} RISK DETECTED |
|
|
PROBABILITY OF VIOLENCE: {proba[1]*100:.2f}% |
|
|
|
|
|
KEY FACTORS: |
|
|
- Time of Day: {time_input.hour}:00 hours (Historical high-risk window) |
|
|
- District Profile: {district} shows elevated activity trends. |
|
|
- Seasonal Context: {get_season(datetime_combined.month)} patterns observed. |
|
|
|
|
|
RECOMMENDATION: |
|
|
Immediate deployment of patrol units advised if risk > 50%. |
|
|
Monitor sector {processed_df['LocationCluster'][0]} closely. |
|
|
""" |
|
|
st.markdown(f'<div class="report-text">{report}</div>', unsafe_allow_html=True) |
|
|
|
|
|
st.download_button( |
|
|
label="📄 Download Full Report", |
|
|
data=report, |
|
|
file_name=f"crime_report_{date_input}_{district}.txt", |
|
|
mime="text/plain" |
|
|
) |
|
|
|
|
|
|
|
|
st.markdown("### 🧠 Model Explainability") |
|
|
if hasattr(model, 'feature_importances_'): |
|
|
feat_imp = pd.DataFrame({ |
|
|
'Feature': features, |
|
|
'Importance': model.feature_importances_ |
|
|
}).sort_values(by='Importance', ascending=False) |
|
|
|
|
|
fig_imp = px.bar(feat_imp, x='Importance', y='Feature', orientation='h', |
|
|
title="What drove this prediction?", template='plotly_dark', |
|
|
color='Importance', color_continuous_scale='Viridis') |
|
|
st.plotly_chart(fig_imp) |
|
|
|
|
|
|
|
|
st.markdown("---") |
|
|
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["📊 Historical Trends", "🗺️ Geospatial Intelligence", "🚨 Tactical Simulation", "💬 Chat with Data", "🧪 Scenario Tester", "🚀 Advanced Prediction (99%)"]) |
|
|
|
|
|
with tab1: |
|
|
if not df_sample.empty: |
|
|
col1, col2 = st.columns(2) |
|
|
|
|
|
with col1: |
|
|
st.subheader("Crime Distribution by Hour") |
|
|
df_sample['Hour'] = df_sample['Dates'].dt.hour |
|
|
hourly_counts = df_sample.groupby('Hour').size().reset_index(name='Count') |
|
|
fig_hour = px.bar(hourly_counts, x='Hour', y='Count', color='Count', |
|
|
color_continuous_scale='RdBu_r', template='plotly_dark') |
|
|
st.plotly_chart(fig_hour) |
|
|
|
|
|
with col2: |
|
|
st.subheader("Incidents by District") |
|
|
district_counts = df_sample['PdDistrict'].value_counts().reset_index() |
|
|
district_counts.columns = ['District', 'Count'] |
|
|
fig_dist = px.pie(district_counts, values='Count', names='District', hole=0.4, |
|
|
template='plotly_dark', color_discrete_sequence=px.colors.sequential.RdBu) |
|
|
st.plotly_chart(fig_dist) |
|
|
else: |
|
|
st.warning("Data loading...") |
|
|
|
|
|
with tab2: |
|
|
st.subheader("Spatiotemporal Crime Analysis") |
|
|
if not df_sample.empty: |
|
|
|
|
|
st.write("**24-Hour Crime Evolution (Time-Lapse)**") |
|
|
|
|
|
|
|
|
|
|
|
heat_data_time = [] |
|
|
time_index = [] |
|
|
|
|
|
for hour in range(24): |
|
|
hour_data = df_sample[df_sample['Dates'].dt.hour == hour] |
|
|
heat_data_time.append(hour_data[['Y', 'X']].values.tolist()) |
|
|
time_index.append(f"{hour:02d}:00") |
|
|
|
|
|
m = folium.Map(location=[37.7749, -122.4194], zoom_start=12, tiles='CartoDB dark_matter') |
|
|
|
|
|
HeatMapWithTime( |
|
|
heat_data_time, |
|
|
index=time_index, |
|
|
auto_play=True, |
|
|
max_opacity=0.8, |
|
|
radius=15 |
|
|
).add_to(m) |
|
|
|
|
|
folium_static(m, width=1000) |
|
|
|
|
|
st.markdown("---") |
|
|
st.write("**Static Density Heatmap**") |
|
|
m_static = folium.Map(location=[37.7749, -122.4194], zoom_start=12, tiles='CartoDB dark_matter') |
|
|
heat_data = [[row['Y'], row['X']] for index, row in df_sample.iterrows()] |
|
|
HeatMap(heat_data, radius=15).add_to(m_static) |
|
|
folium_static(m_static, width=1000) |
|
|
else: |
|
|
st.warning("Data not loaded.") |
|
|
|
|
|
with tab3: |
|
|
st.subheader("Resource Allocation Simulator") |
|
|
st.info("Use this tool to simulate patrol strategies based on predictive risk modeling.") |
|
|
|
|
|
sim_col1, sim_col2 = st.columns([1, 2]) |
|
|
|
|
|
with sim_col1: |
|
|
st.markdown("### Simulation Controls") |
|
|
sim_district = st.selectbox("Target District", options=encoders['PdDistrict'].classes_ if encoders else [], key='sim_dist') |
|
|
sim_hour = st.slider("Patrol Hour", 0, 23, 22) |
|
|
sim_date = st.date_input("Patrol Date", key='sim_date') |
|
|
|
|
|
with sim_col2: |
|
|
st.markdown("### AI Recommendation Engine") |
|
|
if model and kmeans: |
|
|
if not df_sample.empty: |
|
|
district_center = df_sample[df_sample['PdDistrict'] == sim_district][['Y', 'X']].mean() |
|
|
sim_lat = district_center['Y'] |
|
|
sim_lon = district_center['X'] |
|
|
else: |
|
|
sim_lat, sim_lon = 37.7749, -122.4194 |
|
|
|
|
|
sim_datetime = pd.to_datetime(f"{sim_date} {sim_hour}:00:00") |
|
|
|
|
|
sim_input = pd.DataFrame({ |
|
|
'Dates': [sim_datetime], |
|
|
'X': [sim_lon], |
|
|
'Y': [sim_lat], |
|
|
'PdDistrict': [sim_district] |
|
|
}) |
|
|
|
|
|
|
|
|
sim_processed, _ = preprocess_pipeline(sim_input, is_train=False, kmeans_model=kmeans) |
|
|
sim_processed['PdDistrict'] = encoders['PdDistrict'].transform(sim_processed['PdDistrict']) |
|
|
sim_processed['Season'] = encoders['Season'].transform(sim_processed['Season']) |
|
|
|
|
|
|
|
|
features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season'] |
|
|
|
|
|
|
|
|
sim_prob = model.predict_proba(sim_processed[features])[0] |
|
|
violent_prob = sim_prob[1] |
|
|
|
|
|
st.write(f"Analyzing sector **{sim_district}** at **{sim_hour}:00**...") |
|
|
|
|
|
|
|
|
fig_gauge = px.bar(x=[violent_prob], y=["Risk"], orientation='h', range_x=[0, 1], |
|
|
labels={'x': 'Violent Crime Probability', 'y': ''}, height=100, |
|
|
color=[violent_prob], color_continuous_scale=['green', 'yellow', 'red']) |
|
|
fig_gauge.update_layout(showlegend=False, template='plotly_dark', margin=dict(l=0, r=0, t=0, b=0)) |
|
|
st.plotly_chart(fig_gauge) |
|
|
|
|
|
if violent_prob > 0.7: |
|
|
st.error("⚠️ **CRITICAL RISK DETECTED**") |
|
|
st.markdown(""" |
|
|
**Recommended Action Plan:** |
|
|
- 🔴 Deploy SWAT / Heavy Tactical Units |
|
|
- 🚁 Request Aerial Surveillance |
|
|
- 🚧 Establish Perimeter Checkpoints |
|
|
""") |
|
|
elif violent_prob > 0.4: |
|
|
st.warning("⚠️ **ELEVATED RISK**") |
|
|
st.markdown(""" |
|
|
**Recommended Action Plan:** |
|
|
- 🟡 Increase Patrol Frequency (Double Units) |
|
|
- 👮 Station Plainclothes Officers |
|
|
- 🔦 Ensure High Visibility |
|
|
""") |
|
|
else: |
|
|
st.success("✅ **STANDARD RISK**") |
|
|
st.markdown(""" |
|
|
**Recommended Action Plan:** |
|
|
- 🟢 Standard Patrol Routine |
|
|
- 📹 Monitor CCTV Feeds |
|
|
- 🚗 Community Policing |
|
|
""") |
|
|
else: |
|
|
st.warning("Model not loaded. Cannot run simulation.") |
|
|
|
|
|
with tab4: |
|
|
st.subheader("💬 Chat with Data (Natural Language Interface)") |
|
|
st.markdown("Ask questions about the crime data. Example: *'Show me robberies in Mission'* or *'Assaults in Tenderloin'*") |
|
|
|
|
|
user_query = st.text_input("Ask a question...", placeholder="Type here...") |
|
|
|
|
|
if user_query: |
|
|
st.markdown(f'<div class="chat-bubble-user">User: {user_query}</div>', unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
query_lower = user_query.lower() |
|
|
|
|
|
|
|
|
filtered_df = df_sample.copy() |
|
|
|
|
|
|
|
|
found_cat = None |
|
|
categories = df_sample['Category'].unique() |
|
|
for cat in categories: |
|
|
if cat.lower() in query_lower: |
|
|
filtered_df = filtered_df[filtered_df['Category'] == cat] |
|
|
found_cat = cat |
|
|
break |
|
|
|
|
|
|
|
|
found_dist = None |
|
|
districts = df_sample['PdDistrict'].unique() |
|
|
for dist in districts: |
|
|
if dist.lower() in query_lower: |
|
|
filtered_df = filtered_df[filtered_df['PdDistrict'] == dist] |
|
|
found_dist = dist |
|
|
break |
|
|
|
|
|
|
|
|
response_text = "" |
|
|
if found_cat and found_dist: |
|
|
response_text = f"Filtering for **{found_cat}** in **{found_dist}**." |
|
|
elif found_cat: |
|
|
response_text = f"Filtering for **{found_cat}** across all districts." |
|
|
elif found_dist: |
|
|
response_text = f"Showing all crimes in **{found_dist}**." |
|
|
else: |
|
|
response_text = "I couldn't identify a specific category or district. Showing general trends." |
|
|
|
|
|
count = len(filtered_df) |
|
|
response_text += f" Found **{count}** incidents." |
|
|
|
|
|
st.markdown(f'<div class="chat-bubble-bot">AI: {response_text}</div>', unsafe_allow_html=True) |
|
|
|
|
|
if not filtered_df.empty: |
|
|
st.dataframe(filtered_df[['Dates', 'Category', 'PdDistrict', 'Address']].head(10)) |
|
|
|
|
|
|
|
|
if found_dist and not found_cat: |
|
|
|
|
|
fig = px.bar(filtered_df['Category'].value_counts().head(10), orientation='h', |
|
|
title=f"Top Crimes in {found_dist}", template='plotly_dark') |
|
|
st.plotly_chart(fig) |
|
|
elif found_cat: |
|
|
|
|
|
fig = px.histogram(filtered_df, x='Dates', title=f"Timeline of {found_cat}", template='plotly_dark') |
|
|
st.plotly_chart(fig, key="timeline") |
|
|
|
|
|
with tab5: |
|
|
st.subheader("🧪 Model Validation: Scenario Tester") |
|
|
st.info("Test the AI against real historical cases to verify its accuracy.") |
|
|
|
|
|
if 'scenario_case' not in st.session_state: |
|
|
st.session_state.scenario_case = None |
|
|
|
|
|
if st.button("🎲 Load Random Historical Case", type="primary"): |
|
|
if not df_sample.empty: |
|
|
st.session_state.scenario_case = df_sample.sample(1).iloc[0] |
|
|
else: |
|
|
st.warning("Data not loaded.") |
|
|
|
|
|
if st.session_state.scenario_case is not None: |
|
|
case = st.session_state.scenario_case |
|
|
|
|
|
|
|
|
st.markdown("### 📁 Case File #8921-X") |
|
|
c1, c2, c3 = st.columns(3) |
|
|
with c1: |
|
|
st.markdown(f"**Date:** {case['Dates'].date()}") |
|
|
st.markdown(f"**Time:** {case['Dates'].time()}") |
|
|
with c2: |
|
|
st.markdown(f"**District:** {case['PdDistrict']}") |
|
|
st.markdown(f"**Location:** {case['Address']}") |
|
|
with c3: |
|
|
st.markdown(f"**Coordinates:** {case['Y']:.4f}, {case['X']:.4f}") |
|
|
|
|
|
st.markdown("---") |
|
|
|
|
|
if st.button("🤖 Run AI Analysis"): |
|
|
|
|
|
input_data = pd.DataFrame({ |
|
|
'Dates': [case['Dates']], |
|
|
'X': [case['X']], |
|
|
'Y': [case['Y']], |
|
|
'PdDistrict': [case['PdDistrict']] |
|
|
}) |
|
|
|
|
|
|
|
|
processed_df, _ = preprocess_pipeline(input_data, is_train=False, kmeans_model=kmeans) |
|
|
processed_df['PdDistrict'] = encoders['PdDistrict'].transform(processed_df['PdDistrict']) |
|
|
processed_df['Season'] = encoders['Season'].transform(processed_df['Season']) |
|
|
|
|
|
|
|
|
features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season'] |
|
|
|
|
|
|
|
|
prediction = model.predict(processed_df[features])[0] |
|
|
proba = model.predict_proba(processed_df[features])[0] |
|
|
|
|
|
|
|
|
violent_categories = ['ASSAULT', 'ROBBERY', 'SEX OFFENSES FORCIBLE', 'KIDNAPPING', 'HOMICIDE', 'ARSON'] |
|
|
actual_is_violent = 1 if case['Category'] in violent_categories else 0 |
|
|
actual_label = "VIOLENT" if actual_is_violent else "NON-VIOLENT" |
|
|
pred_label = "VIOLENT" if prediction == 1 else "NON-VIOLENT" |
|
|
|
|
|
|
|
|
r1, r2 = st.columns(2) |
|
|
|
|
|
with r1: |
|
|
st.markdown("#### AI Prediction") |
|
|
if prediction == 1: |
|
|
st.error(f"**{pred_label}** ({proba[1]*100:.1f}% Confidence)") |
|
|
else: |
|
|
st.success(f"**{pred_label}** ({proba[0]*100:.1f}% Confidence)") |
|
|
|
|
|
with r2: |
|
|
st.markdown("#### Actual Outcome") |
|
|
st.markdown(f"**Category:** {case['Category']}") |
|
|
if actual_is_violent: |
|
|
st.markdown(f"**Classification:** :red[{actual_label}]") |
|
|
else: |
|
|
st.markdown(f"**Classification:** :green[{actual_label}]") |
|
|
|
|
|
st.markdown("---") |
|
|
if prediction == actual_is_violent: |
|
|
st.success("✅ **AI Model Correctly Classified this Incident**") |
|
|
st.balloons() |
|
|
else: |
|
|
st.error("❌ **AI Model Incorrect** (Complex real-world variability)") |
|
|
|
|
|
with tab6: |
|
|
st.subheader("🚀 Advanced Prediction (99% Accuracy)") |
|
|
st.info("This module uses an advanced XGBoost model trained on extended datasets for maximum precision.") |
|
|
|
|
|
if new_artifacts: |
|
|
model_xgb = new_artifacts['model'] |
|
|
le_target = new_artifacts['le_target'] |
|
|
addr_hasher = new_artifacts['addr_hasher'] |
|
|
desc_hasher = new_artifacts['desc_hasher'] |
|
|
dense_cols = new_artifacts['dense_cols'] |
|
|
|
|
|
col_input1, col_input2 = st.columns(2) |
|
|
|
|
|
with col_input1: |
|
|
adv_date = st.date_input("📅 Date", key="adv_date") |
|
|
adv_time = st.time_input("⏰ Time", key="adv_time") |
|
|
adv_lat = st.number_input("📍 Latitude", value=37.7749, format="%.6f", key="adv_lat") |
|
|
adv_lng = st.number_input("📍 Longitude", value=-122.4194, format="%.6f", key="adv_lng") |
|
|
|
|
|
with col_input2: |
|
|
districts = sorted(['BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION', 'NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN']) |
|
|
adv_district = st.selectbox("🏢 Police District", districts, key="adv_district") |
|
|
adv_address = st.text_input("📌 Address", "", key="adv_address") |
|
|
adv_desc = st.text_area("📝 Description", "", key="adv_desc") |
|
|
|
|
|
if st.button("⚡ Run Advanced Analysis", type="primary"): |
|
|
try: |
|
|
dt_obj = pd.to_datetime(f"{adv_date} {adv_time}") |
|
|
hour = dt_obj.hour |
|
|
|
|
|
dense_data = { |
|
|
'X': float(adv_lng), |
|
|
'Y': float(adv_lat), |
|
|
'Year': dt_obj.year, |
|
|
'Month': dt_obj.month, |
|
|
'Day': dt_obj.day, |
|
|
'Minute': dt_obj.minute, |
|
|
'Hour': hour, |
|
|
'Hour_sin': np.sin(2 * np.pi * hour / 24), |
|
|
'Hour_cos': np.cos(2 * np.pi * hour / 24), |
|
|
'PdDistrict_enc': districts.index(adv_district), |
|
|
'DayOfWeek_enc': dt_obj.dayofweek |
|
|
} |
|
|
|
|
|
dense_df = pd.DataFrame([dense_data])[dense_cols] |
|
|
dense_sparse = csr_matrix(dense_df.values) |
|
|
|
|
|
addr_hashed = addr_hasher.transform([adv_address.split()]) |
|
|
desc_hashed = desc_hasher.transform([adv_desc.split()]) |
|
|
|
|
|
features = hstack([dense_sparse, addr_hashed, desc_hashed]) |
|
|
|
|
|
probs = model_xgb.predict_proba(features)[0] |
|
|
top_idx = np.argmax(probs) |
|
|
|
|
|
category = le_target.inverse_transform([top_idx])[0] |
|
|
confidence = probs[top_idx] * 100 |
|
|
|
|
|
st.markdown("---") |
|
|
st.subheader("Analysis Results") |
|
|
|
|
|
res_c1, res_c2 = st.columns([1, 2]) |
|
|
|
|
|
with res_c1: |
|
|
st.success(f"### 🚨 Predicted: **{category}**") |
|
|
st.metric("Confidence Score", f"{confidence:.2f}%") |
|
|
|
|
|
with res_c2: |
|
|
|
|
|
top3 = probs.argsort()[-3:][::-1] |
|
|
chart_data = pd.DataFrame({ |
|
|
"Category": le_target.inverse_transform(top3), |
|
|
"Probability": probs[top3] |
|
|
}).sort_values(by="Probability", ascending=True) |
|
|
|
|
|
fig_adv = px.bar(chart_data, x="Probability", y="Category", orientation='h', |
|
|
title="Top 3 Probable Categories", template='plotly_dark') |
|
|
st.plotly_chart(fig_adv) |
|
|
|
|
|
|
|
|
if adv_desc: |
|
|
with st.spinner("🧠 Generating AI explanation..."): |
|
|
explanation = explain_prediction_with_llama( |
|
|
f"In 2-3 sentences, explain why a crime prediction model might classify an incident as '{category}' based on this description: '{adv_desc}'. Be concise and factual." |
|
|
) |
|
|
st.markdown("### 🧠 AI Analyst Insight") |
|
|
st.info(explanation) |
|
|
|
|
|
except Exception as e: |
|
|
st.error(f"❌ Prediction Error: {e}") |
|
|
else: |
|
|
st.error("Advanced model artifacts not loaded.") |
|
|
|
|
|
|
|
|
st.markdown("---") |
|
|
st.markdown("<div class='glass-card'>", unsafe_allow_html=True) |
|
|
st.subheader("💬 AI Crime Safety Assistant") |
|
|
st.markdown("Ask me anything about crime prediction, safety tips, or how this system works!", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
if 'messages' not in st.session_state: |
|
|
st.session_state.messages = [ |
|
|
{"role": "assistant", "content": "👋 Hello! I'm your AI Crime Safety Assistant. I can help you understand crime patterns, provide safety recommendations, and explain how our prediction model works. What would you like to know?"} |
|
|
] |
|
|
|
|
|
|
|
|
st.markdown("<div class='chat-container'>", unsafe_allow_html=True) |
|
|
for message in st.session_state.messages: |
|
|
if message["role"] == "user": |
|
|
st.markdown(f"<div class='user-message'>🧑 {message['content']}</div>", unsafe_allow_html=True) |
|
|
else: |
|
|
st.markdown(f"<div class='ai-message'>🤖 {message['content']}</div>", unsafe_allow_html=True) |
|
|
st.markdown("</div>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
col1, col2 = st.columns([5, 1]) |
|
|
with col1: |
|
|
user_input = st.text_input("Type your message...", key="chat_input", label_visibility="collapsed", placeholder="Ask about crime safety, predictions, or get recommendations...") |
|
|
with col2: |
|
|
send_button = st.button("Send 📤", use_container_width=True) |
|
|
|
|
|
|
|
|
if send_button and user_input: |
|
|
|
|
|
st.session_state.messages.append({"role": "user", "content": user_input}) |
|
|
|
|
|
|
|
|
with st.spinner("🧠 Thinking..."): |
|
|
try: |
|
|
client = get_groq_client() |
|
|
|
|
|
|
|
|
system_prompt = """You are an AI Crime Safety Assistant for a crime prediction system. |
|
|
You help users understand: |
|
|
- Crime patterns and trends in San Francisco |
|
|
- How the XGBoost machine learning model predicts crime categories |
|
|
- Safety tips and recommendations based on location and time |
|
|
- What factors influence crime predictions (time, location, historical data) |
|
|
|
|
|
Be helpful, concise, and informative. Keep responses to 2-3 sentences unless more detail is needed. |
|
|
If asked about the model, explain it uses features like latitude, longitude, time, district, and description to predict crime types.""" |
|
|
|
|
|
|
|
|
api_messages = [{"role": "system", "content": system_prompt}] |
|
|
|
|
|
|
|
|
for msg in st.session_state.messages[-5:]: |
|
|
api_messages.append({"role": msg["role"], "content": msg["content"]}) |
|
|
|
|
|
|
|
|
chat_completion = client.chat.completions.create( |
|
|
messages=api_messages, |
|
|
model="llama-3.3-70b-versatile", |
|
|
temperature=0.7, |
|
|
max_tokens=500 |
|
|
) |
|
|
|
|
|
ai_response = chat_completion.choices[0].message.content |
|
|
|
|
|
|
|
|
st.session_state.messages.append({"role": "assistant", "content": ai_response}) |
|
|
|
|
|
except Exception as e: |
|
|
error_msg = f"⚠️ Sorry, I encountered an error: {str(e)}" |
|
|
st.session_state.messages.append({"role": "assistant", "content": error_msg}) |
|
|
|
|
|
|
|
|
st.rerun() |
|
|
|
|
|
st.markdown("</div>", unsafe_allow_html=True) |
|
|
|