import streamlit as st
import pandas as pd
import numpy as np
import os
import xgboost as xgb
import pickle
import datetime
from scipy.sparse import hstack, csr_matrix
from groq import Groq
# ------------------- PAGE CONFIG -------------------
st.set_page_config(
    page_title="AI Crime Predictor",
    page_icon="🚓",
    layout="wide",
)

# ------------------- CUSTOM CSS -------------------
# NOTE(review): the original CSS payload appears to have been stripped from
# this file; the empty style block is kept so custom CSS can be re-added
# without changing the page flow.
st.markdown("""
""", unsafe_allow_html=True)

# ------------------- TITLE -------------------
# Fixed: the original literals illegally spanned multiple lines (syntax error).
st.markdown('🚓 AI Crime Prediction System', unsafe_allow_html=True)
st.markdown(
    'Predict crime category using time, location, and incident description.',
    unsafe_allow_html=True,
)
# ------------------- LOAD MODEL -------------------
@st.cache_resource
def load_artifacts():
    """Load the pickled model bundle (model, target encoder, hashers, columns).

    Returns:
        dict | None: the artifact dictionary on success; ``None`` after
        surfacing the error in the UI when the file is missing or corrupt.
    """
    try:
        # path relative to streamlit_app.py
        pkl_path = "src/crime_xgb_artifacts.pkl"
        # SECURITY NOTE: pickle.load executes arbitrary code — only ever load
        # artifact files produced by this project, never untrusted uploads.
        with open(pkl_path, 'rb') as f:
            return pickle.load(f)
    except Exception as e:
        st.error(f"❌ Artifact loading error: {e}")
        return None
# Load once (cached) and refuse to render the rest of the app without it.
artifacts = load_artifacts()
if not artifacts:
    st.warning("Artifacts missing! Add `crime_xgb_artifacts.pkl` in directory.")
    st.stop()

model = artifacts['model']              # trained XGBoost classifier
le_target = artifacts['le_target']      # label encoder for the crime category target
addr_hasher = artifacts['addr_hasher']  # text hasher for address tokens (presumably FeatureHasher — confirm)
desc_hasher = artifacts['desc_hasher']  # text hasher for description tokens (presumably FeatureHasher — confirm)
dense_cols = artifacts['dense_cols']    # ordered list of dense feature column names
# ------------------- GROQ SETUP -------------------
# ------------------- GROQ SETUP -------------------
@st.cache_resource
def get_groq_client():
    """Create (and cache) the Groq API client.

    SECURITY FIX: the API key was previously hard-coded in source control.
    It is now read from the ``GROQ_API_KEY`` environment variable, falling
    back to Streamlit secrets; the app halts with a clear error if neither
    is configured.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        try:
            api_key = st.secrets["GROQ_API_KEY"]
        except Exception:
            # st.secrets raises when no secrets.toml exists — treat as unset.
            api_key = None
    if not api_key:
        st.error("❌ GROQ_API_KEY is not configured. Set it as an environment variable or in st.secrets.")
        st.stop()
    return Groq(api_key=api_key)
def explain_prediction_with_llama(prompt):
    """Use Groq's Llama model to explain a crime prediction.

    Args:
        prompt: fully-formed instruction string for the LLM.

    Returns:
        str: the model's explanation text, or a user-facing warning string
        if the API call fails (errors are never raised to the caller).
    """
    try:
        client = get_groq_client()
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.3-70b-versatile",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"⚠️ Could not generate explanation: {e}"
# ------------------- SIDEBAR -------------------
st.sidebar.title("📝 Input Features")

date = st.sidebar.date_input("📅 Date", datetime.date.today())
# NOTE(review): `time` shadows the stdlib module name; kept because the
# prediction panel below reads this variable by name.
time = st.sidebar.time_input("⏰ Time", datetime.datetime.now().time())

# San Francisco city centre used as the default coordinates.
default_lat = 37.7749
default_lng = -122.4194
lat = st.sidebar.number_input("📍 Latitude", value=default_lat, format="%.6f")
lng = st.sidebar.number_input("📍 Longitude", value=default_lng, format="%.6f")

# SFPD district list; sorted so the selectbox order is deterministic.
districts = sorted(['BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION', 'NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN'])
district = st.sidebar.selectbox("🏢 Police District", districts)
address = st.sidebar.text_input("📌 Address", "")
description = st.sidebar.text_area("📝 Description", "")
# ------------------- MAIN PREDICTION CARD -------------------
# ------------------- MAIN PREDICTION CARD -------------------
with st.container():
    st.markdown("", unsafe_allow_html=True)
    st.subheader("🔍 Prediction Panel")

    if st.button("🚓 Predict Crime Category"):
        try:
            # Combine the sidebar date + time into one timestamp.
            dt_obj = pd.to_datetime(f"{date} {time}")
            hour = dt_obj.hour

            # Dense features mirror the training pipeline; the cyclical
            # sin/cos encoding lets the model treat 23:00 and 00:00 as close.
            dense_data = {
                'X': float(lng),
                'Y': float(lat),
                'Year': dt_obj.year,
                'Month': dt_obj.month,
                'Day': dt_obj.day,
                'Minute': dt_obj.minute,
                'Hour': hour,
                'Hour_sin': np.sin(2 * np.pi * hour / 24),
                'Hour_cos': np.cos(2 * np.pi * hour / 24),
                # Index into the sorted district list matches label encoding
                # on sorted categories — assumed to match training; confirm.
                'PdDistrict_enc': districts.index(district),
                'DayOfWeek_enc': dt_obj.dayofweek
            }
            # Reorder columns exactly as the model was trained.
            dense_df = pd.DataFrame([dense_data])[dense_cols]
            dense_sparse = csr_matrix(dense_df.values)

            # Hash the free-text fields token-by-token, then stack all
            # feature groups into one sparse matrix.
            addr_hashed = addr_hasher.transform([address.split()])
            desc_hashed = desc_hasher.transform([description.split()])
            features = hstack([dense_sparse, addr_hashed, desc_hashed])

            probs = model.predict_proba(features)[0]
            top_idx = np.argmax(probs)
            category = le_target.inverse_transform([top_idx])[0]
            confidence = probs[top_idx] * 100

            st.success(f"### 🚨 Predicted Category: **{category}**")
            st.info(f"**Confidence:** {confidence:.2f}%")

            # Top 3 chart
            top3 = probs.argsort()[-3:][::-1]
            chart_data = pd.DataFrame({
                "Category": le_target.inverse_transform(top3),
                "Probability": probs[top3]
            }).set_index("Category")
            st.subheader("📊 Top 3 Probabilities")
            st.bar_chart(chart_data)

            st.subheader("📍 Location Preview")
            st.map(pd.DataFrame({"lat": [lat], "lon": [lng]}))

            # AI Explanation using Groq (only when a description was given).
            if description:
                with st.spinner("🧠 Generating AI explanation..."):
                    explanation = explain_prediction_with_llama(
                        f"In 2-3 sentences, explain why a crime prediction model might classify an incident as '{category}' based on this description: '{description}'. Be concise and factual."
                    )
                st.subheader("🧠 AI Explanation")
                st.write(explanation)
        except Exception as e:
            st.error(f"❌ Prediction Error: {e}")

    # Fixed: original string literal was illegally split across lines.
    st.markdown("", unsafe_allow_html=True)
# ------------------- INTERACTIVE CHATBOT -------------------
st.markdown("---")
st.markdown("", unsafe_allow_html=True)
st.subheader("💬 AI Crime Safety Assistant")
st.markdown("Ask me anything about crime prediction, safety tips, or how this system works!", unsafe_allow_html=True)

# Initialize chat history in session state (survives Streamlit reruns).
if 'messages' not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "👋 Hello! I'm your AI Crime Safety Assistant. I can help you understand crime patterns, provide safety recommendations, and explain how our prediction model works. What would you like to know?"}
    ]

# Display chat history. Fixed: the original f-string literals were illegally
# split across lines (syntax errors); the HTML wrappers around each bubble
# appear to have been stripped from this file.
for message in st.session_state.messages:
    if message["role"] == "user":
        st.markdown(f"🧑 {message['content']}", unsafe_allow_html=True)
    else:
        st.markdown(f"🤖 {message['content']}", unsafe_allow_html=True)
# Chat input: wide text box next to a narrow send button.
col1, col2 = st.columns([5, 1])
with col1:
    user_input = st.text_input("Type your message...", key="chat_input", label_visibility="collapsed", placeholder="Ask about crime safety, predictions, or get recommendations...")
with col2:
    send_button = st.button("Send 📤", use_container_width=True)

# Handle chat submission
if send_button and user_input:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Get AI response using Groq
    with st.spinner("🧠 Thinking..."):
        try:
            client = get_groq_client()

            # System prompt anchors the assistant to this app's domain.
            system_prompt = """You are an AI Crime Safety Assistant for a crime prediction system.
You help users understand:
- Crime patterns and trends in San Francisco
- How the XGBoost machine learning model predicts crime categories
- Safety tips and recommendations based on location and time
- What factors influence crime predictions (time, location, historical data)
Be helpful, concise, and informative. Keep responses to 2-3 sentences unless more detail is needed.
If asked about the model, explain it uses features like latitude, longitude, time, district, and description to predict crime types."""

            # Prepare messages for the Groq API: system prompt followed by
            # the last 5 chat turns for conversational context.
            api_messages = [{"role": "system", "content": system_prompt}]
            for msg in st.session_state.messages[-5:]:
                api_messages.append({"role": msg["role"], "content": msg["content"]})

            chat_completion = client.chat.completions.create(
                messages=api_messages,
                model="llama-3.3-70b-versatile",
                temperature=0.7,
                max_tokens=500
            )
            ai_response = chat_completion.choices[0].message.content

            st.session_state.messages.append({"role": "assistant", "content": ai_response})
        except Exception as e:
            # Surface API failures as a chat message rather than crashing.
            error_msg = f"⚠️ Sorry, I encountered an error: {str(e)}"
            st.session_state.messages.append({"role": "assistant", "content": error_msg})

    # Rerun so the history block above re-renders with the new messages.
    st.rerun()

# Fixed: original string literal was illegally split across lines.
st.markdown("", unsafe_allow_html=True)