"""Streamlit front-end for the AI crime-category predictor.

Loads the pickled XGBoost model + feature hashers, collects incident
details from the sidebar, and (optionally) asks a Groq-hosted Llama
model for natural-language explanations.
"""

import datetime
import os
import pickle

import numpy as np
import pandas as pd
import streamlit as st
import xgboost as xgb  # noqa: F401 — needed so pickle can resolve the model class
from groq import Groq
from scipy.sparse import csr_matrix, hstack

# ------------------- PAGE CONFIG -------------------
st.set_page_config(
    page_title="AI Crime Predictor",
    page_icon="🚓",
    layout="wide",
)

# ------------------- CUSTOM CSS -------------------
st.markdown("""
""", unsafe_allow_html=True)

# ------------------- TITLE -------------------
st.markdown("# 🚓 AI Crime Prediction System")
st.markdown("Predict crime category using time, location, and incident description.")


# ------------------- LOAD MODEL -------------------
@st.cache_resource
def load_artifacts():
    """Load the pickled model bundle (model, label encoder, hashers, column order).

    Returns the unpickled dict, or None if the file is missing/corrupt
    (an error banner is shown instead of raising, so the app can stop
    gracefully below).
    """
    try:
        # Path is relative to the directory streamlit_app.py is launched from.
        pkl_path = "src/crime_xgb_artifacts.pkl"
        with open(pkl_path, "rb") as f:
            return pickle.load(f)
    except Exception as e:
        st.error(f"❌ Artifact loading error: {e}")
        return None


artifacts = load_artifacts()
if not artifacts:
    st.warning("Artifacts missing! Add `crime_xgb_artifacts.pkl` in directory.")
    st.stop()

# Unpack the training artifacts used at prediction time.
model = artifacts['model']              # fitted XGBoost classifier
le_target = artifacts['le_target']      # LabelEncoder for crime categories
addr_hasher = artifacts['addr_hasher']  # FeatureHasher for address tokens
desc_hasher = artifacts['desc_hasher']  # FeatureHasher for description tokens
dense_cols = artifacts['dense_cols']    # dense-feature column order used in training


# ------------------- GROQ SETUP -------------------
@st.cache_resource
def get_groq_client():
    """Build a cached Groq client.

    SECURITY FIX: the API key was previously hardcoded in source (a leaked
    credential). It is now read from the GROQ_API_KEY environment variable,
    falling back to Streamlit secrets; the app stops with a clear message
    if neither is configured.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        try:
            api_key = st.secrets["GROQ_API_KEY"]
        except Exception:
            api_key = None
    if not api_key:
        st.error(
            "❌ GROQ_API_KEY is not configured. Set the environment variable "
            "or add it to `.streamlit/secrets.toml`."
        )
        st.stop()
    return Groq(api_key=api_key)


def explain_prediction_with_llama(prompt):
    """Use Groq's Llama model to explain crime prediction.

    Returns the model's reply text, or a warning string on any API failure
    (best-effort — an explanation is optional UI sugar, never fatal).
    """
    try:
        client = get_groq_client()
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.3-70b-versatile",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"⚠️ Could not generate explanation: {e}"


# ------------------- SIDEBAR -------------------
st.sidebar.title("📝 Input Features")

date = st.sidebar.date_input("📅 Date", datetime.date.today())
time = st.sidebar.time_input("⏰ Time", datetime.datetime.now().time())

# Default to downtown San Francisco.
default_lat = 37.7749
default_lng = -122.4194
lat = st.sidebar.number_input("📍 Latitude", value=default_lat, format="%.6f")
lng = st.sidebar.number_input("📍 Longitude", value=default_lng, format="%.6f")

# sorted() so the index of each district is stable; PdDistrict_enc below
# relies on this ordering matching the one used at training time.
districts = sorted(['BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION', 'NORTHERN',
                    'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN'])
district = st.sidebar.selectbox("🏢 Police District", districts)

address = st.sidebar.text_input("📌 Address", "")
description = st.sidebar.text_area("📝 Description", "")
# ------------------- MAIN PREDICTION CARD -------------------
with st.container():
    st.subheader("🔍 Prediction Panel")

    if st.button("🚓 Predict Crime Category"):
        try:
            # Merge the sidebar date and time into one timestamp.
            when = pd.to_datetime(f"{date} {time}")
            hr = when.hour

            # Dense numeric features. The hour is additionally encoded on the
            # unit circle (sin/cos) so 23:00 and 00:00 sit close together.
            dense_values = {
                'X': float(lng),
                'Y': float(lat),
                'Year': when.year,
                'Month': when.month,
                'Day': when.day,
                'Minute': when.minute,
                'Hour': hr,
                'Hour_sin': np.sin(2 * np.pi * hr / 24),
                'Hour_cos': np.cos(2 * np.pi * hr / 24),
                # NOTE(review): assumes districts' sorted order matches the
                # encoder used during training — confirm against training code.
                'PdDistrict_enc': districts.index(district),
                'DayOfWeek_enc': when.dayofweek,
            }

            # Reorder columns to the exact layout the model was trained on.
            frame = pd.DataFrame([dense_values])[dense_cols]
            dense_part = csr_matrix(frame.values)

            # Hash the free-text fields with the fitted hashers, then stack
            # everything into one sparse feature row.
            hashed_addr = addr_hasher.transform([address.split()])
            hashed_desc = desc_hasher.transform([description.split()])
            features = hstack([dense_part, hashed_addr, hashed_desc])

            probs = model.predict_proba(features)[0]
            best = np.argmax(probs)
            category = le_target.inverse_transform([best])[0]
            confidence = probs[best] * 100

            st.success(f"### 🚨 Predicted Category: **{category}**")
            st.info(f"**Confidence:** {confidence:.2f}%")

            # Bar chart of the three most likely categories.
            top3 = probs.argsort()[-3:][::-1]
            chart_data = pd.DataFrame({
                "Category": le_target.inverse_transform(top3),
                "Probability": probs[top3],
            }).set_index("Category")
            st.subheader("📊 Top 3 Probabilities")
            st.bar_chart(chart_data)

            st.subheader("📍 Location Preview")
            st.map(pd.DataFrame({"lat": [lat], "lon": [lng]}))

            # Optional natural-language explanation via Groq (only when the
            # user provided a description to reason about).
            if description:
                with st.spinner("🧠 Generating AI explanation..."):
                    explanation = explain_prediction_with_llama(
                        f"In 2-3 sentences, explain why a crime prediction model "
                        f"might classify an incident as '{category}' based on this "
                        f"description: '{description}'. Be concise and factual."
                    )
                    st.subheader("🧠 AI Explanation")
                    st.write(explanation)

        except Exception as e:
            st.error(f"❌ Prediction Error: {e}")

# ------------------- INTERACTIVE CHATBOT -------------------
st.markdown("---")
", unsafe_allow_html=True) st.subheader("💬 AI Crime Safety Assistant") st.markdown("Ask me anything about crime prediction, safety tips, or how this system works!", unsafe_allow_html=True) # Initialize chat history in session state if 'messages' not in st.session_state: st.session_state.messages = [ {"role": "assistant", "content": "👋 Hello! I'm your AI Crime Safety Assistant. I can help you understand crime patterns, provide safety recommendations, and explain how our prediction model works. What would you like to know?"} ] # Display chat history st.markdown("
", unsafe_allow_html=True) for message in st.session_state.messages: if message["role"] == "user": st.markdown(f"
🧑 {message['content']}
", unsafe_allow_html=True) else: st.markdown(f"
🤖 {message['content']}
", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) # Chat input col1, col2 = st.columns([5, 1]) with col1: user_input = st.text_input("Type your message...", key="chat_input", label_visibility="collapsed", placeholder="Ask about crime safety, predictions, or get recommendations...") with col2: send_button = st.button("Send 📤", use_container_width=True) # Handle chat submission if send_button and user_input: # Add user message to history st.session_state.messages.append({"role": "user", "content": user_input}) # Get AI response using Groq with st.spinner("🧠 Thinking..."): try: client = get_groq_client() # Create system prompt for crime prediction context system_prompt = """You are an AI Crime Safety Assistant for a crime prediction system. You help users understand: - Crime patterns and trends in San Francisco - How the XGBoost machine learning model predicts crime categories - Safety tips and recommendations based on location and time - What factors influence crime predictions (time, location, historical data) Be helpful, concise, and informative. Keep responses to 2-3 sentences unless more detail is needed. 
If asked about the model, explain it uses features like latitude, longitude, time, district, and description to predict crime types.""" # Prepare messages for Groq API api_messages = [{"role": "system", "content": system_prompt}] # Add recent chat history (last 5 messages for context) for msg in st.session_state.messages[-5:]: api_messages.append({"role": msg["role"], "content": msg["content"]}) # Get response from Groq chat_completion = client.chat.completions.create( messages=api_messages, model="llama-3.3-70b-versatile", temperature=0.7, max_tokens=500 ) ai_response = chat_completion.choices[0].message.content # Add AI response to history st.session_state.messages.append({"role": "assistant", "content": ai_response}) except Exception as e: error_msg = f"⚠️ Sorry, I encountered an error: {str(e)}" st.session_state.messages.append({"role": "assistant", "content": error_msg}) # Rerun to update chat display st.rerun() st.markdown("
", unsafe_allow_html=True)