Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +773 -38
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,775 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
""
|
| 7 |
-
# Welcome to Streamlit!
|
| 8 |
-
|
| 9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
| 10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 11 |
-
forums](https://discuss.streamlit.io).
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import joblib
|
| 5 |
+
import os
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
import folium
|
| 8 |
+
from folium.plugins import HeatMap, HeatMapWithTime
|
| 9 |
+
from streamlit_folium import folium_static
|
| 10 |
+
from preprocessing import preprocess_pipeline, get_season
|
| 11 |
+
import xgboost as xgb
|
| 12 |
+
import pickle
|
| 13 |
+
from scipy.sparse import hstack, csr_matrix
|
| 14 |
+
from groq import Groq
|
| 15 |
+
|
| 16 |
+
# Set page config
|
| 17 |
+
st.set_page_config(
|
| 18 |
+
page_title="SF Crime Analytics | AI-Powered",
|
| 19 |
+
page_icon="🚓",
|
| 20 |
+
layout="wide",
|
| 21 |
+
initial_sidebar_state="expanded"
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
# Custom CSS for Premium Look
|
| 25 |
+
st.markdown("""
|
| 26 |
+
<style>
|
| 27 |
+
.main {
|
| 28 |
+
background-color: #0e1117;
|
| 29 |
+
}
|
| 30 |
+
.stApp {
|
| 31 |
+
background-color: #0e1117;
|
| 32 |
+
}
|
| 33 |
+
h1, h2, h3 {
|
| 34 |
+
color: #ffffff;
|
| 35 |
+
font-family: 'Helvetica Neue', sans-serif;
|
| 36 |
+
font-weight: 700;
|
| 37 |
+
}
|
| 38 |
+
.stButton>button {
|
| 39 |
+
background-color: #ff4b4b;
|
| 40 |
+
color: white;
|
| 41 |
+
border-radius: 20px;
|
| 42 |
+
padding: 10px 24px;
|
| 43 |
+
font-weight: 600;
|
| 44 |
+
border: none;
|
| 45 |
+
transition: all 0.3s ease;
|
| 46 |
+
}
|
| 47 |
+
.stButton>button:hover {
|
| 48 |
+
background-color: #ff3333;
|
| 49 |
+
transform: scale(1.05);
|
| 50 |
+
}
|
| 51 |
+
.metric-card {
|
| 52 |
+
background-color: #262730;
|
| 53 |
+
padding: 20px;
|
| 54 |
+
border-radius: 10px;
|
| 55 |
+
border-left: 5px solid #ff4b4b;
|
| 56 |
+
box-shadow: 0 4px 6px rgba(0,0,0,0.3);
|
| 57 |
+
}
|
| 58 |
+
.report-text {
|
| 59 |
+
font-family: 'Courier New', monospace;
|
| 60 |
+
color: #00ff00;
|
| 61 |
+
background-color: #000000;
|
| 62 |
+
padding: 15px;
|
| 63 |
+
border-radius: 5px;
|
| 64 |
+
border: 1px solid #00ff00;
|
| 65 |
+
}
|
| 66 |
+
.chat-bubble-user {
|
| 67 |
+
background-color: #2b313e;
|
| 68 |
+
color: white;
|
| 69 |
+
padding: 10px;
|
| 70 |
+
border-radius: 15px 15px 0 15px;
|
| 71 |
+
margin: 5px;
|
| 72 |
+
text-align: right;
|
| 73 |
+
}
|
| 74 |
+
.chat-bubble-bot {
|
| 75 |
+
background-color: #ff4b4b;
|
| 76 |
+
color: white;
|
| 77 |
+
padding: 10px;
|
| 78 |
+
border-radius: 15px 15px 15px 0;
|
| 79 |
+
margin: 5px;
|
| 80 |
+
text-align: left;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
/* New Chat Assistant Styles */
|
| 84 |
+
.glass-card {
|
| 85 |
+
background: rgba(255, 255, 255, 0.05);
|
| 86 |
+
backdrop-filter: blur(10px);
|
| 87 |
+
-webkit-backdrop-filter: blur(10px);
|
| 88 |
+
padding: 30px;
|
| 89 |
+
border-radius: 24px;
|
| 90 |
+
border: 1px solid rgba(255, 255, 255, 0.1);
|
| 91 |
+
box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
|
| 92 |
+
transition: all 0.4s ease;
|
| 93 |
+
margin-bottom: 25px;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.user-message {
|
| 97 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 98 |
+
padding: 15px 20px;
|
| 99 |
+
border-radius: 18px 18px 5px 18px;
|
| 100 |
+
margin: 10px 0;
|
| 101 |
+
max-width: 80%;
|
| 102 |
+
margin-left: auto;
|
| 103 |
+
color: white;
|
| 104 |
+
font-size: 1rem;
|
| 105 |
+
box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.ai-message {
|
| 109 |
+
background: rgba(255, 255, 255, 0.08);
|
| 110 |
+
backdrop-filter: blur(10px);
|
| 111 |
+
padding: 15px 20px;
|
| 112 |
+
border-radius: 18px 18px 18px 5px;
|
| 113 |
+
margin: 10px 0;
|
| 114 |
+
max-width: 80%;
|
| 115 |
+
margin-right: auto;
|
| 116 |
+
color: #e2e8f0;
|
| 117 |
+
font-size: 1rem;
|
| 118 |
+
border: 1px solid rgba(255, 255, 255, 0.1);
|
| 119 |
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
.chat-container {
|
| 123 |
+
background: rgba(255, 255, 255, 0.03);
|
| 124 |
+
backdrop-filter: blur(10px);
|
| 125 |
+
padding: 25px;
|
| 126 |
+
border-radius: 20px;
|
| 127 |
+
border: 1px solid rgba(255, 255, 255, 0.1);
|
| 128 |
+
max-height: 500px;
|
| 129 |
+
overflow-y: auto;
|
| 130 |
+
margin-bottom: 20px;
|
| 131 |
+
}
|
| 132 |
+
</style>
|
| 133 |
+
""", unsafe_allow_html=True)
|
| 134 |
+
|
| 135 |
+
# Load Resources
|
| 136 |
+
@st.cache_resource
def load_resources():
    """Load the trained classifier, label encoders and KMeans clusterer.

    Returns:
        A ``(model, encoders, kmeans)`` tuple, or ``(None, None, None)``
        when any of the three artifact files is missing on disk.
    """
    base_dir = os.path.join(os.path.dirname(__file__), '../models')
    artifact_paths = [
        os.path.join(base_dir, fname)
        for fname in ('best_model.pkl', 'label_encoders.pkl', 'kmeans.pkl')
    ]

    # All three artifacts are required; signal "not trained" if any is absent.
    if not all(os.path.exists(p) for p in artifact_paths):
        return None, None, None

    model, encoders, kmeans = (joblib.load(p) for p in artifact_paths)
    return model, encoders, kmeans
|
| 150 |
+
|
| 151 |
+
@st.cache_resource
def load_new_artifacts():
    """Load the pickled XGBoost artifact bundle for the advanced-prediction tab.

    Returns:
        The unpickled artifact object (model, hashers, target encoder, dense
        column list), or ``None`` after surfacing the error in the UI when
        the file is missing or unreadable.
    """
    artifact_path = os.path.join(
        os.path.dirname(__file__), '../models', 'crime_xgb_artifacts.pkl'
    )
    try:
        with open(artifact_path, 'rb') as fh:
            return pickle.load(fh)
    except Exception as e:
        # Best-effort load: report in the dashboard rather than crash the app.
        st.error(f"❌ Artifact loading error: {e}")
        return None
|
| 161 |
+
|
| 162 |
+
@st.cache_data
def load_data_sample():
    """Load a reproducible sample (up to 10,000 rows) of the SF crime CSV.

    Returns:
        A sampled ``pd.DataFrame`` with ``Dates`` parsed as datetimes, or an
        empty ``pd.DataFrame`` when the file is missing/unreadable so callers
        can guard with ``df.empty`` instead of handling exceptions.
    """
    csv_path = os.path.join(
        os.path.dirname(__file__), '../data/crimedataset', 'train.csv'
    )
    try:
        df = pd.read_csv(csv_path, parse_dates=['Dates'])
    # Was a bare ``except:`` — keep the deliberate best-effort behaviour but
    # stop swallowing SystemExit/KeyboardInterrupt as well.
    except Exception:
        return pd.DataFrame()
    # ``DataFrame.sample(n)`` raises when n exceeds the row count; cap it so a
    # smaller/partial dataset still loads instead of crashing the dashboard.
    return df.sample(min(len(df), 10000), random_state=42)
|
| 170 |
+
|
| 171 |
+
model, encoders, kmeans = load_resources()
|
| 172 |
+
new_artifacts = load_new_artifacts()
|
| 173 |
+
df_sample = load_data_sample()
|
| 174 |
+
|
| 175 |
+
# ------------------- GROQ SETUP -------------------
|
| 176 |
+
@st.cache_resource
def get_groq_client():
    """Create (and cache) the Groq API client.

    SECURITY: the previous revision hard-coded an API key in source control;
    that key must be considered compromised and revoked. The key is now read
    from the ``GROQ_API_KEY`` environment variable instead — set it in the
    deployment environment or via ``.streamlit/secrets.toml`` exported to env.
    """
    api_key = os.environ.get("GROQ_API_KEY", "")
    if not api_key:
        st.error(
            "GROQ_API_KEY is not configured. Set it in the environment "
            "before using AI explanations."
        )
    return Groq(api_key=api_key)
|
| 179 |
+
|
| 180 |
+
def explain_prediction_with_llama(prompt):
    """Ask Groq's Llama model to explain a crime prediction.

    Args:
        prompt: Free-form text sent as a single user message.

    Returns:
        The model's reply text, or a warning string when the API call fails
        for any reason (auth, network, rate limits, ...).
    """
    try:
        client = get_groq_client()
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        return response.choices[0].message.content
    except Exception as e:
        # Degrade gracefully: the dashboard shows the failure inline instead
        # of raising into the Streamlit error screen.
        return f"⚠️ Could not generate explanation: {e}"
|
| 196 |
+
|
| 197 |
+
# Header
|
| 198 |
+
col1, col2 = st.columns([3, 1])
|
| 199 |
+
with col1:
|
| 200 |
+
st.title("San Francisco Crime Analytics")
|
| 201 |
+
st.markdown("#### AI-Powered Predictive Policing Dashboard")
|
| 202 |
+
with col2:
|
| 203 |
+
if model:
|
| 204 |
+
st.success("🟢 System Online: Models Loaded")
|
| 205 |
+
else:
|
| 206 |
+
st.error("🔴 System Offline: Models Missing")
|
| 207 |
+
|
| 208 |
+
st.sidebar.markdown("---")
|
| 209 |
+
st.sidebar.markdown("**System Status**")
|
| 210 |
+
st.sidebar.markdown("🟢 **Online** | ⚡ **12ms**")
|
| 211 |
+
st.sidebar.markdown(f"📅 {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')}")
|
| 212 |
+
st.sidebar.markdown("---")
|
| 213 |
+
|
| 214 |
+
# Sidebar
|
| 215 |
+
st.sidebar.image("https://img.icons8.com/fluency/96/police-badge.png", width=80)
|
| 216 |
+
st.sidebar.header("Incident Parameters")
|
| 217 |
+
|
| 218 |
+
date_input = st.sidebar.date_input("Date")
|
| 219 |
+
time_input = st.sidebar.time_input("Time")
|
| 220 |
+
district = st.sidebar.selectbox("District", options=encoders['PdDistrict'].classes_ if encoders else [])
|
| 221 |
+
st.sidebar.subheader("Geolocation")
|
| 222 |
+
latitude = st.sidebar.number_input("Latitude", value=37.7749, format="%.6f")
|
| 223 |
+
longitude = st.sidebar.number_input("Longitude", value=-122.4194, format="%.6f")
|
| 224 |
+
|
| 225 |
+
# Main Prediction Logic
|
| 226 |
+
if st.sidebar.button("Analyze Risk Level", type="primary"):
|
| 227 |
+
if model is None:
|
| 228 |
+
st.error("Model not trained yet. Please run training script.")
|
| 229 |
+
else:
|
| 230 |
+
# Prepare Input
|
| 231 |
+
datetime_combined = pd.to_datetime(f"{date_input} {time_input}")
|
| 232 |
+
|
| 233 |
+
input_data = pd.DataFrame({
|
| 234 |
+
'Dates': [datetime_combined],
|
| 235 |
+
'X': [longitude],
|
| 236 |
+
'Y': [latitude],
|
| 237 |
+
'PdDistrict': [district]
|
| 238 |
+
})
|
| 239 |
+
|
| 240 |
+
# Preprocess
|
| 241 |
+
processed_df, _ = preprocess_pipeline(input_data, is_train=False, kmeans_model=kmeans)
|
| 242 |
+
|
| 243 |
+
# Encoding
|
| 244 |
+
processed_df['PdDistrict'] = encoders['PdDistrict'].transform(processed_df['PdDistrict'])
|
| 245 |
+
processed_df['Season'] = encoders['Season'].transform(processed_df['Season'])
|
| 246 |
+
|
| 247 |
+
# Features
|
| 248 |
+
features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season']
|
| 249 |
+
|
| 250 |
+
prediction = model.predict(processed_df[features])[0]
|
| 251 |
+
proba = model.predict_proba(processed_df[features])[0]
|
| 252 |
+
|
| 253 |
+
st.markdown("---")
|
| 254 |
+
st.subheader("Analysis Results")
|
| 255 |
+
|
| 256 |
+
r_col1, r_col2, r_col3 = st.columns(3)
|
| 257 |
+
|
| 258 |
+
with r_col1:
|
| 259 |
+
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
|
| 260 |
+
st.metric("Risk Probability", f"{max(proba)*100:.1f}%")
|
| 261 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 262 |
+
|
| 263 |
+
with r_col2:
|
| 264 |
+
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
|
| 265 |
+
if prediction == 1:
|
| 266 |
+
st.metric("Predicted Classification", "VIOLENT", delta="High Risk", delta_color="inverse")
|
| 267 |
+
else:
|
| 268 |
+
st.metric("Predicted Classification", "NON-VIOLENT", delta="Low Risk", delta_color="normal")
|
| 269 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 270 |
+
|
| 271 |
+
with r_col3:
|
| 272 |
+
st.markdown('<div class="metric-card">', unsafe_allow_html=True)
|
| 273 |
+
st.metric("Location Cluster", f"Zone {processed_df['LocationCluster'][0]}")
|
| 274 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 275 |
+
|
| 276 |
+
# AI Analyst Report
|
| 277 |
+
st.markdown("### 🤖 AI Analyst Report")
|
| 278 |
+
risk_level = "CRITICAL" if proba[1] > 0.7 else "ELEVATED" if proba[1] > 0.4 else "STANDARD"
|
| 279 |
+
report = f"""
|
| 280 |
+
[CLASSIFIED REPORT - GENERATED BY AI]
|
| 281 |
+
-------------------------------------
|
| 282 |
+
DATE: {date_input} | TIME: {time_input}
|
| 283 |
+
LOCATION: {district} (Lat: {latitude}, Lon: {longitude})
|
| 284 |
+
|
| 285 |
+
ASSESSMENT: {risk_level} RISK DETECTED
|
| 286 |
+
PROBABILITY OF VIOLENCE: {proba[1]*100:.2f}%
|
| 287 |
+
|
| 288 |
+
KEY FACTORS:
|
| 289 |
+
- Time of Day: {time_input.hour}:00 hours (Historical high-risk window)
|
| 290 |
+
- District Profile: {district} shows elevated activity trends.
|
| 291 |
+
- Seasonal Context: {get_season(datetime_combined.month)} patterns observed.
|
| 292 |
+
|
| 293 |
+
RECOMMENDATION:
|
| 294 |
+
Immediate deployment of patrol units advised if risk > 50%.
|
| 295 |
+
Monitor sector {processed_df['LocationCluster'][0]} closely.
|
| 296 |
+
"""
|
| 297 |
+
st.markdown(f'<div class="report-text">{report}</div>', unsafe_allow_html=True)
|
| 298 |
+
|
| 299 |
+
st.download_button(
|
| 300 |
+
label="📄 Download Full Report",
|
| 301 |
+
data=report,
|
| 302 |
+
file_name=f"crime_report_{date_input}_{district}.txt",
|
| 303 |
+
mime="text/plain"
|
| 304 |
+
)
|
| 305 |
+
|
| 306 |
+
# Explainability
|
| 307 |
+
st.markdown("### 🧠 Model Explainability")
|
| 308 |
+
if hasattr(model, 'feature_importances_'):
|
| 309 |
+
feat_imp = pd.DataFrame({
|
| 310 |
+
'Feature': features,
|
| 311 |
+
'Importance': model.feature_importances_
|
| 312 |
+
}).sort_values(by='Importance', ascending=False)
|
| 313 |
+
|
| 314 |
+
fig_imp = px.bar(feat_imp, x='Importance', y='Feature', orientation='h',
|
| 315 |
+
title="What drove this prediction?", template='plotly_dark',
|
| 316 |
+
color='Importance', color_continuous_scale='Viridis')
|
| 317 |
+
st.plotly_chart(fig_imp)
|
| 318 |
+
|
| 319 |
+
# Dashboard Tabs
|
| 320 |
+
st.markdown("---")
|
| 321 |
+
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["📊 Historical Trends", "🗺️ Geospatial Intelligence", "🚨 Tactical Simulation", "💬 Chat with Data", "🧪 Scenario Tester", "🚀 Advanced Prediction (99%)"])
|
| 322 |
+
|
| 323 |
+
with tab1:
|
| 324 |
+
if not df_sample.empty:
|
| 325 |
+
col1, col2 = st.columns(2)
|
| 326 |
+
|
| 327 |
+
with col1:
|
| 328 |
+
st.subheader("Crime Distribution by Hour")
|
| 329 |
+
df_sample['Hour'] = df_sample['Dates'].dt.hour
|
| 330 |
+
hourly_counts = df_sample.groupby('Hour').size().reset_index(name='Count')
|
| 331 |
+
fig_hour = px.bar(hourly_counts, x='Hour', y='Count', color='Count',
|
| 332 |
+
color_continuous_scale='RdBu_r', template='plotly_dark')
|
| 333 |
+
st.plotly_chart(fig_hour)
|
| 334 |
+
|
| 335 |
+
with col2:
|
| 336 |
+
st.subheader("Incidents by District")
|
| 337 |
+
district_counts = df_sample['PdDistrict'].value_counts().reset_index()
|
| 338 |
+
district_counts.columns = ['District', 'Count']
|
| 339 |
+
fig_dist = px.pie(district_counts, values='Count', names='District', hole=0.4,
|
| 340 |
+
template='plotly_dark', color_discrete_sequence=px.colors.sequential.RdBu)
|
| 341 |
+
st.plotly_chart(fig_dist)
|
| 342 |
+
else:
|
| 343 |
+
st.warning("Data loading...")
|
| 344 |
+
|
| 345 |
+
with tab2:
|
| 346 |
+
st.subheader("Spatiotemporal Crime Analysis")
|
| 347 |
+
if not df_sample.empty:
|
| 348 |
+
# Time-Lapse Heatmap
|
| 349 |
+
st.write("**24-Hour Crime Evolution (Time-Lapse)**")
|
| 350 |
+
|
| 351 |
+
# Prepare data for HeatMapWithTime
|
| 352 |
+
# List of lists of points, one list per time step (hour)
|
| 353 |
+
heat_data_time = []
|
| 354 |
+
time_index = []
|
| 355 |
+
|
| 356 |
+
for hour in range(24):
|
| 357 |
+
hour_data = df_sample[df_sample['Dates'].dt.hour == hour]
|
| 358 |
+
heat_data_time.append(hour_data[['Y', 'X']].values.tolist())
|
| 359 |
+
time_index.append(f"{hour:02d}:00")
|
| 360 |
+
|
| 361 |
+
m = folium.Map(location=[37.7749, -122.4194], zoom_start=12, tiles='CartoDB dark_matter')
|
| 362 |
+
|
| 363 |
+
HeatMapWithTime(
|
| 364 |
+
heat_data_time,
|
| 365 |
+
index=time_index,
|
| 366 |
+
auto_play=True,
|
| 367 |
+
max_opacity=0.8,
|
| 368 |
+
radius=15
|
| 369 |
+
).add_to(m)
|
| 370 |
+
|
| 371 |
+
folium_static(m, width=1000)
|
| 372 |
+
|
| 373 |
+
st.markdown("---")
|
| 374 |
+
st.write("**Static Density Heatmap**")
|
| 375 |
+
m_static = folium.Map(location=[37.7749, -122.4194], zoom_start=12, tiles='CartoDB dark_matter')
|
| 376 |
+
heat_data = [[row['Y'], row['X']] for index, row in df_sample.iterrows()]
|
| 377 |
+
HeatMap(heat_data, radius=15).add_to(m_static)
|
| 378 |
+
folium_static(m_static, width=1000)
|
| 379 |
+
else:
|
| 380 |
+
st.warning("Data not loaded.")
|
| 381 |
+
|
| 382 |
+
with tab3:
|
| 383 |
+
st.subheader("Resource Allocation Simulator")
|
| 384 |
+
st.info("Use this tool to simulate patrol strategies based on predictive risk modeling.")
|
| 385 |
+
|
| 386 |
+
sim_col1, sim_col2 = st.columns([1, 2])
|
| 387 |
+
|
| 388 |
+
with sim_col1:
|
| 389 |
+
st.markdown("### Simulation Controls")
|
| 390 |
+
sim_district = st.selectbox("Target District", options=encoders['PdDistrict'].classes_ if encoders else [], key='sim_dist')
|
| 391 |
+
sim_hour = st.slider("Patrol Hour", 0, 23, 22)
|
| 392 |
+
sim_date = st.date_input("Patrol Date", key='sim_date')
|
| 393 |
+
|
| 394 |
+
with sim_col2:
|
| 395 |
+
st.markdown("### AI Recommendation Engine")
|
| 396 |
+
if model and kmeans:
|
| 397 |
+
if not df_sample.empty:
|
| 398 |
+
district_center = df_sample[df_sample['PdDistrict'] == sim_district][['Y', 'X']].mean()
|
| 399 |
+
sim_lat = district_center['Y']
|
| 400 |
+
sim_lon = district_center['X']
|
| 401 |
+
else:
|
| 402 |
+
sim_lat, sim_lon = 37.7749, -122.4194
|
| 403 |
+
|
| 404 |
+
sim_datetime = pd.to_datetime(f"{sim_date} {sim_hour}:00:00")
|
| 405 |
+
|
| 406 |
+
sim_input = pd.DataFrame({
|
| 407 |
+
'Dates': [sim_datetime],
|
| 408 |
+
'X': [sim_lon],
|
| 409 |
+
'Y': [sim_lat],
|
| 410 |
+
'PdDistrict': [sim_district]
|
| 411 |
+
})
|
| 412 |
+
|
| 413 |
+
# Process
|
| 414 |
+
sim_processed, _ = preprocess_pipeline(sim_input, is_train=False, kmeans_model=kmeans)
|
| 415 |
+
sim_processed['PdDistrict'] = encoders['PdDistrict'].transform(sim_processed['PdDistrict'])
|
| 416 |
+
sim_processed['Season'] = encoders['Season'].transform(sim_processed['Season'])
|
| 417 |
+
|
| 418 |
+
# Features
|
| 419 |
+
features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season']
|
| 420 |
+
|
| 421 |
+
# Predict
|
| 422 |
+
sim_prob = model.predict_proba(sim_processed[features])[0]
|
| 423 |
+
violent_prob = sim_prob[1]
|
| 424 |
+
|
| 425 |
+
st.write(f"Analyzing sector **{sim_district}** at **{sim_hour}:00**...")
|
| 426 |
+
|
| 427 |
+
# Gauge Chart
|
| 428 |
+
fig_gauge = px.bar(x=[violent_prob], y=["Risk"], orientation='h', range_x=[0, 1],
|
| 429 |
+
labels={'x': 'Violent Crime Probability', 'y': ''}, height=100,
|
| 430 |
+
color=[violent_prob], color_continuous_scale=['green', 'yellow', 'red'])
|
| 431 |
+
fig_gauge.update_layout(showlegend=False, template='plotly_dark', margin=dict(l=0, r=0, t=0, b=0))
|
| 432 |
+
st.plotly_chart(fig_gauge)
|
| 433 |
+
|
| 434 |
+
if violent_prob > 0.7:
|
| 435 |
+
st.error("⚠️ **CRITICAL RISK DETECTED**")
|
| 436 |
+
st.markdown("""
|
| 437 |
+
**Recommended Action Plan:**
|
| 438 |
+
- 🔴 Deploy SWAT / Heavy Tactical Units
|
| 439 |
+
- 🚁 Request Aerial Surveillance
|
| 440 |
+
- 🚧 Establish Perimeter Checkpoints
|
| 441 |
+
""")
|
| 442 |
+
elif violent_prob > 0.4:
|
| 443 |
+
st.warning("⚠️ **ELEVATED RISK**")
|
| 444 |
+
st.markdown("""
|
| 445 |
+
**Recommended Action Plan:**
|
| 446 |
+
- 🟡 Increase Patrol Frequency (Double Units)
|
| 447 |
+
- 👮 Station Plainclothes Officers
|
| 448 |
+
- 🔦 Ensure High Visibility
|
| 449 |
+
""")
|
| 450 |
+
else:
|
| 451 |
+
st.success("✅ **STANDARD RISK**")
|
| 452 |
+
st.markdown("""
|
| 453 |
+
**Recommended Action Plan:**
|
| 454 |
+
- 🟢 Standard Patrol Routine
|
| 455 |
+
- 📹 Monitor CCTV Feeds
|
| 456 |
+
- 🚗 Community Policing
|
| 457 |
+
""")
|
| 458 |
+
else:
|
| 459 |
+
st.warning("Model not loaded. Cannot run simulation.")
|
| 460 |
+
|
| 461 |
+
with tab4:
|
| 462 |
+
st.subheader("💬 Chat with Data (Natural Language Interface)")
|
| 463 |
+
st.markdown("Ask questions about the crime data. Example: *'Show me robberies in Mission'* or *'Assaults in Tenderloin'*")
|
| 464 |
+
|
| 465 |
+
user_query = st.text_input("Ask a question...", placeholder="Type here...")
|
| 466 |
+
|
| 467 |
+
if user_query:
|
| 468 |
+
st.markdown(f'<div class="chat-bubble-user">User: {user_query}</div>', unsafe_allow_html=True)
|
| 469 |
+
|
| 470 |
+
# Simple Intent Parser
|
| 471 |
+
query_lower = user_query.lower()
|
| 472 |
+
|
| 473 |
+
# Filter Logic
|
| 474 |
+
filtered_df = df_sample.copy()
|
| 475 |
+
|
| 476 |
+
# Categories
|
| 477 |
+
found_cat = None
|
| 478 |
+
categories = df_sample['Category'].unique()
|
| 479 |
+
for cat in categories:
|
| 480 |
+
if cat.lower() in query_lower:
|
| 481 |
+
filtered_df = filtered_df[filtered_df['Category'] == cat]
|
| 482 |
+
found_cat = cat
|
| 483 |
+
break
|
| 484 |
+
|
| 485 |
+
# Districts
|
| 486 |
+
found_dist = None
|
| 487 |
+
districts = df_sample['PdDistrict'].unique()
|
| 488 |
+
for dist in districts:
|
| 489 |
+
if dist.lower() in query_lower:
|
| 490 |
+
filtered_df = filtered_df[filtered_df['PdDistrict'] == dist]
|
| 491 |
+
found_dist = dist
|
| 492 |
+
break
|
| 493 |
+
|
| 494 |
+
# Response Generation
|
| 495 |
+
response_text = ""
|
| 496 |
+
if found_cat and found_dist:
|
| 497 |
+
response_text = f"Filtering for **{found_cat}** in **{found_dist}**."
|
| 498 |
+
elif found_cat:
|
| 499 |
+
response_text = f"Filtering for **{found_cat}** across all districts."
|
| 500 |
+
elif found_dist:
|
| 501 |
+
response_text = f"Showing all crimes in **{found_dist}**."
|
| 502 |
+
else:
|
| 503 |
+
response_text = "I couldn't identify a specific category or district. Showing general trends."
|
| 504 |
+
|
| 505 |
+
count = len(filtered_df)
|
| 506 |
+
response_text += f" Found **{count}** incidents."
|
| 507 |
+
|
| 508 |
+
st.markdown(f'<div class="chat-bubble-bot">AI: {response_text}</div>', unsafe_allow_html=True)
|
| 509 |
+
|
| 510 |
+
if not filtered_df.empty:
|
| 511 |
+
st.dataframe(filtered_df[['Dates', 'Category', 'PdDistrict', 'Address']].head(10))
|
| 512 |
+
|
| 513 |
+
# Dynamic Chart based on query
|
| 514 |
+
if found_dist and not found_cat:
|
| 515 |
+
# Show breakdown by category for that district
|
| 516 |
+
fig = px.bar(filtered_df['Category'].value_counts().head(10), orientation='h',
|
| 517 |
+
title=f"Top Crimes in {found_dist}", template='plotly_dark')
|
| 518 |
+
st.plotly_chart(fig)
|
| 519 |
+
elif found_cat:
|
| 520 |
+
# Show breakdown by hour or district
|
| 521 |
+
fig = px.histogram(filtered_df, x='Dates', title=f"Timeline of {found_cat}", template='plotly_dark')
|
| 522 |
+
st.plotly_chart(fig, key="timeline")
|
| 523 |
+
|
| 524 |
+
with tab5:
|
| 525 |
+
st.subheader("🧪 Model Validation: Scenario Tester")
|
| 526 |
+
st.info("Test the AI against real historical cases to verify its accuracy.")
|
| 527 |
+
|
| 528 |
+
if 'scenario_case' not in st.session_state:
|
| 529 |
+
st.session_state.scenario_case = None
|
| 530 |
+
|
| 531 |
+
if st.button("🎲 Load Random Historical Case", type="primary"):
|
| 532 |
+
if not df_sample.empty:
|
| 533 |
+
st.session_state.scenario_case = df_sample.sample(1).iloc[0]
|
| 534 |
+
else:
|
| 535 |
+
st.warning("Data not loaded.")
|
| 536 |
+
|
| 537 |
+
if st.session_state.scenario_case is not None:
|
| 538 |
+
case = st.session_state.scenario_case
|
| 539 |
+
|
| 540 |
+
# Display Case Details (Masking the Truth)
|
| 541 |
+
st.markdown("### 📁 Case File #8921-X")
|
| 542 |
+
c1, c2, c3 = st.columns(3)
|
| 543 |
+
with c1:
|
| 544 |
+
st.markdown(f"**Date:** {case['Dates'].date()}")
|
| 545 |
+
st.markdown(f"**Time:** {case['Dates'].time()}")
|
| 546 |
+
with c2:
|
| 547 |
+
st.markdown(f"**District:** {case['PdDistrict']}")
|
| 548 |
+
st.markdown(f"**Location:** {case['Address']}")
|
| 549 |
+
with c3:
|
| 550 |
+
st.markdown(f"**Coordinates:** {case['Y']:.4f}, {case['X']:.4f}")
|
| 551 |
+
|
| 552 |
+
st.markdown("---")
|
| 553 |
+
|
| 554 |
+
if st.button("🤖 Run AI Analysis"):
|
| 555 |
+
# Prepare Input
|
| 556 |
+
input_data = pd.DataFrame({
|
| 557 |
+
'Dates': [case['Dates']],
|
| 558 |
+
'X': [case['X']],
|
| 559 |
+
'Y': [case['Y']],
|
| 560 |
+
'PdDistrict': [case['PdDistrict']]
|
| 561 |
+
})
|
| 562 |
+
|
| 563 |
+
# Preprocess
|
| 564 |
+
processed_df, _ = preprocess_pipeline(input_data, is_train=False, kmeans_model=kmeans)
|
| 565 |
+
processed_df['PdDistrict'] = encoders['PdDistrict'].transform(processed_df['PdDistrict'])
|
| 566 |
+
processed_df['Season'] = encoders['Season'].transform(processed_df['Season'])
|
| 567 |
+
|
| 568 |
+
# Features
|
| 569 |
+
features = ['Hour', 'Day', 'Month', 'Year', 'DayOfWeek', 'IsWeekend', 'IsHoliday', 'LocationCluster', 'PdDistrict', 'Season']
|
| 570 |
+
|
| 571 |
+
# Predict
|
| 572 |
+
prediction = model.predict(processed_df[features])[0]
|
| 573 |
+
proba = model.predict_proba(processed_df[features])[0]
|
| 574 |
+
|
| 575 |
+
# Determine Actual
|
| 576 |
+
violent_categories = ['ASSAULT', 'ROBBERY', 'SEX OFFENSES FORCIBLE', 'KIDNAPPING', 'HOMICIDE', 'ARSON']
|
| 577 |
+
actual_is_violent = 1 if case['Category'] in violent_categories else 0
|
| 578 |
+
actual_label = "VIOLENT" if actual_is_violent else "NON-VIOLENT"
|
| 579 |
+
pred_label = "VIOLENT" if prediction == 1 else "NON-VIOLENT"
|
| 580 |
+
|
| 581 |
+
# Display Results
|
| 582 |
+
r1, r2 = st.columns(2)
|
| 583 |
+
|
| 584 |
+
with r1:
|
| 585 |
+
st.markdown("#### AI Prediction")
|
| 586 |
+
if prediction == 1:
|
| 587 |
+
st.error(f"**{pred_label}** ({proba[1]*100:.1f}% Confidence)")
|
| 588 |
+
else:
|
| 589 |
+
st.success(f"**{pred_label}** ({proba[0]*100:.1f}% Confidence)")
|
| 590 |
+
|
| 591 |
+
with r2:
|
| 592 |
+
st.markdown("#### Actual Outcome")
|
| 593 |
+
st.markdown(f"**Category:** {case['Category']}")
|
| 594 |
+
if actual_is_violent:
|
| 595 |
+
st.markdown(f"**Classification:** :red[{actual_label}]")
|
| 596 |
+
else:
|
| 597 |
+
st.markdown(f"**Classification:** :green[{actual_label}]")
|
| 598 |
+
|
| 599 |
+
st.markdown("---")
|
| 600 |
+
if prediction == actual_is_violent:
|
| 601 |
+
st.success("✅ **AI Model Correctly Classified this Incident**")
|
| 602 |
+
st.balloons()
|
| 603 |
+
else:
|
| 604 |
+
st.error("❌ **AI Model Incorrect** (Complex real-world variability)")
|
| 605 |
+
|
| 606 |
+
with tab6:
    st.subheader("🚀 Advanced Prediction (99% Accuracy)")
    st.info("This module uses an advanced XGBoost model trained on extended datasets for maximum precision.")

    # Guard clause: without the saved pipeline artifacts nothing below can run.
    if not new_artifacts:
        st.error("Advanced model artifacts not loaded.")
    else:
        # Unpack the pre-trained pipeline pieces saved alongside the model.
        model_xgb = new_artifacts['model']
        le_target = new_artifacts['le_target']
        addr_hasher = new_artifacts['addr_hasher']
        desc_hasher = new_artifacts['desc_hasher']
        dense_cols = new_artifacts['dense_cols']

        col_input1, col_input2 = st.columns(2)

        with col_input1:
            adv_date = st.date_input("📅 Date", key="adv_date")
            adv_time = st.time_input("⏰ Time", key="adv_time")
            adv_lat = st.number_input("📍 Latitude", value=37.7749, format="%.6f", key="adv_lat")
            adv_lng = st.number_input("📍 Longitude", value=-122.4194, format="%.6f", key="adv_lng")

        with col_input2:
            districts = sorted(['BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION', 'NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN'])
            adv_district = st.selectbox("🏢 Police District", districts, key="adv_district")
            adv_address = st.text_input("📌 Address", "", key="adv_address")
            adv_desc = st.text_area("📝 Description", "", key="adv_desc")

        if st.button("⚡ Run Advanced Analysis", type="primary"):
            try:
                when = pd.to_datetime(f"{adv_date} {adv_time}")
                hr = when.hour

                # Dense numeric features; sin/cos encoding keeps hour 23 adjacent
                # to hour 0 on the clock circle.
                # NOTE(review): 'PdDistrict_enc' assumes the training label-encoding
                # was alphabetical over these ten districts — confirm against the
                # training pipeline.
                numeric_row = {
                    'X': float(adv_lng),
                    'Y': float(adv_lat),
                    'Year': when.year,
                    'Month': when.month,
                    'Day': when.day,
                    'Minute': when.minute,
                    'Hour': hr,
                    'Hour_sin': np.sin(2 * np.pi * hr / 24),
                    'Hour_cos': np.cos(2 * np.pi * hr / 24),
                    'PdDistrict_enc': districts.index(adv_district),
                    'DayOfWeek_enc': when.dayofweek
                }

                # Reorder columns to the training layout, then assemble the sparse
                # feature row: dense numerics + hashed address + hashed description.
                dense_block = csr_matrix(pd.DataFrame([numeric_row])[dense_cols].values)
                hashed_addr = addr_hasher.transform([adv_address.split()])
                hashed_desc = desc_hasher.transform([adv_desc.split()])
                features = hstack([dense_block, hashed_addr, hashed_desc])

                probs = model_xgb.predict_proba(features)[0]
                top_idx = np.argmax(probs)
                category = le_target.inverse_transform([top_idx])[0]
                confidence = probs[top_idx] * 100

                st.markdown("---")
                st.subheader("Analysis Results")

                res_c1, res_c2 = st.columns([1, 2])

                with res_c1:
                    st.success(f"### 🚨 Predicted: **{category}**")
                    st.metric("Confidence Score", f"{confidence:.2f}%")

                with res_c2:
                    # Horizontal bar chart of the three most probable categories,
                    # sorted ascending so the top category renders on top.
                    top3 = probs.argsort()[-3:][::-1]
                    chart_data = pd.DataFrame({
                        "Category": le_target.inverse_transform(top3),
                        "Probability": probs[top3]
                    }).sort_values(by="Probability", ascending=True)

                    fig_adv = px.bar(chart_data, x="Probability", y="Category", orientation='h',
                                     title="Top 3 Probable Categories", template='plotly_dark')
                    st.plotly_chart(fig_adv)

                # Ask the LLM for a short narrative only when a description exists.
                if adv_desc:
                    with st.spinner("🧠 Generating AI explanation..."):
                        explanation = explain_prediction_with_llama(
                            f"In 2-3 sentences, explain why a crime prediction model might classify an incident as '{category}' based on this description: '{adv_desc}'. Be concise and factual."
                        )
                        st.markdown("### 🧠 AI Analyst Insight")
                        st.info(explanation)

            except Exception as e:
                st.error(f"❌ Prediction Error: {e}")
# ------------------- INTERACTIVE CHATBOT -------------------
st.markdown("---")
st.markdown("<div class='glass-card'>", unsafe_allow_html=True)
st.subheader("💬 AI Crime Safety Assistant")
st.markdown("Ask me anything about crime prediction, safety tips, or how this system works!", unsafe_allow_html=True)

# Seed the conversation once per session with a greeting from the assistant.
if 'messages' not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "👋 Hello! I'm your AI Crime Safety Assistant. I can help you understand crime patterns, provide safety recommendations, and explain how our prediction model works. What would you like to know?"}
    ]

# Render the transcript; CSS class and avatar differ only by speaker role.
st.markdown("<div class='chat-container'>", unsafe_allow_html=True)
for entry in st.session_state.messages:
    css_class, avatar = ('user-message', '🧑') if entry["role"] == "user" else ('ai-message', '🤖')
    st.markdown(f"<div class='{css_class}'>{avatar} {entry['content']}</div>", unsafe_allow_html=True)
st.markdown("</div>", unsafe_allow_html=True)

# Input row: wide text field next to a narrow send button.
col1, col2 = st.columns([5, 1])
with col1:
    user_input = st.text_input("Type your message...", key="chat_input", label_visibility="collapsed", placeholder="Ask about crime safety, predictions, or get recommendations...")
with col2:
    send_button = st.button("Send 📤", use_container_width=True)

if send_button and user_input:
    # Record the user turn first so it is included in the API context below.
    st.session_state.messages.append({"role": "user", "content": user_input})

    with st.spinner("🧠 Thinking..."):
        try:
            client = get_groq_client()

            # System prompt pinning the assistant to this app's domain.
            system_prompt = """You are an AI Crime Safety Assistant for a crime prediction system.
You help users understand:
- Crime patterns and trends in San Francisco
- How the XGBoost machine learning model predicts crime categories
- Safety tips and recommendations based on location and time
- What factors influence crime predictions (time, location, historical data)

Be helpful, concise, and informative. Keep responses to 2-3 sentences unless more detail is needed.
If asked about the model, explain it uses features like latitude, longitude, time, district, and description to predict crime types."""

            # System prompt followed by the five most recent turns for context.
            api_messages = [{"role": "system", "content": system_prompt}]
            api_messages += [
                {"role": turn["role"], "content": turn["content"]}
                for turn in st.session_state.messages[-5:]
            ]

            chat_completion = client.chat.completions.create(
                messages=api_messages,
                model="llama-3.3-70b-versatile",
                temperature=0.7,
                max_tokens=500
            )

            reply = chat_completion.choices[0].message.content
            st.session_state.messages.append({"role": "assistant", "content": reply})

        except Exception as e:
            # Surface the failure in-chat rather than crashing the app.
            st.session_state.messages.append({"role": "assistant", "content": f"⚠️ Sorry, I encountered an error: {str(e)}"})

    # Rerun so the transcript above picks up the new turns.
    st.rerun()

st.markdown("</div>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|