# app.py β€” AI Crime Intelligence Dashboard
# (Hugging Face Space by jamesong244, commit 53ebc95)
import streamlit as st
import pandas as pd
import folium
import seaborn as sns
import matplotlib.pyplot as plt
import os
import tempfile
import sys
import re
# --- Disable Telemetry ---
os.environ["CREWAI_TELEMETRY_OPT_OUT"] = "true"
import streamlit.components.v1 as components
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI
from crewai.tools import BaseTool
from fpdf import FPDF
# =========================================
# 1. PAGE CONFIGURATION
# =========================================
# st.set_page_config must be the first Streamlit call in the script;
# wide layout suits the map and chart tabs below.
st.set_page_config(page_title="Crime Copilot Dashboard", layout="wide", page_icon="πŸš“")
st.title("πŸš“ AI Crime Intelligence Dashboard")
st.markdown("---")
# =========================================
# 2. HELPER FUNCTIONS (PDF & GUARDRAILS)
# =========================================
def create_pdf(report_text):
    """Render a markdown-ish report string as a PDF and return its raw bytes.

    Args:
        report_text: the SITREP text produced by the crew (may contain
            markdown headers, bold markers, and code fences).

    Returns:
        bytes: the finished PDF document, ready for st.download_button.

    Raises:
        OSError: if the temporary file cannot be created or removed.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Situation Report (SITREP)", ln=True, align="C")
    pdf.ln(10)
    # Body
    pdf.set_font("Arial", size=12)
    # Strip markdown syntax for plain PDF text ("## "/"### " before "# "
    # so the longer header prefixes are removed first).
    clean_text = report_text.replace("**", "").replace("## ", "").replace("### ", "").replace("# ", "")
    clean_text = clean_text.replace("```markdown", "").replace("```", "")
    # FPDF's core fonts are Latin-1 only; substitute unmappable characters
    # instead of crashing on emoji/unicode.
    clean_text = clean_text.encode('latin-1', 'replace').decode('latin-1')
    pdf.multi_cell(0, 8, clean_text)
    # Write to a physical temp file to guarantee Adobe compatibility.
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)  # close our descriptor so FPDF can reopen the path itself
    try:
        pdf.output(temp_path, "F")
        # Read pure binary data back
        with open(temp_path, "rb") as f:
            pdf_bytes = f.read()
    finally:
        # Fix: remove the temp file even if pdf.output()/read raises,
        # so failed exports don't leak files in the temp directory.
        os.remove(temp_path)
    return pdf_bytes
def validate_data_guardrails(df):
    """Scan an uploaded DataFrame for likely prompt-injection payloads.

    Checks every column header, plus the first 500 non-null values of each
    string (object-dtype) column, for known jailbreak phrases before the
    data is handed to the LLM crew.

    Args:
        df: pandas DataFrame built from the uploaded CSV.

    Returns:
        Tuple (is_safe, message): (True, "Passed") when clean, otherwise
        (False, reason).
    """
    suspicious_phrases = ("ignore previous instructions", "disregard all previous", "you are an ai", "bypass instructions")
    # Column headers end up in prompts verbatim, so vet them first.
    for col in df.columns:
        header = str(col).lower()
        if any(phrase in header for phrase in suspicious_phrases):
            return False, f"Prompt injection detected in column: '{col}'"
    # Only free-text columns can carry injected instructions; the 500-row cap
    # keeps validation fast on large files.
    for col in df.select_dtypes(include=['object']).columns:
        for val in df[col].dropna().head(500):
            text = str(val).lower()
            if any(phrase in text for phrase in suspicious_phrases):
                # Fix: was a pointless f-string with no placeholders.
                return False, "Prompt injection detected in data."
    return True, "Passed"
# =========================================
# 3. SESSION STATE SETUP
# =========================================
# Seed every key the app reads before any widget or rerun can touch it.
_SESSION_DEFAULTS = {
    'data_cache': None,
    'crew_result': None,
    'mo_result': None,
    'current_filename': "",
    'start_date': None,
    'end_date': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# =========================================
# 4. SIDEBAR & DATA LOADING
# =========================================
with st.sidebar:
    st.header("βš™οΈ Configuration")
    # Make the API key input optional for reviewers: if the user types a key
    # it is used; otherwise the host's OPENAI_API_KEY environment variable
    # (set in the deployment settings) serves as the demo key.
    api_key_input = st.text_input("OpenAI API Key (Leave blank to use Demo Key)", type="password")
    if api_key_input:
        os.environ["OPENAI_API_KEY"] = api_key_input
    st.header("πŸ“‚ Data Upload")
    # Offer the bundled sample dataset so reviewers can round-trip it below.
    # NOTE(review): assumes sample_crime_data.csv ships next to app.py;
    # open() raises FileNotFoundError otherwise β€” confirm it is deployed.
    with open("sample_crime_data.csv", "rb") as file:
        st.download_button(
            label="⬇️ Download Sample Crime Data",
            data=file,
            file_name="sample_crime_data.csv",
            mime="text/csv",
            help="Download this file and upload it below to test the dashboard."
        )
    uploaded_file = st.file_uploader("Upload Crime CSV", type=["csv"], key="csv_uploader")
    # Placeholder: the date-range widgets are injected here once a file with
    # a parseable date column has been loaded (see the loading block below).
    date_filter_container = st.container()
    st.markdown("---")
    analyze_mo = st.checkbox("πŸ•΅οΈ Analyse Crime Operandi (MO)", value=False, help="Uses an additional AI Profiler to detect patterns to alert patrol officers.")
    # Export options appear only after a crew run has produced a report.
    if st.session_state.crew_result is not None:
        st.markdown("---")
        st.header("πŸ“₯ Export Options")
        result_obj = st.session_state.crew_result
        # CrewAI results may expose .raw; fall back to str() for plain strings.
        report_text = result_obj.raw if hasattr(result_obj, 'raw') and isinstance(result_obj.raw, str) else str(result_obj)
        pdf_bytes = create_pdf(report_text)
        dynamic_filename = f"SITREP_{st.session_state.start_date}_to_{st.session_state.end_date}.pdf"
        st.download_button(
            label="πŸ“„ Download SITREP (PDF)",
            data=pdf_bytes,
            file_name=dynamic_filename,
            mime="application/pdf",
            type="primary"
        )
def load_raw_data(file):
    """Read an uploaded CSV into a DataFrame, or return None on any failure.

    low_memory=False forces whole-file dtype inference so mixed-type columns
    don't get chunk-dependent dtypes.

    Args:
        file: a file-like object or path accepted by pandas.read_csv.

    Returns:
        pandas.DataFrame on success, None if the file cannot be parsed.
    """
    try:
        return pd.read_csv(file, low_memory=False)
    except Exception:
        # Best-effort by design: the caller treats None as "unreadable file"
        # and skips the downstream pipeline instead of crashing the app.
        return None
if uploaded_file:
    # A new file invalidates all cached data and AI results; rerun so the UI
    # rebuilds from a clean state before parsing the upload.
    if uploaded_file.name != st.session_state.current_filename:
        st.session_state.data_cache = None
        st.session_state.crew_result = None
        st.session_state.mo_result = None
        st.session_state.current_filename = uploaded_file.name
        st.rerun()
    raw_df = load_raw_data(uploaded_file)
    if raw_df is not None:
        # Heuristic coordinate-column detection ('lat'/'y' and 'lon'/'lng'/'x').
        lat_col = next((col for col in raw_df.columns if 'lat' in col.lower() or col.lower() == 'y'), None)
        lon_col = next((col for col in raw_df.columns if 'lon' in col.lower() or 'long' in col.lower() or 'lng' in col.lower() or col.lower() == 'x'), None)
        if lat_col and lon_col:
            # Coerce to numeric and drop rows the heatmap cannot plot.
            raw_df[lat_col] = pd.to_numeric(raw_df[lat_col], errors='coerce')
            raw_df[lon_col] = pd.to_numeric(raw_df[lon_col], errors='coerce')
            raw_df = raw_df.dropna(subset=[lat_col, lon_col])
        # Prefer a pure date column; fall back to a combined datetime column.
        date_col = next((col for col in raw_df.columns if 'date' in col.lower() and 'time' not in col.lower()), None)
        if not date_col: date_col = next((col for col in raw_df.columns if 'datetime' in col.lower()), None)
        # NOTE(review): data_cache is only populated when a date column exists,
        # so files without one can never be analyzed β€” confirm that is intended.
        if date_col:
            # NOTE(review): dayfirst=False assumes US-style dates β€” confirm
            # against the expected source datasets.
            raw_df[date_col] = pd.to_datetime(raw_df[date_col], errors='coerce', dayfirst=False)
            raw_df = raw_df.dropna(subset=[date_col])
            min_date, max_date = raw_df[date_col].min().date(), raw_df[date_col].max().date()
            # Fix: Group the stats UI neatly inside the sidebar placeholder.
            with date_filter_container:
                st.header("πŸ“… Analysis Period")
                start = st.date_input("Start Date", min_date, min_value=min_date, max_value=max_date)
                end = st.date_input("End Date", max_date, min_value=min_date, max_value=max_date)
                st.session_state.start_date, st.session_state.end_date = start, end
                # Filter the working cache to the selected date window.
                mask = (raw_df[date_col].dt.date >= start) & (raw_df[date_col].dt.date <= end)
                st.session_state.data_cache = raw_df.loc[mask]
                # Styled highlight boxes
                st.info(f"Total Rows in File: **{len(raw_df)}**")
                st.success(f"Rows in Selected Dates: **{len(st.session_state.data_cache)}**")
                if lat_col and lon_col:
                    st.caption(f"πŸ“ **Map Ready Points:** {len(st.session_state.data_cache)}")
# =========================================
# 5. TOOLS
# =========================================
class MapVizTool(BaseTool):
    name: str = "Crime Heatmap Generator"
    description: str = "Analyzes location data."

    def _run(self, dummy_arg: str) -> str:
        """Summarise the three most frequent neighborhoods in the cached data.

        Reads the date-filtered DataFrame from Streamlit session state and
        looks for a column whose name mentions 'neighbor' or 'analysis'.
        """
        frame = st.session_state.data_cache
        if frame is None or frame.empty:
            return "Error: No data."
        hood_col = None
        for candidate in frame.columns:
            lowered = candidate.lower()
            if 'neighbor' in lowered or 'analysis' in lowered:
                hood_col = candidate
                break
        if hood_col is None:
            return "Locations processed."
        top_hoods = frame[hood_col].value_counts().head(3)
        return f"Top 3 High-Crime Neighborhoods:\n{top_hoods.to_string()}"
class ChartVizTool(BaseTool):
    name: str = "Crime Trend Chart Generator"
    description: str = "Generates a trend chart."

    def _run(self, dummy_arg: str) -> str:
        """Plot the five most common crime categories and return their counts.

        Saves the bar chart to crime_chart.png (read later by the Charts tab)
        and returns the counts as text for the Analyst agent.
        """
        frame = st.session_state.data_cache
        if frame is None or frame.empty:
            return "Error: No data."
        # Category-column detection in strict priority order:
        # exact "incident category" match, then any "category", then
        # description/type columns as a last resort.
        cat_col = next((c for c in frame.columns if 'incident category' in c.lower()), None)
        if cat_col is None:
            cat_col = next((c for c in frame.columns if 'category' in c.lower()), None)
        if cat_col is None:
            cat_col = next((c for c in frame.columns if 'desc' in c.lower() or 'type' in c.lower()), None)
        if cat_col is None:
            return "Error: No category column."
        top_crimes = frame[cat_col].value_counts().head(5)
        plt.figure(figsize=(10, 6))
        sns.barplot(x=top_crimes.values, y=top_crimes.index, hue=top_crimes.index, palette="magma", legend=False)
        plt.title("Top 5 Crime Trends")
        plt.xlabel("Count")
        plt.ylabel(cat_col)
        plt.tight_layout()
        plt.savefig("crime_chart.png")
        plt.close()
        return f"Chart Data: Top 5 Crime Categories:\n{top_crimes.to_string()}"
# =========================================
# 6. EXECUTION
# =========================================
if st.button("πŸš€ Run Analysis", type="primary"):
    # Preconditions: an API key, a loaded dataset, and a clean guardrail scan.
    if not os.environ.get("OPENAI_API_KEY"): st.error("Please enter your API Key."); st.stop()
    if st.session_state.data_cache is None: st.error("❌ No dataset loaded!"); st.stop()
    is_safe, security_msg = validate_data_guardrails(st.session_state.data_cache)
    if not is_safe: st.error(f"🚨 Security Alert: {security_msg}"); st.stop()
    else: st.success("βœ… LLM Guardrails Validation Passed")
    with st.spinner("πŸ€– AI Agents are generating intelligence..."):
        # NOTE(review): st.rerun() below signals via an internal exception β€”
        # confirm the broad `except Exception` at the bottom does not swallow it.
        try:
            # Low temperature keeps the numeric reporting consistent.
            llm = ChatOpenAI(model="gpt-4o", verbose=True, temperature=0.3)
            # Analyst: the only agent with tools; produces the hard numbers.
            analyst = Agent(
                role="Senior Analyst",
                goal="Analyze crime statistics accurately.",
                backstory="You provide hard numbers on top crime types and locations.",
                tools=[MapVizTool(), ChartVizTool()],
                llm=llm,
                verbose=True
            )
            # Commander: turns the Analyst's numbers into the SITREP narrative.
            writer = Agent(
                role="Commander",
                goal="Write a detailed Situation Report (SITREP).",
                backstory="You write executive summaries. You MUST use the exact numbers provided by the Analyst.",
                llm=llm,
                verbose=True
            )
            # Formatter: final pass enforcing the markdown contract.
            auditor = Agent(
                role="Formatter",
                goal="Ensure strict Markdown formatting.",
                backstory="Formats reports perfectly. Removes all triple backticks (```) from output.",
                llm=llm,
                verbose=True
            )
            start_str, end_str = str(st.session_state.start_date), str(st.session_state.end_date)
            t1 = Task(
                description=f"Analyze data from {start_str} to {end_str}. 1. Use the Chart Tool to get top crime stats. 2. Use the Map Tool to get hotspot neighborhoods.",
                agent=analyst,
                expected_output="A summary of top crime stats and hotspots."
            )
            # Fix: Strict Markdown template for the SITREP to match user requirements.
            t2 = Task(
                description=f"Write a Situation Report using EXACT numbers from the Analyst. You MUST use this exact markdown structure:\n"
                            f"# SITREP: {start_str} to {end_str}\n\n"
                            "## Overview\n[Write a brief summary of the period]\n\n"
                            "## Threat Breakdown\n[List the top 5 crimes with their exact numbers]\n\n"
                            "## Hotspots\n[List the top neighborhoods with their exact numbers]\n\n"
                            "## Recommendations\n[Provide tactical advice]",
                agent=writer,
                expected_output="A strictly formatted SITREP text with numbers."
            )
            t3 = Task(
                description="Review the SITREP. Ensure it strictly uses the specific Markdown headers provided (# SITREP, ## Overview, ## Threat Breakdown, ## Hotspots, ## Recommendations). Strip any markdown code fences (```) from the final output.",
                agent=auditor,
                expected_output="Clean Markdown report."
            )
            tasks_list = [t1, t2, t3]
            agents_list = [analyst, writer, auditor]
            # Optional fourth agent: behavioral profiling for patrol alerts.
            if analyze_mo:
                profiler = Agent(
                    role="Behavioral Profiler",
                    goal="Identify Modus Operandi (MO) and patterns.",
                    backstory="Expert in predicting criminal behavior and generating BOLO (Be On the Look Out) alerts.",
                    llm=llm,
                    verbose=True
                )
                t_mo = Task(
                    description=f"Based on the Analyst's findings for {start_str} to {end_str}, write a 'Tactical MO Alert'. Identify the most likely operational patterns. Provide 3 bullet points for patrol officers.",
                    agent=profiler,
                    expected_output="Tactical MO Alert."
                )
                agents_list.append(profiler)
                tasks_list.append(t_mo)
            crew = Crew(agents=agents_list, tasks=tasks_list, verbose=True)
            crew.kickoff()
            # Fix: read each task's own output instead of kickoff()'s return,
            # which would otherwise be overwritten by the last task (the MO one).
            st.session_state.crew_result = t3.output.raw if hasattr(t3.output, 'raw') else str(t3.output)
            if analyze_mo:
                st.session_state.mo_result = t_mo.output.raw if hasattr(t_mo.output, 'raw') else str(t_mo.output)
            else:
                st.session_state.mo_result = None
            st.success("Analysis Complete!")
            st.rerun()
        except Exception as e:
            st.error(f"Error: {e}")
# =========================================
# 7. PERSISTENT DISPLAY
# =========================================
# Rendered from session state on every rerun, so results survive interaction.
if st.session_state.crew_result is not None:
    # Tab layout depends on whether the optional MO analysis produced output.
    if st.session_state.mo_result:
        tabs = st.tabs(["πŸ“„ Report", "πŸ—ΊοΈ Map", "πŸ“Š Charts", "πŸ•΅οΈ MO Analysis"])
        tab_report, tab_map, tab_chart, tab_mo = tabs[0], tabs[1], tabs[2], tabs[3]
    else:
        tabs = st.tabs(["πŸ“„ Report", "πŸ—ΊοΈ Map", "πŸ“Š Charts"])
        tab_report, tab_map, tab_chart = tabs[0], tabs[1], tabs[2]
    with tab_report:
        res = st.session_state.crew_result
        report_text = str(res)
        # Strip markdown code fences the LLM sometimes wraps the report in.
        report_text = report_text.strip()
        if report_text.lower().startswith("```markdown"): report_text = report_text[11:]
        elif report_text.startswith("```"): report_text = report_text[3:]
        if report_text.endswith("```"): report_text = report_text[:-3]
        st.markdown(report_text.strip(), unsafe_allow_html=True)
    with tab_map:
        df = st.session_state.data_cache
        # Re-detect coordinate columns on the filtered cache.
        lat_col = next((col for col in df.columns if 'lat' in col.lower() or 'y' == col.lower()), None)
        lon_col = next((col for col in df.columns if 'lon' in col.lower() or 'long' in col.lower() or 'lng' in col.lower() or 'x' == col.lower()), None)
        if lat_col and lon_col:
            map_data = df.dropna(subset=[lat_col, lon_col])
            if not map_data.empty:
                m = folium.Map(location=[map_data[lat_col].mean(), map_data[lon_col].mean()], tiles='CartoDB positron', zoom_start=11)
                from folium.plugins import HeatMap
                # Cap at 5000 points to keep the embedded HTML responsive.
                HeatMap(map_data[[lat_col, lon_col]].head(5000).values.tolist(), radius=12, blur=15, min_opacity=0.4, gradient={0.4: 'blue', 0.65: 'lime', 1: 'red'}).add_to(m)
                m.fit_bounds([map_data[[lat_col, lon_col]].min().values.tolist(), map_data[[lat_col, lon_col]].max().values.tolist()])
                components.html(m._repr_html_(), height=500)
    with tab_chart:
        # Crime category chart (written to disk by ChartVizTool during the run).
        st.markdown("### πŸ“Š Top Crime Categories")
        if os.path.exists("crime_chart.png"): st.image("crime_chart.png")
        # --- Quick win: time-of-day analysis chart ---
        st.markdown("---")
        st.markdown("### ⏰ Incidents by Time of Day")
        df_chart = st.session_state.data_cache
        if df_chart is not None and not df_chart.empty:
            # Hour-extraction priority: dedicated time column, then a combined
            # datetime column, then any date column as a last resort.
            time_col = next((col for col in df_chart.columns if 'time' in col.lower() and 'datetime' not in col.lower()), None)
            dt_col = next((col for col in df_chart.columns if 'datetime' in col.lower()), None)
            hours = None
            if time_col:
                # Try strict HH:MM first; fall back to generic parsing.
                hours = pd.to_datetime(df_chart[time_col], format='%H:%M', errors='coerce').dt.hour
                if hours.isna().all():
                    hours = pd.to_datetime(df_chart[time_col], errors='coerce').dt.hour
            elif dt_col:
                hours = pd.to_datetime(df_chart[dt_col], errors='coerce').dt.hour
            else:
                date_col_fallback = next((col for col in df_chart.columns if 'date' in col.lower()), None)
                if date_col_fallback:
                    hours = pd.to_datetime(df_chart[date_col_fallback], errors='coerce').dt.hour
            if hours is not None and not hours.isna().all():
                hourly_counts = hours.value_counts().sort_index()
                fig, ax = plt.subplots(figsize=(10, 4))
                sns.barplot(x=hourly_counts.index.astype(int), y=hourly_counts.values, palette="coolwarm", ax=ax)
                ax.set_xlabel("Hour of Day (0-23)")
                ax.set_ylabel("Number of Incidents")
                plt.tight_layout()
                st.pyplot(fig)
            else:
                st.info("Time data not available or parseable in this dataset.")
    if st.session_state.mo_result:
        with tab_mo:
            st.info("🧠 Insights generated by the Behavioral Profiler Agent")
            # Same fence-stripping as the main report tab.
            mo_text = str(st.session_state.mo_result).strip()
            if mo_text.lower().startswith("```markdown"): mo_text = mo_text[11:]
            elif mo_text.startswith("```"): mo_text = mo_text[3:]
            if mo_text.endswith("```"): mo_text = mo_text[:-3]
            st.markdown(mo_text.strip())