# app.py β€” AI Crime Intelligence Dashboard
# (Hugging Face Space by jamesong244, commit 53ebc95)
import streamlit as st
import pandas as pd
import folium
import seaborn as sns
import matplotlib.pyplot as plt
import os
import tempfile
import sys
import re
# --- Disable Telemetry ---
os.environ["CREWAI_TELEMETRY_OPT_OUT"] = "true"
import streamlit.components.v1 as components
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI
from crewai.tools import BaseTool
from fpdf import FPDF
# =========================================
# 1. PAGE CONFIGURATION
# =========================================
# st.set_page_config must be the first Streamlit call in the script;
# wide layout suits the map and chart tabs below.
st.set_page_config(page_title="Crime Copilot Dashboard", layout="wide", page_icon="πŸš“")
st.title("πŸš“ AI Crime Intelligence Dashboard")
st.markdown("---")
# =========================================
# 2. HELPER FUNCTIONS (PDF & GUARDRAILS)
# =========================================
def create_pdf(report_text):
    """Render a markdown-ish report string as a PDF and return its raw bytes.

    Args:
        report_text: the SITREP text produced by the crew (may contain
            markdown headers, bold markers, and code fences).

    Returns:
        bytes: the finished PDF document, ready for st.download_button.

    Raises:
        OSError: if the temporary file cannot be created or removed.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Situation Report (SITREP)", ln=True, align="C")
    pdf.ln(10)
    # Body
    pdf.set_font("Arial", size=12)
    # Strip markdown syntax for plain PDF text ("## "/"### " before "# "
    # so the longer header prefixes are removed first).
    clean_text = report_text.replace("**", "").replace("## ", "").replace("### ", "").replace("# ", "")
    clean_text = clean_text.replace("```markdown", "").replace("```", "")
    # FPDF's core fonts are Latin-1 only; substitute unmappable characters
    # instead of crashing on emoji/unicode.
    clean_text = clean_text.encode('latin-1', 'replace').decode('latin-1')
    pdf.multi_cell(0, 8, clean_text)
    # Write to a physical temp file to guarantee Adobe compatibility.
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)  # close our descriptor so FPDF can reopen the path itself
    try:
        pdf.output(temp_path, "F")
        # Read pure binary data back
        with open(temp_path, "rb") as f:
            pdf_bytes = f.read()
    finally:
        # Fix: remove the temp file even if pdf.output()/read raises,
        # so failed exports don't leak files in the temp directory.
        os.remove(temp_path)
    return pdf_bytes
def validate_data_guardrails(df):
    """Scan an uploaded DataFrame for likely prompt-injection payloads.

    Checks every column header, plus the first 500 non-null values of each
    string (object-dtype) column, for known jailbreak phrases before the
    data is handed to the LLM crew.

    Args:
        df: pandas DataFrame built from the uploaded CSV.

    Returns:
        Tuple (is_safe, message): (True, "Passed") when clean, otherwise
        (False, reason).
    """
    suspicious_phrases = ("ignore previous instructions", "disregard all previous", "you are an ai", "bypass instructions")
    # Column headers end up in prompts verbatim, so vet them first.
    for col in df.columns:
        header = str(col).lower()
        if any(phrase in header for phrase in suspicious_phrases):
            return False, f"Prompt injection detected in column: '{col}'"
    # Only free-text columns can carry injected instructions; the 500-row cap
    # keeps validation fast on large files.
    for col in df.select_dtypes(include=['object']).columns:
        for val in df[col].dropna().head(500):
            text = str(val).lower()
            if any(phrase in text for phrase in suspicious_phrases):
                # Fix: was a pointless f-string with no placeholders.
                return False, "Prompt injection detected in data."
    return True, "Passed"
# =========================================
# 3. SESSION STATE SETUP
# =========================================
# Seed every key the app reads before any widget or rerun can touch it.
_SESSION_DEFAULTS = {
    'data_cache': None,
    'crew_result': None,
    'mo_result': None,
    'current_filename': "",
    'start_date': None,
    'end_date': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# =========================================
# 4. SIDEBAR & DATA LOADING
# =========================================
with st.sidebar:
    st.header("βš™οΈ Configuration")
    # Make the API key input optional for reviewers: if the user types a key
    # it is used; otherwise the host's OPENAI_API_KEY environment variable
    # (set in the deployment settings) serves as the demo key.
    api_key_input = st.text_input("OpenAI API Key (Leave blank to use Demo Key)", type="password")
    if api_key_input:
        os.environ["OPENAI_API_KEY"] = api_key_input
    st.header("πŸ“‚ Data Upload")
    # Offer the bundled sample dataset so reviewers can round-trip it below.
    # NOTE(review): assumes sample_crime_data.csv ships next to app.py;
    # open() raises FileNotFoundError otherwise β€” confirm it is deployed.
    with open("sample_crime_data.csv", "rb") as file:
        st.download_button(
            label="⬇️ Download Sample Crime Data",
            data=file,
            file_name="sample_crime_data.csv",
            mime="text/csv",
            help="Download this file and upload it below to test the dashboard."
        )
    uploaded_file = st.file_uploader("Upload Crime CSV", type=["csv"], key="csv_uploader")
    # Placeholder: the date-range widgets are injected here once a file with
    # a parseable date column has been loaded (see the loading block below).
    date_filter_container = st.container()
    st.markdown("---")
    analyze_mo = st.checkbox("πŸ•΅οΈ Analyse Crime Operandi (MO)", value=False, help="Uses an additional AI Profiler to detect patterns to alert patrol officers.")
    # Export options appear only after a crew run has produced a report.
    if st.session_state.crew_result is not None:
        st.markdown("---")
        st.header("πŸ“₯ Export Options")
        result_obj = st.session_state.crew_result
        # CrewAI results may expose .raw; fall back to str() for plain strings.
        report_text = result_obj.raw if hasattr(result_obj, 'raw') and isinstance(result_obj.raw, str) else str(result_obj)
        pdf_bytes = create_pdf(report_text)
        dynamic_filename = f"SITREP_{st.session_state.start_date}_to_{st.session_state.end_date}.pdf"
        st.download_button(
            label="πŸ“„ Download SITREP (PDF)",
            data=pdf_bytes,
            file_name=dynamic_filename,
            mime="application/pdf",
            type="primary"
        )
def load_raw_data(file):
    """Read an uploaded CSV into a DataFrame, or return None on any failure.

    low_memory=False forces whole-file dtype inference so mixed-type columns
    don't get chunk-dependent dtypes.

    Args:
        file: a file-like object or path accepted by pandas.read_csv.

    Returns:
        pandas.DataFrame on success, None if the file cannot be parsed.
    """
    try:
        return pd.read_csv(file, low_memory=False)
    except Exception:
        # Best-effort by design: the caller treats None as "unreadable file"
        # and skips the downstream pipeline instead of crashing the app.
        return None
if uploaded_file:
    # A new file invalidates all cached data and AI results; rerun so the UI
    # rebuilds from a clean state before parsing the upload.
    if uploaded_file.name != st.session_state.current_filename:
        st.session_state.data_cache = None
        st.session_state.crew_result = None
        st.session_state.mo_result = None
        st.session_state.current_filename = uploaded_file.name
        st.rerun()
    raw_df = load_raw_data(uploaded_file)
    if raw_df is not None:
        # Heuristic coordinate-column detection ('lat'/'y' and 'lon'/'lng'/'x').
        lat_col = next((col for col in raw_df.columns if 'lat' in col.lower() or col.lower() == 'y'), None)
        lon_col = next((col for col in raw_df.columns if 'lon' in col.lower() or 'long' in col.lower() or 'lng' in col.lower() or col.lower() == 'x'), None)
        if lat_col and lon_col:
            # Coerce to numeric and drop rows the heatmap cannot plot.
            raw_df[lat_col] = pd.to_numeric(raw_df[lat_col], errors='coerce')
            raw_df[lon_col] = pd.to_numeric(raw_df[lon_col], errors='coerce')
            raw_df = raw_df.dropna(subset=[lat_col, lon_col])
        # Prefer a pure date column; fall back to a combined datetime column.
        date_col = next((col for col in raw_df.columns if 'date' in col.lower() and 'time' not in col.lower()), None)
        if not date_col: date_col = next((col for col in raw_df.columns if 'datetime' in col.lower()), None)
        # NOTE(review): data_cache is only populated when a date column exists,
        # so files without one can never be analyzed β€” confirm that is intended.
        if date_col:
            # NOTE(review): dayfirst=False assumes US-style dates β€” confirm
            # against the expected source datasets.
            raw_df[date_col] = pd.to_datetime(raw_df[date_col], errors='coerce', dayfirst=False)
            raw_df = raw_df.dropna(subset=[date_col])
            min_date, max_date = raw_df[date_col].min().date(), raw_df[date_col].max().date()
            # Fix: Group the stats UI neatly inside the sidebar placeholder.
            with date_filter_container:
                st.header("πŸ“… Analysis Period")
                start = st.date_input("Start Date", min_date, min_value=min_date, max_value=max_date)
                end = st.date_input("End Date", max_date, min_value=min_date, max_value=max_date)
                st.session_state.start_date, st.session_state.end_date = start, end
                # Filter the working cache to the selected date window.
                mask = (raw_df[date_col].dt.date >= start) & (raw_df[date_col].dt.date <= end)
                st.session_state.data_cache = raw_df.loc[mask]
                # Styled highlight boxes
                st.info(f"Total Rows in File: **{len(raw_df)}**")
                st.success(f"Rows in Selected Dates: **{len(st.session_state.data_cache)}**")
                if lat_col and lon_col:
                    st.caption(f"πŸ“ **Map Ready Points:** {len(st.session_state.data_cache)}")
# =========================================
# 5. TOOLS
# =========================================
class MapVizTool(BaseTool):
    name: str = "Crime Heatmap Generator"
    description: str = "Analyzes location data."

    def _run(self, dummy_arg: str) -> str:
        """Summarise the three most frequent neighborhoods in the cached data.

        Reads the date-filtered DataFrame from Streamlit session state and
        looks for a column whose name mentions 'neighbor' or 'analysis'.
        """
        frame = st.session_state.data_cache
        if frame is None or frame.empty:
            return "Error: No data."
        hood_col = None
        for candidate in frame.columns:
            lowered = candidate.lower()
            if 'neighbor' in lowered or 'analysis' in lowered:
                hood_col = candidate
                break
        if hood_col is None:
            return "Locations processed."
        top_hoods = frame[hood_col].value_counts().head(3)
        return f"Top 3 High-Crime Neighborhoods:\n{top_hoods.to_string()}"
class ChartVizTool(BaseTool):
    name: str = "Crime Trend Chart Generator"
    description: str = "Generates a trend chart."

    def _run(self, dummy_arg: str) -> str:
        """Plot the five most common crime categories and return their counts.

        Saves the bar chart to crime_chart.png (read later by the Charts tab)
        and returns the counts as text for the Analyst agent.
        """
        frame = st.session_state.data_cache
        if frame is None or frame.empty:
            return "Error: No data."
        # Category-column detection in strict priority order:
        # exact "incident category" match, then any "category", then
        # description/type columns as a last resort.
        cat_col = next((c for c in frame.columns if 'incident category' in c.lower()), None)
        if cat_col is None:
            cat_col = next((c for c in frame.columns if 'category' in c.lower()), None)
        if cat_col is None:
            cat_col = next((c for c in frame.columns if 'desc' in c.lower() or 'type' in c.lower()), None)
        if cat_col is None:
            return "Error: No category column."
        top_crimes = frame[cat_col].value_counts().head(5)
        plt.figure(figsize=(10, 6))
        sns.barplot(x=top_crimes.values, y=top_crimes.index, hue=top_crimes.index, palette="magma", legend=False)
        plt.title("Top 5 Crime Trends")
        plt.xlabel("Count")
        plt.ylabel(cat_col)
        plt.tight_layout()
        plt.savefig("crime_chart.png")
        plt.close()
        return f"Chart Data: Top 5 Crime Categories:\n{top_crimes.to_string()}"
# =========================================
# 6. EXECUTION
# =========================================
if st.button("πŸš€ Run Analysis", type="primary"):
    # Preconditions: an API key, a loaded dataset, and a clean guardrail scan.
    if not os.environ.get("OPENAI_API_KEY"): st.error("Please enter your API Key."); st.stop()
    if st.session_state.data_cache is None: st.error("❌ No dataset loaded!"); st.stop()
    is_safe, security_msg = validate_data_guardrails(st.session_state.data_cache)
    if not is_safe: st.error(f"🚨 Security Alert: {security_msg}"); st.stop()
    else: st.success("βœ… LLM Guardrails Validation Passed")
    with st.spinner("πŸ€– AI Agents are generating intelligence..."):
        # NOTE(review): st.rerun() below signals via an internal exception β€”
        # confirm the broad `except Exception` at the bottom does not swallow it.
        try:
            # Low temperature keeps the numeric reporting consistent.
            llm = ChatOpenAI(model="gpt-4o", verbose=True, temperature=0.3)
            # Analyst: the only agent with tools; produces the hard numbers.
            analyst = Agent(
                role="Senior Analyst",
                goal="Analyze crime statistics accurately.",
                backstory="You provide hard numbers on top crime types and locations.",
                tools=[MapVizTool(), ChartVizTool()],
                llm=llm,
                verbose=True
            )
            # Commander: turns the Analyst's numbers into the SITREP narrative.
            writer = Agent(
                role="Commander",
                goal="Write a detailed Situation Report (SITREP).",
                backstory="You write executive summaries. You MUST use the exact numbers provided by the Analyst.",
                llm=llm,
                verbose=True
            )
            # Formatter: final pass enforcing the markdown contract.
            auditor = Agent(
                role="Formatter",
                goal="Ensure strict Markdown formatting.",
                backstory="Formats reports perfectly. Removes all triple backticks (```) from output.",
                llm=llm,
                verbose=True
            )
            start_str, end_str = str(st.session_state.start_date), str(st.session_state.end_date)
            t1 = Task(
                description=f"Analyze data from {start_str} to {end_str}. 1. Use the Chart Tool to get top crime stats. 2. Use the Map Tool to get hotspot neighborhoods.",
                agent=analyst,
                expected_output="A summary of top crime stats and hotspots."
            )
            # Fix: Strict Markdown template for the SITREP to match user requirements.
            t2 = Task(
                description=f"Write a Situation Report using EXACT numbers from the Analyst. You MUST use this exact markdown structure:\n"
                            f"# SITREP: {start_str} to {end_str}\n\n"
                            "## Overview\n[Write a brief summary of the period]\n\n"
                            "## Threat Breakdown\n[List the top 5 crimes with their exact numbers]\n\n"
                            "## Hotspots\n[List the top neighborhoods with their exact numbers]\n\n"
                            "## Recommendations\n[Provide tactical advice]",
                agent=writer,
                expected_output="A strictly formatted SITREP text with numbers."
            )
            t3 = Task(
                description="Review the SITREP. Ensure it strictly uses the specific Markdown headers provided (# SITREP, ## Overview, ## Threat Breakdown, ## Hotspots, ## Recommendations). Strip any markdown code fences (```) from the final output.",
                agent=auditor,
                expected_output="Clean Markdown report."
            )
            tasks_list = [t1, t2, t3]
            agents_list = [analyst, writer, auditor]
            # Optional fourth agent: behavioral profiling for patrol alerts.
            if analyze_mo:
                profiler = Agent(
                    role="Behavioral Profiler",
                    goal="Identify Modus Operandi (MO) and patterns.",
                    backstory="Expert in predicting criminal behavior and generating BOLO (Be On the Look Out) alerts.",
                    llm=llm,
                    verbose=True
                )
                t_mo = Task(
                    description=f"Based on the Analyst's findings for {start_str} to {end_str}, write a 'Tactical MO Alert'. Identify the most likely operational patterns. Provide 3 bullet points for patrol officers.",
                    agent=profiler,
                    expected_output="Tactical MO Alert."
                )
                agents_list.append(profiler)
                tasks_list.append(t_mo)
            crew = Crew(agents=agents_list, tasks=tasks_list, verbose=True)
            crew.kickoff()
            # Fix: read each task's own output instead of kickoff()'s return,
            # which would otherwise be overwritten by the last task (the MO one).
            st.session_state.crew_result = t3.output.raw if hasattr(t3.output, 'raw') else str(t3.output)
            if analyze_mo:
                st.session_state.mo_result = t_mo.output.raw if hasattr(t_mo.output, 'raw') else str(t_mo.output)
            else:
                st.session_state.mo_result = None
            st.success("Analysis Complete!")
            st.rerun()
        except Exception as e:
            st.error(f"Error: {e}")
# =========================================
# 7. PERSISTENT DISPLAY
# =========================================
# Rendered from session state on every rerun, so results survive interaction.
if st.session_state.crew_result is not None:
    # Tab layout depends on whether the optional MO analysis produced output.
    if st.session_state.mo_result:
        tabs = st.tabs(["πŸ“„ Report", "πŸ—ΊοΈ Map", "πŸ“Š Charts", "πŸ•΅οΈ MO Analysis"])
        tab_report, tab_map, tab_chart, tab_mo = tabs[0], tabs[1], tabs[2], tabs[3]
    else:
        tabs = st.tabs(["πŸ“„ Report", "πŸ—ΊοΈ Map", "πŸ“Š Charts"])
        tab_report, tab_map, tab_chart = tabs[0], tabs[1], tabs[2]
    with tab_report:
        res = st.session_state.crew_result
        report_text = str(res)
        # Strip markdown code fences the LLM sometimes wraps the report in.
        report_text = report_text.strip()
        if report_text.lower().startswith("```markdown"): report_text = report_text[11:]
        elif report_text.startswith("```"): report_text = report_text[3:]
        if report_text.endswith("```"): report_text = report_text[:-3]
        st.markdown(report_text.strip(), unsafe_allow_html=True)
    with tab_map:
        df = st.session_state.data_cache
        # Re-detect coordinate columns on the filtered cache.
        lat_col = next((col for col in df.columns if 'lat' in col.lower() or 'y' == col.lower()), None)
        lon_col = next((col for col in df.columns if 'lon' in col.lower() or 'long' in col.lower() or 'lng' in col.lower() or 'x' == col.lower()), None)
        if lat_col and lon_col:
            map_data = df.dropna(subset=[lat_col, lon_col])
            if not map_data.empty:
                m = folium.Map(location=[map_data[lat_col].mean(), map_data[lon_col].mean()], tiles='CartoDB positron', zoom_start=11)
                from folium.plugins import HeatMap
                # Cap at 5000 points to keep the embedded HTML responsive.
                HeatMap(map_data[[lat_col, lon_col]].head(5000).values.tolist(), radius=12, blur=15, min_opacity=0.4, gradient={0.4: 'blue', 0.65: 'lime', 1: 'red'}).add_to(m)
                m.fit_bounds([map_data[[lat_col, lon_col]].min().values.tolist(), map_data[[lat_col, lon_col]].max().values.tolist()])
                components.html(m._repr_html_(), height=500)
    with tab_chart:
        # Crime category chart (written to disk by ChartVizTool during the run).
        st.markdown("### πŸ“Š Top Crime Categories")
        if os.path.exists("crime_chart.png"): st.image("crime_chart.png")
        # --- Quick win: time-of-day analysis chart ---
        st.markdown("---")
        st.markdown("### ⏰ Incidents by Time of Day")
        df_chart = st.session_state.data_cache
        if df_chart is not None and not df_chart.empty:
            # Hour-extraction priority: dedicated time column, then a combined
            # datetime column, then any date column as a last resort.
            time_col = next((col for col in df_chart.columns if 'time' in col.lower() and 'datetime' not in col.lower()), None)
            dt_col = next((col for col in df_chart.columns if 'datetime' in col.lower()), None)
            hours = None
            if time_col:
                # Try strict HH:MM first; fall back to generic parsing.
                hours = pd.to_datetime(df_chart[time_col], format='%H:%M', errors='coerce').dt.hour
                if hours.isna().all():
                    hours = pd.to_datetime(df_chart[time_col], errors='coerce').dt.hour
            elif dt_col:
                hours = pd.to_datetime(df_chart[dt_col], errors='coerce').dt.hour
            else:
                date_col_fallback = next((col for col in df_chart.columns if 'date' in col.lower()), None)
                if date_col_fallback:
                    hours = pd.to_datetime(df_chart[date_col_fallback], errors='coerce').dt.hour
            if hours is not None and not hours.isna().all():
                hourly_counts = hours.value_counts().sort_index()
                fig, ax = plt.subplots(figsize=(10, 4))
                sns.barplot(x=hourly_counts.index.astype(int), y=hourly_counts.values, palette="coolwarm", ax=ax)
                ax.set_xlabel("Hour of Day (0-23)")
                ax.set_ylabel("Number of Incidents")
                plt.tight_layout()
                st.pyplot(fig)
            else:
                st.info("Time data not available or parseable in this dataset.")
    if st.session_state.mo_result:
        with tab_mo:
            st.info("🧠 Insights generated by the Behavioral Profiler Agent")
            # Same fence-stripping as the main report tab.
            mo_text = str(st.session_state.mo_result).strip()
            if mo_text.lower().startswith("```markdown"): mo_text = mo_text[11:]
            elif mo_text.startswith("```"): mo_text = mo_text[3:]
            if mo_text.endswith("```"): mo_text = mo_text[:-3]
            st.markdown(mo_text.strip())