"""AI Crime Intelligence Dashboard (Streamlit + CrewAI).

Flow of the script, top to bottom:
  1. Page configuration.
  2. Helper functions (PDF export, prompt-injection guardrail).
  3. Session-state defaults.
  4. Sidebar: API key, CSV upload, date filter, PDF export.
  5. CrewAI tools that read the filtered DataFrame from session state.
  6. Execution: plan generation -> human approval -> full crew run.
"""
import streamlit as st
import pandas as pd
import folium
import seaborn as sns
import matplotlib.pyplot as plt
import os
import tempfile
import sys
import re

# --- Disable Telemetry ---
# Set before the crewai import below (presumably so the opt-out is already
# in the environment when crewai is loaded).
os.environ["CREWAI_TELEMETRY_OPT_OUT"] = "true"

import streamlit.components.v1 as components
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI
from crewai.tools import BaseTool
from fpdf import FPDF

# --- Global Formatting ---
# Render floats without decimals / scientific notation in every to_string() call.
pd.set_option('display.float_format', lambda x: '%.0f' % x)

# =========================================
# 1. PAGE CONFIGURATION
# =========================================
# NOTE(review): the emoji in the UI strings throughout this file appear to be
# mis-encoded (mojibake). They are runtime strings and are left untouched here;
# restore them from a correctly encoded copy of the source.
st.set_page_config(page_title="Crime Copilot Dashboard", layout="wide", page_icon="π")
st.title("π AI Crime Intelligence Dashboard")
st.markdown("---")


# =========================================
# 2. HELPER FUNCTIONS (PDF & GUARDRAILS)
# =========================================
def create_pdf(report_text):
    """Render a SITREP as PDF bytes by round-tripping through a temp file.

    Args:
        report_text: Markdown-ish report text. Bold markers, heading markers
            and code fences are stripped; characters outside latin-1 are
            replaced with '?' because the font in use is a core (latin-1) font.

    Returns:
        The PDF document as ``bytes``.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Situation Report (SITREP)", ln=True, align="C")
    pdf.ln(10)

    # Body
    pdf.set_font("Arial", size=12)

    # Clean up markdown for PDF text.
    # Heading markers are removed longest-first: stripping "## " before "### "
    # would turn "### Title" into "#Title".
    clean_text = report_text.replace("**", "")
    for marker in ("### ", "## ", "# "):
        clean_text = clean_text.replace(marker, "")
    clean_text = clean_text.replace("```markdown", "").replace("```", "")
    clean_text = clean_text.encode('latin-1', 'replace').decode('latin-1')

    pdf.multi_cell(0, 8, clean_text)

    # Write to a physical temp file to guarantee Adobe compatibility
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)  # Close file descriptor so FPDF can use it
    try:
        pdf.output(temp_path, "F")
        # Read pure binary data back
        with open(temp_path, "rb") as f:
            pdf_bytes = f.read()
    finally:
        # Clean up even if writing or reading fails.
        os.remove(temp_path)

    return pdf_bytes


def validate_data_guardrails(df, sample_size=500):
    """Scan a DataFrame for simple prompt-injection phrases.

    Checks every column name, then the first ``sample_size`` non-null values
    of every object-dtype column, using case-insensitive substring matching.

    Args:
        df: The DataFrame that will be exposed to the LLM tools.
        sample_size: How many non-null values per text column to inspect.

    Returns:
        ``(True, "Passed")`` when nothing suspicious is found, otherwise
        ``(False, <reason>)``.
    """
    suspicious_phrases = [
        "ignore previous instructions",
        "disregard all previous",
        "you are an ai",
        "bypass instructions",
    ]

    for col in df.columns:
        col_lower = str(col).lower()
        if any(phrase in col_lower for phrase in suspicious_phrases):
            return False, f"Prompt injection detected in column: '{col}'"

    str_cols = df.select_dtypes(include=['object']).columns
    for col in str_cols:
        for val in df[col].dropna().head(sample_size):
            val_lower = str(val).lower()
            if any(phrase in val_lower for phrase in suspicious_phrases):
                return False, "Prompt injection detected in data."

    return True, "Passed"


# =========================================
# 3. SESSION STATE SETUP
# =========================================
if 'data_cache' not in st.session_state:
    st.session_state.data_cache = None
if 'crew_result' not in st.session_state:
    st.session_state.crew_result = None
if 'mo_result' not in st.session_state:
    st.session_state.mo_result = None
if 'current_filename' not in st.session_state:
    st.session_state.current_filename = ""
if 'start_date' not in st.session_state:
    st.session_state.start_date = None
if 'end_date' not in st.session_state:
    st.session_state.end_date = None
if 'bolo_vault' not in st.session_state:
    st.session_state.bolo_vault = []
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'analysis_plan' not in st.session_state:
    st.session_state.analysis_plan = None
if 'plan_approved' not in st.session_state:
    st.session_state.plan_approved = False
if 'guardrail_results' not in st.session_state:
    st.session_state.guardrail_results = {}

# =========================================
# 4. SIDEBAR & DATA LOADING
# =========================================
SAMPLE_DATA_PATH = "sample_crime_data.csv"

with st.sidebar:
    st.header("βοΈ Configuration")

    # Make the API key input optional for reviewers
    api_key_input = st.text_input("OpenAI API Key (Leave blank to use Demo Key)", type="password")

    # If the user types a key, use it. Otherwise, Hugging Face will automatically
    # use the secret OPENAI_API_KEY environment variable we set in the settings.
    if api_key_input:
        os.environ["OPENAI_API_KEY"] = api_key_input

    st.header("π Data Upload")

    # Provide the sample dataset download.
    # Guarded so a missing sample file hides the button instead of crashing the app.
    if os.path.exists(SAMPLE_DATA_PATH):
        with open(SAMPLE_DATA_PATH, "rb") as file:
            st.download_button(
                label="β¬οΈ Download Sample Crime Data",
                data=file,
                file_name="sample_crime_data.csv",
                mime="text/csv",
                help="Download this file and upload it below to test the dashboard."
            )

    uploaded_file = st.file_uploader("Upload Crime CSV", type=["csv"], key="csv_uploader")

    # Placeholder that is filled with the date pickers once a file is parsed.
    date_filter_container = st.container()

    st.markdown("---")
    analyze_mo = st.checkbox(
        "π΅οΈ Analyse Crime Operandi (MO)",
        value=False,
        help="Uses an additional AI Profiler to detect patterns to alert patrol officers."
    )

    if st.session_state.crew_result is not None:
        st.markdown("---")
        st.header("π₯ Export Options")
        result_obj = st.session_state.crew_result
        report_text = result_obj.raw if hasattr(result_obj, 'raw') and isinstance(result_obj.raw, str) else str(result_obj)
        pdf_bytes = create_pdf(report_text)
        dynamic_filename = f"SITREP_{st.session_state.start_date}_to_{st.session_state.end_date}.pdf"
        st.download_button(
            label="π Download SITREP (PDF)",
            data=pdf_bytes,
            file_name=dynamic_filename,
            mime="application/pdf",
            type="primary"
        )


def load_raw_data(file):
    """Parse an uploaded CSV into a DataFrame.

    Returns:
        The DataFrame, or ``None`` when the file cannot be parsed (the error
        is shown to the user instead of being silently dropped).
    """
    try:
        return pd.read_csv(file, low_memory=False)
    except Exception as e:  # UI boundary: report and carry on without data
        st.error(f"Could not read the uploaded CSV: {e}")
        return None


if uploaded_file:
    # A different file invalidates everything derived from the previous one.
    if uploaded_file.name != st.session_state.current_filename:
        st.session_state.data_cache = None
        st.session_state.crew_result = None
        st.session_state.mo_result = None
        st.session_state.current_filename = uploaded_file.name
        st.rerun()

    raw_df = load_raw_data(uploaded_file)
    if raw_df is not None:
        # str(col) keeps the heuristics safe for non-string column labels.
        lat_col = next(
            (col for col in raw_df.columns
             if 'lat' in str(col).lower() or str(col).lower() == 'y'),
            None
        )
        lon_col = next(
            (col for col in raw_df.columns
             if 'lon' in str(col).lower() or 'long' in str(col).lower()
             or 'lng' in str(col).lower() or str(col).lower() == 'x'),
            None
        )

        if lat_col and lon_col:
            raw_df[lat_col] = pd.to_numeric(raw_df[lat_col], errors='coerce')
            raw_df[lon_col] = pd.to_numeric(raw_df[lon_col], errors='coerce')
            raw_df = raw_df.dropna(subset=[lat_col, lon_col])

        # Prefer a pure date column; fall back to a datetime column.
        date_col = next(
            (col for col in raw_df.columns
             if 'date' in str(col).lower() and 'time' not in str(col).lower()),
            None
        )
        if not date_col:
            date_col = next((col for col in raw_df.columns if 'datetime' in str(col).lower()), None)

        # NOTE: without a detectable date column data_cache is left untouched.
        if date_col:
            raw_df[date_col] = pd.to_datetime(raw_df[date_col], errors='coerce', dayfirst=False)
            raw_df = raw_df.dropna(subset=[date_col])

            if raw_df.empty:
                # min()/max() would be NaT here and NaT.date() raises.
                with date_filter_container:
                    st.warning("No rows with a valid date were found in the uploaded file.")
            else:
                min_date, max_date = raw_df[date_col].min().date(), raw_df[date_col].max().date()

                # Fix: Group the stats UI neatly
                with date_filter_container:
                    st.header("π Analysis Period")
                    start = st.date_input("Start Date", min_date, min_value=min_date, max_value=max_date)
                    end = st.date_input("End Date", max_date, min_value=min_date, max_value=max_date)
                    st.session_state.start_date, st.session_state.end_date = start, end

                    mask = (raw_df[date_col].dt.date >= start) & (raw_df[date_col].dt.date <= end)
                    st.session_state.data_cache = raw_df.loc[mask]

                    # Styled highlight boxes.
                    # "Total Rows" counts rows left after the coordinate/date clean-up above.
                    st.info(f"Total Rows in File: **{len(raw_df)}**")
                    st.success(f"Rows in Selected Dates: **{len(st.session_state.data_cache)}**")
                    if lat_col and lon_col:
                        st.caption(f"π **Map Ready Points:** {len(st.session_state.data_cache)}")


# =========================================
# 5. TOOLS
# =========================================
# Every tool reads the date-filtered DataFrame from st.session_state.data_cache
# and returns plain text for the agent.
class DataDiscoveryTool(BaseTool):
    """Reports column names, the first three rows and the dtypes."""

    name: str = "Data Schema Explorer"
    description: str = "Use this tool FIRST to understand the dataset structure, column names, and sample data."

    def _run(self, dummy_arg: str = "") -> str:
        df = st.session_state.data_cache
        if df is None or df.empty:
            return "Error: No data loaded."

        buffer = []
        buffer.append(f"Columns: {list(df.columns)}")
        buffer.append("\nFirst 3 rows of data:")
        # Prevent scientific notation in output
        buffer.append(df.head(3).to_string(index=False))
        buffer.append("\nData Types:")
        buffer.append(df.dtypes.to_string())
        return "\n".join(buffer)


class TextSearchTool(BaseTool):
    """Case-insensitive keyword search across all text columns."""

    name: str = "Crime Text Searcher"
    description: str = "Search for specific keywords (e.g., 'suspicious', 'knife', 'vehicle') within text columns. Returns full matching rows."

    def _run(self, keyword: str) -> str:
        df = st.session_state.data_cache
        if df is None or df.empty:
            return "Error: No data."

        # str.contains treats the keyword as a regex. Keep that for valid
        # patterns, but fall back to a literal match when the keyword is not
        # a valid regex (e.g. "(" or "c++") instead of raising.
        try:
            re.compile(keyword)
            pattern = keyword
        except re.error:
            pattern = re.escape(keyword)

        # Identify text columns (object or string)
        text_cols = [col for col in df.columns if df[col].dtype == 'object' or df[col].dtype == 'string']

        results = []
        for col in text_cols:
            matches = df[df[col].astype(str).str.contains(pattern, case=False, na=False)]
            if not matches.empty:
                # Return the full row for context, but limit to 10 rows for brevity
                results.append(f"Found {len(matches)} matches in column '{col}':\n{matches.head(10).to_string(index=False)}")

        if not results:
            return f"No matches found for keyword '{keyword}' in any text column."
        return "\n\n".join(results)


class DataQueryTool(BaseTool):
    """Exact-match lookup of one value in one column."""

    name: str = "Specific Data Lookup"
    description: str = "Use this to filter the data for a specific value in a specific column (e.g., 'Incident ID' is 1329963)."

    def _run(self, column: str, value: str) -> str:
        df = st.session_state.data_cache
        if df is None or df.empty:
            return "Error: No data."
        if column not in df.columns:
            return f"Error: Column '{column}' not found. Available: {list(df.columns)}"

        try:
            # Compare numerically when the column is numeric.
            val_to_search = pd.to_numeric(value) if pd.api.types.is_numeric_dtype(df[column]) else value
            matches = df[df[column] == val_to_search]
        except (ValueError, TypeError):
            # Value is not numeric / not comparable: fall back to string equality.
            matches = df[df[column].astype(str) == str(value)]

        if matches.empty:
            return f"No records found where '{column}' is '{value}'."

        # If it's a single record, return a clean, vertical list without noise/NaNs
        if len(matches) == 1:
            record = matches.iloc[0].to_dict()
            exclude_terms = ['lat', 'lon', 'point', 'cnn', 'row id', 'boundary', 'hsoc', 'supervisor district']
            filtered = {
                k: v for k, v in record.items()
                if pd.notna(v) and not any(x in k.lower() for x in exclude_terms)
            }
            return "Specific Record Details:\n" + "\n".join([f"- **{k}**: {v}" for k, v in filtered.items()])

        return f"Found {len(matches)} record(s):\n{matches.to_string(index=False)}"


class MapVizTool(BaseTool):
    """Returns the three most frequent values of a neighborhood-like column."""

    name: str = "Crime Heatmap Generator"
    description: str = "Analyzes location data to find high-crime neighborhoods."

    # dummy_arg gets a default, matching DataDiscoveryTool, so the tool can
    # be invoked without an argument.
    def _run(self, dummy_arg: str = "") -> str:
        df = st.session_state.data_cache
        if df is None or df.empty:
            return "Error: No data."

        # Try to find a neighborhood or district column
        neigh_col = next(
            (col for col in df.columns
             if any(x in col.lower() for x in ['neighbor', 'analysis', 'district', 'area', 'precinct'])),
            None
        )
        if neigh_col:
            counts = df[neigh_col].value_counts().head(3)
            return f"Top 3 High-Crime Areas (using '{neigh_col}'):\n{counts.to_string()}"
        return "Locations processed, but no specific neighborhood column identified for stats."


class ChartVizTool(BaseTool):
    """Saves a bar or pie chart of the top-N values of a category column.

    Any chart_type that does not contain 'pie' (including 'line', which the
    description mentions) is drawn as a bar chart.
    """

    name: str = "Crime Trend Chart Generator"
    description: str = "Generates charts (bar, pie, line) based on a specific category column. You can specify chart_type ('bar' or 'pie'), top_n, and save_path."

    def _run(self, category_column: str = "", save_path: str = "crime_chart.png", top_n: str = "5", chart_type: str = "bar") -> str:
        df = st.session_state.data_cache
        if df is None or df.empty:
            return "Error: No data."

        # top_n arrives as free text from the agent ("5", "top 10", ...).
        try:
            n = int(re.search(r'\d+', str(top_n)).group())
        except (AttributeError, ValueError):
            n = 5
        n = max(n, 1)  # "0" would otherwise produce an empty chart

        cat_col = category_column if category_column in df.columns else None
        if not cat_col:
            # Fall back to the first column whose name looks like a category.
            search_terms = ['incident category', 'category', 'description', 'offense', 'type']
            for term in search_terms:
                found = next((col for col in df.columns if term in col.lower()), None)
                if found:
                    cat_col = found
                    break
        if not cat_col:
            return "Error: Could not identify a crime category column."

        top_crimes = df[cat_col].value_counts().head(n)

        plt.figure(figsize=(10, 6))
        try:
            if 'pie' in chart_type.lower():
                plt.pie(top_crimes.values, labels=top_crimes.index, autopct='%1.1f%%', colors=sns.color_palette("magma", n))
                plt.title(f"Top {n} Crime Categories Distribution ({cat_col})")
            else:
                sns.barplot(x=top_crimes.values, y=top_crimes.index, hue=top_crimes.index, palette="magma", legend=False)
                plt.title(f"Top {n} Crime Trends ({cat_col})")
                plt.xlabel("Count")
                plt.ylabel(cat_col)

            plt.tight_layout()
            plt.savefig(save_path)
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close()

        return f"CHART_FILE:{save_path} | Chart Data: Top {n} categories from column '{cat_col}':\n{top_crimes.to_string()}"


class BOLOTool(BaseTool):
    """Appends a single AI-generated BOLO to st.session_state.bolo_vault."""

    name: str = "BOLO Publisher"
    description: str = "Use this to create an official 'Be On The Look Out' (BOLO) alert for patrol officers."

    def _run(self, alert_content: str, urgency: str = "MEDIUM") -> str:
        new_bolo = {
            "source": "AI Intelligence Unit",
            "content": alert_content,
            "urgency": urgency.upper(),
            "timestamp": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M")
        }
        st.session_state.bolo_vault.append(new_bolo)
        return f"BOLO Successfully Published: {alert_content[:50]}..."


class BulkBOLOTool(BaseTool):
    """Creates one BOLO per row for the first ``count`` rows of the data."""

    name: str = "Bulk BOLO Creator"
    description: str = "Use this to create many BOLOs at once. Input should be a number of BOLOs to generate from current findings."

    def _run(self, count: str) -> str:
        try:
            # Clamp at zero: df.head(-n) would return all but the last n rows.
            num = max(int(count), 0)
            df = st.session_state.data_cache
            if df is None or df.empty:
                return "Error: No data to create BOLOs from."

            sample_data = df.head(num)
            for idx, row in sample_data.iterrows():
                st.session_state.bolo_vault.append({
                    "source": "Bulk AI Dispatch",
                    "content": f"Automated Alert: {row.get('Incident Category', 'Crime')} in {row.get('Analysis Neighborhood', 'Unknown Area')}",
                    "urgency": "MEDIUM",
                    "timestamp": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M")
                })
            return f"Successfully created {len(sample_data)} BOLOs."
        except Exception as e:
            return f"Error: {e}"


# =========================================
# 6. EXECUTION
# =========================================
if st.button("π Run Analysis", type="primary"):
    if not os.environ.get("OPENAI_API_KEY"):
        st.error("Please enter your API Key.")
        st.stop()
    if st.session_state.data_cache is None:
        st.error("β No dataset loaded!")
        st.stop()

    # --- RESET PREVIOUS STATE ---
    st.session_state.crew_result = None
    st.session_state.mo_result = None
    st.session_state.analysis_plan = None
    st.session_state.plan_approved = False
    st.session_state.bolo_vault = []  # Clear previous AI BOLOs (keep manual ones if desired, but here we clear for a fresh run)

    is_safe, security_msg = validate_data_guardrails(st.session_state.data_cache)
    if not is_safe:
        st.error(f"π¨ Security Alert: {security_msg}")
        st.stop()
    else:
        st.success("β LLM Guardrails Validation Passed")

    with st.spinner("π€ Chief of Intelligence is drafting an Investigative Plan..."):
        try:
            llm = ChatOpenAI(model="gpt-4o", verbose=True, temperature=0.3)

            # --- PHASE 1: Plan Generation ---
            planner = Agent(
                role="Strategic Crime Intelligence Planner",
                goal="Review the available data and propose a high-level investigative focus for the team.",
                backstory="You are a veteran detective. You look at the columns and sample data to decide what the most critical areas of focus should be (e.g., specific crime surges or geographic hotspots).",
                tools=[DataDiscoveryTool()],
                llm=llm,
                verbose=True
            )

            p1 = Task(
                description="Use the Schema Explorer to look at the data. Propose a 3-point Investigative Plan (e.g. '1. I will focus on Larceny trends in Pacific Heights...').",
                agent=planner,
                expected_output="A concise, 3-point investigative plan for approval."
            )

            crew_plan = Crew(agents=[planner], tasks=[p1], verbose=True)
            result = crew_plan.kickoff()
            st.session_state.analysis_plan = result.raw if hasattr(result, 'raw') else str(result)
        except Exception as e:
            st.error(f"Planning Error: {e}")
        else:
            # Rerun outside the try so the rerun signal can never be caught
            # by the broad handler above.
            st.rerun()

# --- Plan Approval Interface ---
if st.session_state.analysis_plan and not st.session_state.plan_approved:
    st.markdown("---")
    st.warning("π΅οΈ **Proposed Investigative Plan (Approval Required)**")
    st.markdown(st.session_state.analysis_plan)

    col_app, col_rej = st.columns(2)
    with col_app:
        if st.button("β Approve & Execute Full Analysis", use_container_width=True):
            st.session_state.plan_approved = True
            st.rerun()
    with col_rej:
        if st.button("β Reject & Discard Plan", use_container_width=True):
            st.session_state.analysis_plan = None
            st.rerun()

# --- Full Execution (Only if Approved) ---
if st.session_state.plan_approved:
    with st.spinner("π€ AI Agents are executing the approved plan..."):
        try:
            llm = ChatOpenAI(model="gpt-4o", verbose=True, temperature=0.3)

            analyst = Agent(
                role="Senior Data Forensic Specialist",
                goal="Explore the crime dataset, identify the correct columns for analysis, and extract statistics accurately.",
                backstory="You are an expert at handling diverse datasets. Your first priority is to discover what the columns mean using the Schema Explorer Tool. Once you understand the schema, you use specialized tools to generate reports and trends based on the real column names you find.",
                tools=[DataDiscoveryTool(), MapVizTool(), ChartVizTool()],
                llm=llm,
                verbose=True
            )
            writer = Agent(
                role="Commander",
                goal="Write a detailed Situation Report (SITREP).",
                backstory="You write executive summaries. You MUST use the exact numbers provided by the Analyst.",
                llm=llm,
                verbose=True
            )
            auditor = Agent(
                role="Tactical Compliance Auditor",
                goal="Ensure the SITREP is accurate, avoids hallucinations, and follows privacy guardrails.",
                backstory="You are a senior oversight officer. You review the SITREP and MO alerts. You MUST verify that: 1. No PII (names/phone numbers) is present. 2. All numbers match the analyst's data. 3. The advice is actionable. If it's not, you return it for revision.",
                llm=llm,
                verbose=True
            )

            start_str, end_str = str(st.session_state.start_date), str(st.session_state.end_date)

            t1 = Task(
                description=(
                    f"Process the data for the period {start_str} to {end_str}.\n"
                    "1. First, use the 'Data Schema Explorer' to see the actual column names and sample data.\n"
                    "2. Based on your discovery, identify which columns contain crime types (e.g., 'Category' or 'Incident Type') "
                    "and which contain neighborhood/area names.\n"
                    "3. Use the 'Crime Trend Chart Generator' (providing the exact column name you found) and 'Crime Heatmap Generator' "
                    "to extract top stats and hotspots."
                ),
                agent=analyst,
                expected_output="A summary explaining the data schema and providing the exact top crime statistics and hotspots found."
            )

            # Fix: Strict Markdown Template for SITREP to match user requirements
            t2 = Task(
                description=f"Write a Situation Report using EXACT numbers from the Analyst. You MUST use this exact markdown structure:\n"
                            f"# SITREP: {start_str} to {end_str}\n\n"
                            "## Overview\n[Write a brief summary of the period]\n\n"
                            "## Threat Breakdown\n[List the top 5 crimes with their exact numbers]\n\n"
                            "## Hotspots\n[List the top neighborhoods with their exact numbers]\n\n"
                            "## Recommendations\n[Provide tactical advice]",
                agent=writer,
                expected_output="A strictly formatted SITREP text with numbers."
            )

            t3 = Task(
                description="Review the SITREP. 1. Strip all triple backticks (```). 2. Ensure all numbers match the Analyst's report. 3. Verify no PII (personal names/addresses) is included. 4. Confirm the Markdown headers are exactly as requested.",
                agent=auditor,
                expected_output="A verified, clean Markdown SITREP."
            )

            tasks_list = [t1, t2, t3]
            agents_list = [analyst, writer, auditor]

            if analyze_mo:
                profiler = Agent(
                    role="Behavioral Profiler",
                    goal="Identify Modus Operandi (MO) and patterns, then create official BOLO alerts.",
                    backstory="Expert in predicting criminal behavior. You search for repeating patterns. You MUST start your response with a line of 'Tactical Tags' in brackets like [Nighttime][Forced Entry] followed by your detailed analysis.",
                    tools=[BOLOTool()],
                    llm=llm,
                    verbose=True
                )
                t_mo = Task(
                    description=f"Based on the Analyst's findings for {start_str} to {end_str}, identify 2 specific, actionable BOLO alerts and write a 'Behavioral MO Analysis'. 1. Use the BOLO tool for alerts. 2. For the MO Analysis, identify patterns in time, location, and method. Start with [Tactical Tags].",
                    agent=profiler,
                    expected_output="Behavioral MO Analysis with Tactical Tags."
                )
                agents_list.append(profiler)
                tasks_list.append(t_mo)

            # --- Hierarchical Manager Implementation ---
            manager = Agent(
                role="Chief of Intelligence",
                goal="Oversee the crime analysis process and ensure the final SITREP is accurate, actionable, and professionally formatted.",
                backstory="You are a veteran police chief. You delegate tasks to your team and review their work for quality and accuracy. You only approve reports that meet the highest standards of investigative integrity.",
                llm=llm,
                verbose=True
            )

            crew = Crew(
                agents=agents_list,
                tasks=tasks_list,
                verbose=True,
                process=Process.hierarchical,
                manager_agent=manager
            )
            crew.kickoff()

            # Update Guardrail Log (Simulated as part of agentic review)
            # NOTE: these statuses are static text, not the result of checks run here.
            st.session_state.guardrail_results = {
                "Injection Check": "β CLEARED",
                "Data Hallucination Check": "β CLEARED (Verified against Analyst Stats)",
                "PII Filter (Privacy)": "β CLEARED (No sensitive names found)",
                "Actionability Audit": "β CLEARED (Strategic recommendations provided)",
                "Markdown Integrity": "β CLEARED"
            }

            # Fix: Explicitly grab the output of the final report, avoiding the overwrite bug
            st.session_state.crew_result = t3.output.raw if hasattr(t3.output, 'raw') else str(t3.output)

            if analyze_mo:
                st.session_state.mo_result = t_mo.output.raw if hasattr(t_mo.output, 'raw') else str(t_mo.output)
            else:
                st.session_state.mo_result = None

            # Reset Flag
            st.session_state.plan_approved = False
            st.session_state.analysis_plan = None
        except Exception as e:
            # Clear the approval so a failed run is not re-executed on every
            # subsequent rerun; the plan stays available for re-approval.
            st.session_state.plan_approved = False
            st.error(f"Error: {e}")
        else:
            st.success("Analysis Complete!")
            # Rerun outside the try so the broad handler cannot intercept it.
            st.rerun()

# =========================================
# 7.
PERSISTENT DISPLAY # ========================================= if st.session_state.crew_result is not None: tabs_list = ["π Report", "πΊοΈ Map", "π Charts", "π¨ BOLO Center", "π¬ Command Center"] if st.session_state.mo_result: tabs_list.insert(3, "π΅οΈ MO Analysis") tabs = st.tabs(tabs_list) # Map tabs to specific variables based on presence of MO tab_report, tab_map, tab_chart = tabs[0], tabs[1], tabs[2] if st.session_state.mo_result: tab_mo, tab_bolo, tab_command = tabs[3], tabs[4], tabs[5] else: tab_bolo, tab_command = tabs[3], tabs[4] if st.session_state.mo_result: with tab_mo: st.info("π§ Behavioral Insights & Tactical Patterns") mo_text = str(st.session_state.mo_result).strip() # Clean Markdown mo_text = re.sub(r"```(markdown)?", "", mo_text).strip() # Enhancement: Extract and style Tactical Tags tags = re.findall(r"\[(.*?)\]", mo_text) if tags: cols = st.columns(len(tags) if len(tags) < 5 else 5) for i, tag in enumerate(tags[:5]): cols[i].markdown(f"**` {tag.upper()} `**") mo_text = re.sub(r"\[.*?\]", "", mo_text).strip() st.markdown(mo_text) with tab_bolo: col1, col2 = st.columns([1, 2]) with col1: st.subheader("ποΈ Manual BOLO Submission") with st.form("manual_bolo"): m_content = st.text_area("Intelligence/Observation (e.g. 'Blue Sedan seen at jewelry shop')") m_urgency = st.selectbox("Urgency", ["High", "Medium", "Low"]) if st.form_submit_button("π’ Publish Field BOLO"): if m_content: st.session_state.bolo_vault.append({ "source": "Field Officer (Manual)", "content": m_content, "urgency": m_urgency.upper(), "timestamp": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M") }) st.success("Field BOLO Published!") st.rerun() with col2: st.subheader("π‘ Active BOLO Feed") if not st.session_state.bolo_vault: st.info("No active BOLOs. Run analysis or submit a manual entry.") else: for b in reversed(st.session_state.bolo_vault): color = "red" if b["urgency"] == "HIGH" else "orange" if b["urgency"] == "MEDIUM" else "gray" st.markdown(f"""