Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import folium | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import os | |
| import tempfile | |
| import sys | |
| import re | |
| # --- Disable Telemetry --- | |
| os.environ["CREWAI_TELEMETRY_OPT_OUT"] = "true" | |
| import streamlit.components.v1 as components | |
| from crewai import Agent, Task, Crew, Process | |
| from langchain_openai import ChatOpenAI | |
| from crewai.tools import BaseTool | |
| from fpdf import FPDF | |
# =========================================
# 1. PAGE CONFIGURATION
# =========================================
# st.set_page_config must be the first Streamlit call in the script; "wide"
# layout gives the map/chart tabs room. NOTE(review): the icon/title glyphs
# ("π") look like mojibake from a lost emoji -- confirm intended characters.
st.set_page_config(page_title="Crime Copilot Dashboard", layout="wide", page_icon="π")
st.title("π AI Crime Intelligence Dashboard")
st.markdown("---")
| # ========================================= | |
| # 2. HELPER FUNCTIONS (PDF & GUARDRAILS) | |
| # ========================================= | |
def create_pdf(report_text):
    """Render the SITREP text into PDF bytes.

    Writes through a physical temporary file because FPDF's file-path output
    is the most broadly Adobe-compatible route; the temp file is always
    removed, even when rendering fails.

    Args:
        report_text: Report body, possibly containing markdown decoration.

    Returns:
        bytes: The raw PDF document, ready for st.download_button.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)
    # Title
    pdf.set_font("Arial", "B", 16)
    pdf.cell(200, 10, "Situation Report (SITREP)", ln=True, align="C")
    pdf.ln(10)
    # Body
    pdf.set_font("Arial", size=12)
    # Strip markdown decoration FPDF would print literally. Fix: strip the
    # longest header markers first -- replacing "## " before "### " used to
    # turn "### Title" into "#Title".
    clean_text = report_text.replace("```markdown", "").replace("```", "")
    clean_text = clean_text.replace("### ", "").replace("## ", "").replace("# ", "").replace("**", "")
    # FPDF's core Arial font only supports latin-1; degrade other chars.
    clean_text = clean_text.encode('latin-1', 'replace').decode('latin-1')
    pdf.multi_cell(0, 8, clean_text)
    # Write to a physical temp file to guarantee Adobe compatibility.
    fd, temp_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)  # Close the descriptor so FPDF can reopen the path itself.
    try:
        pdf.output(temp_path, "F")
        # Read pure binary data back.
        with open(temp_path, "rb") as f:
            pdf_bytes = f.read()
    finally:
        # Fix: clean up even if rendering or reading raises (no temp-file leak).
        os.remove(temp_path)
    return pdf_bytes
def validate_data_guardrails(df):
    """Reject dataframes that look like prompt-injection payloads.

    Scans column headers and a bounded sample of string cell values for
    known jailbreak phrases before the data is summarised for the LLM.

    Returns:
        tuple[bool, str]: (True, "Passed") when clean, otherwise
        (False, <reason>).
    """
    suspicious_phrases = ["ignore previous instructions", "disregard all previous", "you are an ai", "bypass instructions"]
    # Column headers are fed to the agents, so scan them first.
    for col in df.columns:
        header = str(col).lower()
        if any(phrase in header for phrase in suspicious_phrases):
            return False, f"Prompt injection detected in column: '{col}'"
    # Only text-like columns can carry a payload; sample up to 500 values
    # per column to keep the scan cheap on large files.
    for col in df.select_dtypes(include=['object']).columns:
        for val in df[col].dropna().head(500):
            cell = str(val).lower()
            if any(phrase in cell for phrase in suspicious_phrases):
                return False, f"Prompt injection detected in data."
    return True, "Passed"
# =========================================
# 3. SESSION STATE SETUP
# =========================================
# Seed every session-state key exactly once so later reads never KeyError
# across Streamlit reruns; existing values are left untouched.
for _key, _default in (
    ('data_cache', None),
    ('crew_result', None),
    ('mo_result', None),
    ('current_filename', ""),
    ('start_date', None),
    ('end_date', None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# =========================================
# 4. SIDEBAR & DATA LOADING
# =========================================
with st.sidebar:
    st.header("βοΈ Configuration")
    # Make the API key input optional for reviewers.
    api_key_input = st.text_input("OpenAI API Key (Leave blank to use Demo Key)", type="password")
    # If the user types a key, use it. Otherwise, Hugging Face will automatically
    # use the secret OPENAI_API_KEY environment variable set in Space settings.
    if api_key_input:
        os.environ["OPENAI_API_KEY"] = api_key_input
    st.header("π Data Upload")
    # Provide the sample dataset download so reviewers can test without
    # their own data. NOTE(review): assumes sample_crime_data.csv ships in
    # the working directory -- open() raises if it is missing.
    with open("sample_crime_data.csv", "rb") as file:
        st.download_button(
            label="β¬οΈ Download Sample Crime Data",
            data=file,
            file_name="sample_crime_data.csv",
            mime="text/csv",
            help="Download this file and upload it below to test the dashboard."
        )
    uploaded_file = st.file_uploader("Upload Crime CSV", type=["csv"], key="csv_uploader")
    # Placeholder container: the date filter widgets are injected here later,
    # after the CSV has been parsed (section "4" continues below the sidebar).
    date_filter_container = st.container()
    st.markdown("---")
    analyze_mo = st.checkbox("π΅οΈ Analyse Crime Operandi (MO)", value=False, help="Uses an additional AI Profiler to detect patterns to alert patrol officers.")
    # Export section only appears once a report exists in session state.
    if st.session_state.crew_result is not None:
        st.markdown("---")
        st.header("π₯ Export Options")
        result_obj = st.session_state.crew_result
        # CrewAI outputs expose .raw; fall back to str() for plain results.
        report_text = result_obj.raw if hasattr(result_obj, 'raw') and isinstance(result_obj.raw, str) else str(result_obj)
        pdf_bytes = create_pdf(report_text)
        dynamic_filename = f"SITREP_{st.session_state.start_date}_to_{st.session_state.end_date}.pdf"
        st.download_button(
            label="π Download SITREP (PDF)",
            data=pdf_bytes,
            file_name=dynamic_filename,
            mime="application/pdf",
            type="primary"
        )
def load_raw_data(file):
    """Parse an uploaded CSV into a DataFrame.

    Args:
        file: A file-like object (Streamlit UploadedFile or any buffer
            accepted by pandas.read_csv).

    Returns:
        pandas.DataFrame on success, or None when the file cannot be parsed
        (the parse error is surfaced in the UI instead of being swallowed).
    """
    try:
        return pd.read_csv(file, low_memory=False)
    except Exception as e:
        # Fix: the original silently returned None, leaving the user with no
        # clue why nothing loaded. Keep the None contract but show the cause.
        st.error(f"Could not read CSV: {e}")
        return None
if uploaded_file:
    # A new file invalidates every cached result; rerun restarts the script
    # cleanly with the fresh filename recorded.
    if uploaded_file.name != st.session_state.current_filename:
        st.session_state.data_cache = None
        st.session_state.crew_result = None
        st.session_state.mo_result = None
        st.session_state.current_filename = uploaded_file.name
        st.rerun()
    raw_df = load_raw_data(uploaded_file)
    if raw_df is not None:
        # Heuristic coordinate-column detection: any name containing lat/lon
        # variants, or bare X/Y columns.
        lat_col = next((col for col in raw_df.columns if 'lat' in col.lower() or col.lower() == 'y'), None)
        lon_col = next((col for col in raw_df.columns if 'lon' in col.lower() or 'long' in col.lower() or 'lng' in col.lower() or col.lower() == 'x'), None)
        if lat_col and lon_col:
            # Coerce to numeric and drop rows that cannot be plotted.
            raw_df[lat_col] = pd.to_numeric(raw_df[lat_col], errors='coerce')
            raw_df[lon_col] = pd.to_numeric(raw_df[lon_col], errors='coerce')
            raw_df = raw_df.dropna(subset=[lat_col, lon_col])
        # Prefer a pure date column; fall back to a combined datetime column.
        date_col = next((col for col in raw_df.columns if 'date' in col.lower() and 'time' not in col.lower()), None)
        if not date_col: date_col = next((col for col in raw_df.columns if 'datetime' in col.lower()), None)
        if date_col:
            raw_df[date_col] = pd.to_datetime(raw_df[date_col], errors='coerce', dayfirst=False)
            raw_df = raw_df.dropna(subset=[date_col])
            min_date, max_date = raw_df[date_col].min().date(), raw_df[date_col].max().date()
            # Fix: Group the stats UI neatly inside the sidebar placeholder.
            with date_filter_container:
                st.header("π Analysis Period")
                start = st.date_input("Start Date", min_date, min_value=min_date, max_value=max_date)
                end = st.date_input("End Date", max_date, min_value=min_date, max_value=max_date)
                # Persist the range so the export filename and task prompts
                # can reference it on later reruns.
                st.session_state.start_date, st.session_state.end_date = start, end
                mask = (raw_df[date_col].dt.date >= start) & (raw_df[date_col].dt.date <= end)
                st.session_state.data_cache = raw_df.loc[mask]
                # Styled highlight boxes
                st.info(f"Total Rows in File: **{len(raw_df)}**")
                st.success(f"Rows in Selected Dates: **{len(st.session_state.data_cache)}**")
                if lat_col and lon_col:
                    st.caption(f"π **Map Ready Points:** {len(st.session_state.data_cache)}")
# =========================================
# 5. TOOLS
# =========================================
class MapVizTool(BaseTool):
    """CrewAI tool that summarises the top high-crime neighborhoods from the cached data."""
    name: str = "Crime Heatmap Generator"
    description: str = "Analyzes location data."

    def _run(self, dummy_arg: str) -> str:
        # The date-filtered dataframe is shared with the agents via session state.
        frame = st.session_state.data_cache
        if frame is None or frame.empty:
            return "Error: No data."
        # Heuristic: first column whose name suggests a neighborhood grouping.
        hood_col = None
        for candidate in frame.columns:
            lowered = candidate.lower()
            if 'neighbor' in lowered or 'analysis' in lowered:
                hood_col = candidate
                break
        if hood_col is None:
            return "Locations processed."
        top_three = frame[hood_col].value_counts().head(3)
        return f"Top 3 High-Crime Neighborhoods:\n{top_three.to_string()}"
class ChartVizTool(BaseTool):
    """CrewAI tool that saves a top-5 crime category bar chart and returns the counts."""
    name: str = "Crime Trend Chart Generator"
    description: str = "Generates a trend chart."

    def _run(self, dummy_arg: str) -> str:
        frame = st.session_state.data_cache
        if frame is None or frame.empty:
            return "Error: No data."
        # Column detection, strongest match first: an explicit
        # "Incident Category" column, then any "category" column, then
        # description/type columns as a last resort.
        cat_col = next((c for c in frame.columns if 'incident category' in c.lower()), None)
        if cat_col is None:
            cat_col = next((c for c in frame.columns if 'category' in c.lower()), None)
        if cat_col is None:
            cat_col = next((c for c in frame.columns if 'desc' in c.lower() or 'type' in c.lower()), None)
        if cat_col is None:
            return "Error: No category column."
        top_crimes = frame[cat_col].value_counts().head(5)
        # Render a horizontal bar chart; the Streamlit UI picks the PNG up
        # later from the working directory.
        plt.figure(figsize=(10, 6))
        sns.barplot(x=top_crimes.values, y=top_crimes.index, hue=top_crimes.index, palette="magma", legend=False)
        plt.title("Top 5 Crime Trends")
        plt.xlabel("Count")
        plt.ylabel(cat_col)
        plt.tight_layout()
        plt.savefig("crime_chart.png")
        plt.close()
        return f"Chart Data: Top 5 Crime Categories:\n{top_crimes.to_string()}"
# =========================================
# 6. EXECUTION
# =========================================
if st.button("π Run Analysis", type="primary"):
    # Pre-flight checks: API key, loaded data, and injection guardrails
    # must all pass before any tokens are spent.
    if not os.environ.get("OPENAI_API_KEY"): st.error("Please enter your API Key."); st.stop()
    if st.session_state.data_cache is None: st.error("β No dataset loaded!"); st.stop()
    is_safe, security_msg = validate_data_guardrails(st.session_state.data_cache)
    if not is_safe: st.error(f"π¨ Security Alert: {security_msg}"); st.stop()
    else: st.success("β LLM Guardrails Validation Passed")
    with st.spinner("π€ AI Agents are generating intelligence..."):
        try:
            # Low temperature keeps the numeric reporting close to the data.
            llm = ChatOpenAI(model="gpt-4o", verbose=True, temperature=0.3)
            # Analyst: the only agent with tool access; produces the hard numbers.
            analyst = Agent(
                role="Senior Analyst",
                goal="Analyze crime statistics accurately.",
                backstory="You provide hard numbers on top crime types and locations.",
                tools=[MapVizTool(), ChartVizTool()],
                llm=llm,
                verbose=True
            )
            # Writer: turns the analyst's numbers into the SITREP narrative.
            writer = Agent(
                role="Commander",
                goal="Write a detailed Situation Report (SITREP).",
                backstory="You write executive summaries. You MUST use the exact numbers provided by the Analyst.",
                llm=llm,
                verbose=True
            )
            # Auditor: formatting-only pass (headers + code-fence stripping).
            auditor = Agent(
                role="Formatter",
                goal="Ensure strict Markdown formatting.",
                backstory="Formats reports perfectly. Removes all triple backticks (```) from output.",
                llm=llm,
                verbose=True
            )
            start_str, end_str = str(st.session_state.start_date), str(st.session_state.end_date)
            t1 = Task(
                description=f"Analyze data from {start_str} to {end_str}. 1. Use the Chart Tool to get top crime stats. 2. Use the Map Tool to get hotspot neighborhoods.",
                agent=analyst,
                expected_output="A summary of top crime stats and hotspots."
            )
            # Fix: Strict Markdown Template for SITREP to match user requirements.
            t2 = Task(
                description=f"Write a Situation Report using EXACT numbers from the Analyst. You MUST use this exact markdown structure:\n"
                f"# SITREP: {start_str} to {end_str}\n\n"
                "## Overview\n[Write a brief summary of the period]\n\n"
                "## Threat Breakdown\n[List the top 5 crimes with their exact numbers]\n\n"
                "## Hotspots\n[List the top neighborhoods with their exact numbers]\n\n"
                "## Recommendations\n[Provide tactical advice]",
                agent=writer,
                expected_output="A strictly formatted SITREP text with numbers."
            )
            t3 = Task(
                description="Review the SITREP. Ensure it strictly uses the specific Markdown headers provided (# SITREP, ## Overview, ## Threat Breakdown, ## Hotspots, ## Recommendations). Strip any markdown code fences (```) from the final output.",
                agent=auditor,
                expected_output="Clean Markdown report."
            )
            tasks_list = [t1, t2, t3]
            agents_list = [analyst, writer, auditor]
            # Optional fourth agent: behavioral profiler for MO/BOLO alerts.
            if analyze_mo:
                profiler = Agent(
                    role="Behavioral Profiler",
                    goal="Identify Modus Operandi (MO) and patterns.",
                    backstory="Expert in predicting criminal behavior and generating BOLO (Be On the Look Out) alerts.",
                    llm=llm,
                    verbose=True
                )
                t_mo = Task(
                    description=f"Based on the Analyst's findings for {start_str} to {end_str}, write a 'Tactical MO Alert'. Identify the most likely operational patterns. Provide 3 bullet points for patrol officers.",
                    agent=profiler,
                    expected_output="Tactical MO Alert."
                )
                agents_list.append(profiler)
                tasks_list.append(t_mo)
            crew = Crew(agents=agents_list, tasks=tasks_list, verbose=True)
            crew.kickoff()
            # Fix: Explicitly grab the output of the final report task rather
            # than kickoff()'s return value, which is the LAST task's output
            # (t_mo when MO analysis is enabled) -- avoids the overwrite bug.
            st.session_state.crew_result = t3.output.raw if hasattr(t3.output, 'raw') else str(t3.output)
            if analyze_mo:
                st.session_state.mo_result = t_mo.output.raw if hasattr(t_mo.output, 'raw') else str(t_mo.output)
            else:
                st.session_state.mo_result = None
            st.success("Analysis Complete!")
            # Rerun so the persistent-display section below renders the results.
            st.rerun()
        except Exception as e:
            st.error(f"Error: {e}")
# =========================================
# 7. PERSISTENT DISPLAY
# =========================================
if st.session_state.crew_result is not None:
    # Tab layout depends on whether the optional MO analysis produced output.
    if st.session_state.mo_result:
        tabs = st.tabs(["π Report", "πΊοΈ Map", "π Charts", "π΅οΈ MO Analysis"])
        tab_report, tab_map, tab_chart, tab_mo = tabs[0], tabs[1], tabs[2], tabs[3]
    else:
        tabs = st.tabs(["π Report", "πΊοΈ Map", "π Charts"])
        tab_report, tab_map, tab_chart = tabs[0], tabs[1], tabs[2]
    with tab_report:
        res = st.session_state.crew_result
        report_text = str(res)
        # Clean Markdown Fences the auditor agent may have left behind.
        report_text = report_text.strip()
        if report_text.lower().startswith("```markdown"): report_text = report_text[11:]
        elif report_text.startswith("```"): report_text = report_text[3:]
        if report_text.endswith("```"): report_text = report_text[:-3]
        st.markdown(report_text.strip(), unsafe_allow_html=True)
    with tab_map:
        df = st.session_state.data_cache
        # Same heuristic lat/lon detection used by the loader above.
        lat_col = next((col for col in df.columns if 'lat' in col.lower() or 'y' == col.lower()), None)
        lon_col = next((col for col in df.columns if 'lon' in col.lower() or 'long' in col.lower() or 'lng' in col.lower() or 'x' == col.lower()), None)
        if lat_col and lon_col:
            map_data = df.dropna(subset=[lat_col, lon_col])
            if not map_data.empty:
                m = folium.Map(location=[map_data[lat_col].mean(), map_data[lon_col].mean()], tiles='CartoDB positron', zoom_start=11)
                from folium.plugins import HeatMap
                # Cap at 5000 points to keep the embedded HTML responsive.
                HeatMap(map_data[[lat_col, lon_col]].head(5000).values.tolist(), radius=12, blur=15, min_opacity=0.4, gradient={0.4: 'blue', 0.65: 'lime', 1: 'red'}).add_to(m)
                m.fit_bounds([map_data[[lat_col, lon_col]].min().values.tolist(), map_data[[lat_col, lon_col]].max().values.tolist()])
                components.html(m._repr_html_(), height=500)
    with tab_chart:
        # Crime Category Chart (PNG written to disk by ChartVizTool during the run)
        st.markdown("### π Top Crime Categories")
        if os.path.exists("crime_chart.png"): st.image("crime_chart.png")
        # --- NEW QUICK WIN: Time of Day Analysis Chart ---
        st.markdown("---")
        st.markdown("### β° Incidents by Time of Day")
        df_chart = st.session_state.data_cache
        if df_chart is not None and not df_chart.empty:
            # Look for Time or Datetime columns
            time_col = next((col for col in df_chart.columns if 'time' in col.lower() and 'datetime' not in col.lower()), None)
            dt_col = next((col for col in df_chart.columns if 'datetime' in col.lower()), None)
            hours = None
            if time_col:
                # Try strict HH:MM first; fall back to a general datetime parse
                # when nothing matched that format.
                hours = pd.to_datetime(df_chart[time_col], format='%H:%M', errors='coerce').dt.hour
                if hours.isna().all():
                    hours = pd.to_datetime(df_chart[time_col], errors='coerce').dt.hour
            elif dt_col:
                hours = pd.to_datetime(df_chart[dt_col], errors='coerce').dt.hour
            else:
                # Last resort: extract the hour from any date-like column.
                date_col_fallback = next((col for col in df_chart.columns if 'date' in col.lower()), None)
                if date_col_fallback:
                    hours = pd.to_datetime(df_chart[date_col_fallback], errors='coerce').dt.hour
            if hours is not None and not hours.isna().all():
                hourly_counts = hours.value_counts().sort_index()
                fig, ax = plt.subplots(figsize=(10, 4))
                sns.barplot(x=hourly_counts.index.astype(int), y=hourly_counts.values, palette="coolwarm", ax=ax)
                ax.set_xlabel("Hour of Day (0-23)")
                ax.set_ylabel("Number of Incidents")
                plt.tight_layout()
                st.pyplot(fig)
            else:
                st.info("Time data not available or parseable in this dataset.")
    if st.session_state.mo_result:
        with tab_mo:
            st.info("π§ Insights generated by the Behavioral Profiler Agent")
            # Same fence-stripping treatment as the main report.
            mo_text = str(st.session_state.mo_result).strip()
            if mo_text.lower().startswith("```markdown"): mo_text = mo_text[11:]
            elif mo_text.startswith("```"): mo_text = mo_text[3:]
            if mo_text.endswith("```"): mo_text = mo_text[:-3]
            st.markdown(mo_text.strip())