import os
import re
from datetime import datetime, timedelta

import folium
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import st_folium
from textblob import TextBlob
from wordcloud import WordCloud

# ------------------------
# Config
# ------------------------
st.set_page_config(
    page_title="Reddit based Drug Crime Intelligence Dashboard",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Paths to data files
POSTS_FILE = "data/processed/reddit_posts_filtered.csv"
COMMENTS_FILE = "data/processed/reddit_comments_filtered.csv"
WARD_COORDS_FILE = "data/bangalore_wards_coordinates.csv"
DISTRICT_COORDS_FILE = "data/karnataka_districts_coordinates.csv"

# Drug-related keywords for classification
DRUG_KEYWORDS = {
    'high_risk': ['dealing', 'dealer', 'supply', 'trafficking', 'smuggling', 'cartel', 'seized', 'arrest', 'raid'],
    'substance': ['cocaine', 'heroin', 'mdma', 'meth', 'cannabis', 'marijuana', 'ganja', 'weed', 'lsd', 'ecstasy'],
    'activity': ['selling', 'buying', 'distribution', 'possession', 'consumption', 'overdose', 'addiction']
}

# Points added per matched keyword, by keyword group (used by classify_crime_severity).
_SEVERITY_WEIGHTS = (('high_risk', 3), ('substance', 2), ('activity', 1))


# ------------------------
# Enhanced Data Loading
# ------------------------
def _drop_duplicate_ids(frame):
    """Keep the first row per 'id'; frames without an 'id' column pass through."""
    if 'id' in frame.columns:
        return frame.drop_duplicates(subset=['id'], keep='first')
    return frame


def _rename_name_column(target):
    """Build a step that renames a generic 'name' column to *target* if needed."""
    def rename(frame):
        if target not in frame.columns and 'name' in frame.columns:
            frame = frame.rename(columns={'name': target})
        return frame
    return rename


def _load_csv(path, *, loaded_msg, missing_msg, error_label, prepare=None, **read_kwargs):
    """Read one CSV and report the outcome in the sidebar.

    Args:
        path: CSV file to read.
        loaded_msg: Success message template; ``{count}`` is the row count.
        missing_msg: Warning shown when the file does not exist.
        error_label: Noun used in the generic error message.
        prepare: Optional callable applied to the freshly read frame.
        **read_kwargs: Forwarded to ``pd.read_csv``.

    Returns:
        ``(frame, ok)``; on any failure ``frame`` is an empty DataFrame and
        ``ok`` is False, so the dashboard can still render what it has.
    """
    try:
        frame = pd.read_csv(path, **read_kwargs)
        if prepare is not None:
            frame = prepare(frame)
    except FileNotFoundError:
        st.sidebar.warning(missing_msg)
        return pd.DataFrame(), False
    except Exception as e:  # best-effort loader: report and continue
        st.sidebar.error(f"❌ Error loading {error_label}: {e}")
        return pd.DataFrame(), False
    st.sidebar.success(loaded_msg.format(count=len(frame)))
    return frame, True


@st.cache_data
def load_data(posts_file, comments_file, ward_file, district_file):
    """Load all data files with comprehensive error handling.

    Returns:
        ``(posts, comments, wards, districts, data_status)`` where
        ``data_status`` maps each source name to whether it loaded.
        Sources that fail to load come back as empty DataFrames.
    """
    data_status = {"posts": False, "comments": False, "wards": False, "districts": False}

    # Posts are read as strings; numeric fields are converted where they are used.
    posts, data_status["posts"] = _load_csv(
        posts_file,
        loaded_msg="✅ Posts loaded: {count} records",
        missing_msg="⚠️ Reddit posts file not found",
        error_label="posts",
        prepare=_drop_duplicate_ids,  # guarded: a missing 'id' column no longer discards the file
        dtype=str,
    )
    comments, data_status["comments"] = _load_csv(
        comments_file,
        loaded_msg="✅ Comments loaded: {count} records",
        missing_msg="⚠️ Reddit comments file not found",
        error_label="comments",
        prepare=_drop_duplicate_ids,
    )
    wards, data_status["wards"] = _load_csv(
        ward_file,
        loaded_msg="✅ Wards loaded: {count} wards",
        missing_msg="⚠️ Ward coordinates file not found",
        error_label="wards",
        prepare=_rename_name_column('ward_name'),
    )
    districts, data_status["districts"] = _load_csv(
        district_file,
        loaded_msg="✅ Districts loaded: {count} districts",
        missing_msg="⚠️ District coordinates file not found",
        error_label="districts",
        prepare=_rename_name_column('district_name'),
    )
    return posts, comments, wards, districts, data_status


# ------------------------
# Crime Analysis Functions
# ------------------------
def classify_crime_severity(text):
    """Classify posts by crime severity based on keywords.

    Every keyword found as a substring of the lower-cased text adds its
    group weight (high_risk 3, substance 2, activity 1). The total maps to
    'Critical' (>= 5), 'High' (>= 3), 'Medium' (>= 1) or 'Low'.
    """
    text_lower = str(text).lower()
    severity_score = sum(
        weight
        for group, weight in _SEVERITY_WEIGHTS
        for keyword in DRUG_KEYWORDS[group]
        if keyword in text_lower
    )
    if severity_score >= 5:
        return 'Critical'
    if severity_score >= 3:
        return 'High'
    if severity_score >= 1:
        return 'Medium'
    return 'Low'


def extract_drug_mentions(text):
    """Extract specific drug mentions from text.

    Returns the capitalised substance keywords found in the text joined by
    ', ', or 'Unspecified' when none match.
    """
    text_lower = str(text).lower()
    drugs_found = [drug.capitalize() for drug in DRUG_KEYWORDS['substance'] if drug in text_lower]
    return ', '.join(drugs_found) if drugs_found else 'Unspecified'


def _as_number(value, default=0.0):
    """Convert *value* to float; missing or unparseable values give *default*."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    return default if pd.isna(number) else number


def _as_text(value):
    """Return *value* as a string, mapping None/NaN to '' instead of 'nan'."""
    if value is None or (not isinstance(value, str) and pd.isna(value)):
        return ''
    return str(value)


def calculate_threat_score(row):
    """Calculate threat score based on multiple factors.

    Args:
        row: Mapping-like post record (DataFrame row or dict). Reads
            'text', 'title', 'score' and 'num_comments' when present.

    Returns:
        A score capped at 100: +10 per high-risk keyword, up to +5 each from
        score/10 and num_comments/5, and +5 when sentiment polarity < -0.2.
    """
    text = _as_text(row.get('text', '')) + ' ' + _as_text(row.get('title', ''))
    text_lower = text.lower()

    score = 10 * sum(1 for keyword in DRUG_KEYWORDS['high_risk'] if keyword in text_lower)

    # Posts are loaded with dtype=str, so these fields may be strings or NaN;
    # int() on them raised ValueError. Convert defensively instead.
    if 'score' in row:
        score += min(_as_number(row.get('score', 0)) / 10, 5)
    if 'num_comments' in row:
        score += min(_as_number(row.get('num_comments', 0)) / 5, 5)

    sentiment = TextBlob(text).sentiment.polarity
    if sentiment < -0.2:
        score += 5
    return min(score, 100)


# ------------------------
# Load All Data
# ------------------------
posts_df, comments_df, wards_df, districts_df, data_status = load_data(
    POSTS_FILE, COMMENTS_FILE, WARD_COORDS_FILE, DISTRICT_COORDS_FILE
)


# ------------------------
# Data Processing
# ------------------------
def process_datetime(df, datetime_col='created_utc'):
    """Process datetime column with robust error handling.

    Adds 'datetime', 'date', 'hour' and 'day_of_week' columns to *df*
    (modified in place and returned). Frames without *datetime_col* are
    returned untouched. Unparseable values become NaT.
    """
    if datetime_col not in df.columns:
        return df

    raw = df[datetime_col]
    if pd.api.types.is_datetime64_any_dtype(raw):
        parsed = raw
    else:
        as_number = pd.to_numeric(raw, errors='coerce')
        present = raw.notna()
        if present.any() and as_number[present].notna().all():
            # NOTE(review): all-numeric values are treated as Unix epoch
            # seconds (Reddit's created_utc convention) — confirm against the
            # collector. Plain pd.to_datetime would read numbers as
            # nanoseconds and digit strings as NaT.
            parsed = pd.to_datetime(as_number, unit='s', errors='coerce')
        else:
            parsed = pd.to_datetime(raw, errors='coerce')

    df["datetime"] = parsed
    df["date"] = df["datetime"].dt.date
    df["hour"] = df["datetime"].dt.hour
    df["day_of_week"] = df["datetime"].dt.day_name()
    return df


# Normalize coordinate names
if not wards_df.empty and "ward_name" in wards_df.columns:
    wards_df["ward_name"] = wards_df["ward_name"].astype(str).str.strip().str.lower()
if not districts_df.empty and "district_name" in districts_df.columns:
    districts_df["district_name"] = districts_df["district_name"].astype(str).str.strip().str.lower()

# District mapping
district_mapping = {
    "bangalore": "bengaluru",
    "blr": "bengaluru",
    "mysore": "mysuru",
}
# Create patterns
from html import escape as html_escape  # Reddit-sourced text is escaped before entering popup HTML

# Shared colour scheme for every severity-coloured chart.
SEVERITY_COLORS = {
    'Critical': '#FF0000',
    'High': '#FF6B00',
    'Medium': '#FFD700',
    'Low': '#90EE90'
}
DAY_ORDER = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]


def _build_location_pattern(names):
    """Return a word-bounded alternation regex for *names*, or None if empty.

    Names are lower-cased and de-duplicated. Longer names come first so a
    multi-word name wins over a shorter name that is its prefix.
    """
    cleaned = {str(name).strip().lower() for name in names if pd.notna(name)}
    cleaned.discard("")
    cleaned.discard("nan")  # missing names become the string 'nan' after astype(str)
    if not cleaned:
        return None
    ordered = sorted(cleaned, key=lambda name: (-len(name), name))
    return r'\b(' + '|'.join(re.escape(name) for name in ordered) + r')\b'


ward_pattern = None
district_pattern = None
district_aliases = {}
if not wards_df.empty and "ward_name" in wards_df.columns:
    ward_pattern = _build_location_pattern(wards_df["ward_name"])
if not districts_df.empty and "district_name" in districts_df.columns:
    known_districts = set(districts_df["district_name"].astype(str).str.strip().str.lower())
    # Only aliases whose canonical name has coordinates are useful; they are
    # added to the pattern so e.g. "bangalore" is actually found in text.
    district_aliases = {
        alias: canonical
        for alias, canonical in district_mapping.items()
        if canonical in known_districts
    }
    district_pattern = _build_location_pattern(list(known_districts) + list(district_aliases))


def extract_locations(text_series, patterns, aliases=None):
    """Extract locations from text using regex patterns.

    Args:
        text_series: Series of free text; missing values count as empty.
        patterns: Regex strings with one capture group each.
        aliases: Optional mapping applied to every match (alias -> canonical).

    Returns:
        Series aligned with *text_series*; each value is the unique matches
        joined by ', ' in first-seen order ('' when nothing matched).
    """
    aliases = aliases or {}
    compiled = [re.compile(pattern) for pattern in patterns]
    locations = []
    for text in text_series.fillna(""):
        lowered = str(text).lower()
        found = {}  # dict keeps first-seen order and de-duplicates
        for regex in compiled:
            for match in regex.findall(lowered):
                found[aliases.get(match, match)] = None
        locations.append(", ".join(found))
    return pd.Series(locations, index=text_series.index)


def _text_column(frame, column):
    """Return *column* as strings with missing values as '', or blanks if absent."""
    if column in frame.columns:
        return frame[column].fillna("").astype(str)
    return pd.Series("", index=frame.index)


def _explode_locations(frame, column):
    """One row per (post, location): split the ', '-joined *column* and drop blanks."""
    located = frame[frame[column].fillna("") != ""].copy()
    located[column] = located[column].str.split(", ")
    located = located.explode(column)
    located[column] = located[column].str.strip().str.lower()
    return located


def _count_by_location(exploded, column, coords, name_column, location_type):
    """Count exploded rows per location and attach that location's coordinates."""
    counts = exploded.groupby(column).size().reset_index(name="count")
    merged = pd.merge(counts, coords, left_on=column, right_on=name_column, how="inner")
    merged["location_type"] = location_type
    merged["location_name"] = merged[name_column]
    return merged


# Process posts
if not posts_df.empty:
    posts_df = process_datetime(posts_df)
    # Fill each side before joining: NaN + str is NaN, which used to blank
    # out the title of every post that had no body text.
    post_text = _text_column(posts_df, "title") + " " + _text_column(posts_df, "text")

    if ward_pattern:
        posts_df["ward_location"] = extract_locations(post_text, [ward_pattern])
    else:
        posts_df["ward_location"] = ""
    if district_pattern:
        posts_df["district_location"] = extract_locations(
            post_text, [district_pattern], aliases=district_aliases
        )
    else:
        posts_df["district_location"] = ""

    posts_df["severity"] = post_text.apply(classify_crime_severity)
    posts_df["drugs_mentioned"] = post_text.apply(extract_drug_mentions)
    posts_df["threat_score"] = posts_df.apply(calculate_threat_score, axis=1)
    posts_df["sentiment_score"] = post_text.apply(lambda x: TextBlob(str(x)).sentiment.polarity)
    posts_df["sentiment"] = posts_df["sentiment_score"].apply(
        lambda x: "Positive" if x > 0 else ("Negative" if x < 0 else "Neutral")
    )

# Process comments
if not comments_df.empty:
    comments_df = process_datetime(comments_df)

# ------------------------
# Dashboard Header
# ------------------------
st.title("🚨 Reddit based Drug Crime Intelligence Dashboard")
st.markdown("**Real-time intelligence analysis of drug-related criminal activities from Reddit social media monitoring**")

# ------------------------
# Sidebar Filters
# ------------------------
st.sidebar.title("🔧 Intelligence Controls")
if st.sidebar.button("🔄 Refresh Data"):
    st.cache_data.clear()
    st.rerun()

# Severity filter
if not posts_df.empty and "severity" in posts_df.columns:
    severity_filter = st.sidebar.multiselect(
        "⚠️ Crime Severity Level",
        options=['Critical', 'High', 'Medium', 'Low'],
        default=['Critical', 'High']
    )
    if severity_filter:
        posts_df = posts_df[posts_df["severity"].isin(severity_filter)]

# Date range filter
if not posts_df.empty and "datetime" in posts_df.columns:
    valid_datetimes = posts_df["datetime"].dropna()
    if not valid_datetimes.empty:  # all-NaT data cannot seed the date picker
        min_date = valid_datetimes.min().date()
        max_date = valid_datetimes.max().date()
        date_range = st.sidebar.date_input(
            "📅 Select Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date
        )
        # While the user is mid-selection the widget may return fewer than two dates.
        if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
            start_date, end_date = date_range
            # Drop undated rows first so dates are never compared against NaT.
            dated_rows = posts_df[posts_df["datetime"].notna()]
            posts_df = dated_rows[
                (dated_rows["date"] >= start_date) & (dated_rows["date"] <= end_date)
            ]

# Subreddit filter
if not posts_df.empty and "subreddit" in posts_df.columns:
    subreddits = st.sidebar.multiselect(
        "📱 Filter by Subreddits",
        options=posts_df["subreddit"].dropna().unique(),
        default=posts_df["subreddit"].value_counts().head(5).index.tolist()
    )
    if subreddits:
        posts_df = posts_df[posts_df["subreddit"].isin(subreddits)]

# Keyword search
search_keyword = st.sidebar.text_input("🔍 Search Keywords in Content")
if search_keyword and not posts_df.empty:
    keyword_mask = pd.Series(False, index=posts_df.index)
    for searchable in ("text", "title"):
        if searchable in posts_df.columns:
            # regex=False: the keyword is user input, not a pattern ("(" must not raise)
            keyword_mask |= _text_column(posts_df, searchable).str.contains(
                search_keyword, case=False, regex=False
            )
    posts_df = posts_df[keyword_mask]

# ------------------------
# Main Dashboard Content
# ------------------------
if posts_df.empty and comments_df.empty:
    st.error("🚫 No intelligence data available. Please ensure data collection is operational.")
    st.stop()

# --- Crime Intelligence Metrics
st.subheader("📊 Crime Intelligence Overview")
col1, col2, col3, col4 = st.columns(4)

with col1:
    critical_posts = len(posts_df[posts_df["severity"] == "Critical"]) if "severity" in posts_df.columns else 0
    st.metric(
        label="Critical Threats",
        value=critical_posts,
        delta=f"{(critical_posts/len(posts_df)*100):.1f}%" if len(posts_df) > 0 else "0%"
    )

with col2:
    avg_threat = posts_df["threat_score"].mean() if "threat_score" in posts_df.columns else 0
    if pd.isna(avg_threat):  # mean of an empty selection
        avg_threat = 0
    st.metric(
        label="Avg Threat Score",
        value=f"{avg_threat:.1f}",
        delta="High" if avg_threat > 50 else "Moderate"
    )

with col3:
    if "ward_location" in posts_df.columns:
        unique_locations = _explode_locations(posts_df, "ward_location")["ward_location"].nunique()
        st.metric(
            label="Active Locations",
            value=unique_locations
        )

with col4:
    if "drugs_mentioned" in posts_df.columns:
        mentioned = posts_df["drugs_mentioned"].str.split(", ").explode()
        # "Unspecified" is the placeholder for "no drug found", not a drug type.
        drug_types = mentioned[mentioned != "Unspecified"].nunique()
    else:
        drug_types = 0
    st.metric(
        label="Drug Types Identified",
        value=drug_types
    )

st.markdown("---")

# --- Crime Severity Distribution
if "severity" in posts_df.columns:
    st.subheader("⚠️ Crime Severity Analysis")
    col1, col2 = st.columns(2)

    with col1:
        severity_counts = posts_df["severity"].value_counts()
        fig_severity = px.pie(
            values=severity_counts.values,
            names=severity_counts.index,
            title="Crime Severity Distribution",
            color=severity_counts.index,
            color_discrete_map=SEVERITY_COLORS
        )
        st.plotly_chart(fig_severity, use_container_width=True)

    with col2:
        fig_threat = px.histogram(
            posts_df,
            x="threat_score",
            nbins=20,
            title="Threat Score Distribution",
            labels={"threat_score": "Threat Score", "count": "Number of Posts"}
        )
        fig_threat.add_vline(x=50, line_dash="dash", line_color="red", annotation_text="High Threat Threshold")
        st.plotly_chart(fig_threat, use_container_width=True)

    st.markdown("---")

# --- Drug Type Analysis
if "drugs_mentioned" in posts_df.columns:
    st.subheader("💊 Substance Intelligence")
    all_drugs = posts_df["drugs_mentioned"].str.split(", ").explode()
    drug_counts = all_drugs[all_drugs != "Unspecified"].value_counts().head(10)
    if not drug_counts.empty:
        fig_drugs = px.bar(
            x=drug_counts.values,
            y=drug_counts.index,
            orientation='h',
            title="Top 10 Substances Mentioned",
            labels={"x": "Mentions", "y": "Substance"},
            color=drug_counts.values,
            color_continuous_scale="Reds"
        )
        st.plotly_chart(fig_drugs, use_container_width=True)
    st.markdown("---")

# --- Timeline Analysis
if "date" in posts_df.columns and "severity" in posts_df.columns:
    st.subheader("📈 Crime Activity Timeline")
    col1, col2 = st.columns(2)

    with col1:
        daily_data = posts_df.groupby(["date", "severity"]).size().reset_index(name="count")
        fig_daily = px.line(
            daily_data,
            x="date",
            y="count",
            color="severity",
            title="Daily Crime Activity by Severity",
            labels={"count": "Number of Incidents", "date": "Date"},
            color_discrete_map=SEVERITY_COLORS
        )
        st.plotly_chart(fig_daily, use_container_width=True)

    with col2:
        if "hour" in posts_df.columns and "day_of_week" in posts_df.columns:
            hourly_activity = posts_df.groupby(["day_of_week", "hour"]).size().reset_index(name="count")
            fig_hourly = px.density_heatmap(
                hourly_activity,
                x="hour",
                y="day_of_week",
                z="count",
                title="Activity Heatmap - High-Risk Hours",
                labels={"hour": "Hour of Day", "day_of_week": "Day", "count": "Incidents"},
                color_continuous_scale="Reds",
                category_orders={"day_of_week": DAY_ORDER}  # calendar order, not alphabetical
            )
            st.plotly_chart(fig_hourly, use_container_width=True)

    st.markdown("---")

# --- Geographic Intelligence - COMBINED MAP
st.subheader("🗺️ Geographic Crime Intelligence")

# Process both ward and district data
ward_data_available = (
    not wards_df.empty
    and "ward_name" in wards_df.columns
    and "ward_location" in posts_df.columns
)
district_data_available = (
    not districts_df.empty
    and "district_name" in districts_df.columns
    and "district_location" in posts_df.columns
)

# Defined up front so the export section can use them even when no map is drawn.
merged_wards = pd.DataFrame()
merged_districts = pd.DataFrame()
ward_exploded = pd.DataFrame()
district_exploded = pd.DataFrame()

if ward_data_available or district_data_available:
    st.markdown("**Crime hotspot analysis across Karnataka (Wards & Districts)**")

    # Prepare ward data
    if ward_data_available:
        ward_exploded = _explode_locations(posts_df, "ward_location")
        merged_wards = _count_by_location(ward_exploded, "ward_location", wards_df, "ward_name", "Ward")

    # Prepare district data
    if district_data_available:
        district_exploded = _explode_locations(posts_df, "district_location")
        merged_districts = _count_by_location(
            district_exploded, "district_location", districts_df, "district_name", "District"
        )

    # Combine both datasets
    location_frames = [frame for frame in (merged_wards, merged_districts) if not frame.empty]
    all_locations = pd.concat(location_frames, ignore_index=True) if location_frames else pd.DataFrame()

    if not all_locations.empty and not {"lat", "lon"}.issubset(all_locations.columns):
        st.warning("⚠️ Coordinate files must provide 'lat' and 'lon' columns to draw the map")
        all_locations = pd.DataFrame()
    if not all_locations.empty:
        # A marker cannot be placed without both coordinates.
        all_locations = all_locations.dropna(subset=["lat", "lon"]).reset_index(drop=True)

    if not all_locations.empty:
        # Determine center of map
        center_lat = all_locations["lat"].mean()
        center_lon = all_locations["lon"].mean()

        # Create unified map
        m_unified = folium.Map(
            location=[center_lat, center_lon],
            zoom_start=9 if ward_data_available else 7,
            tiles="OpenStreetMap"
        )

        # Add heatmap layer
        heat_data = all_locations[["lat", "lon", "count"]].values.tolist()
        HeatMap(heat_data, radius=20, blur=15, max_zoom=13, gradient={
            0.0: 'blue',
            0.5: 'yellow',
            0.75: 'orange',
            1.0: 'red'
        }).add_to(m_unified)

        # Determine hotspot threshold
        threshold = all_locations["count"].quantile(0.70)
        all_locations["is_hotspot"] = all_locations["count"] >= threshold

        # Add markers for each location
        for _, row in all_locations.iterrows():
            location_name = html_escape(str(row["location_name"]).title())
            location_type = row["location_type"]
            incident_count = int(row["count"])

            # Get location-specific crime data. Exact match on the exploded
            # rows keeps this in step with incident_count; a regex substring
            # search would also pick up neighbouring names.
            if location_type == "Ward":
                loc_data = ward_exploded[ward_exploded["ward_location"] == row["location_name"]]
            else:
                loc_data = district_exploded[district_exploded["district_location"] == row["location_name"]]

            # Severity breakdown
            severity_breakdown = loc_data["severity"].value_counts().to_dict()
            severity_html = "<br>".join([f"  • {sev}: {count}" for sev, count in severity_breakdown.items()])

            # Critical incidents count
            critical_count = severity_breakdown.get("Critical", 0)

            # Top drugs in this location
            loc_drugs = loc_data["drugs_mentioned"].str.split(", ").explode()
            top_drugs = loc_drugs[loc_drugs != "Unspecified"].value_counts().head(3)
            drugs_html = "<br>".join([f"  • {drug}: {count}" for drug, count in top_drugs.items()])

            # Average threat score
            loc_avg_threat = loc_data["threat_score"].mean()

            # Recent high-threat incidents
            recent = loc_data.nlargest(3, "threat_score")
            incident_lines = []
            for _, incident in recent.iterrows():
                title_value = incident.get("title", "")
                title_text = "" if pd.isna(title_value) else str(title_value)
                incident_lines.append(
                    f"  • [{incident['severity']}] {html_escape(title_text[:50])}... "
                    f"(Score: {incident['threat_score']:.0f})"
                )
            incidents_html = "<br>".join(incident_lines)

            # Marker color based on severity
            if row["is_hotspot"]:
                marker_color = 'darkred'
            elif incident_count >= 5:
                marker_color = 'red'
            elif incident_count >= 3:
                marker_color = 'orange'
            else:
                marker_color = 'blue'

            severity_section = severity_html if severity_html else '  No data'
            drugs_section = drugs_html if not top_drugs.empty else '  None identified'
            incidents_section = incidents_html if not recent.empty else '  None'

            # Create detailed popup. NOTE(review): the original markup is not
            # visible in this view; this is a minimal layout using <br>/<b>.
            popup_html = (
                "<div>"
                f"<b>{location_type}: {location_name}</b><br><br>"
                f"📊 Total Incidents: {incident_count}<br>"
                f"🚨 Critical Threats: {critical_count}<br>"
                f"📈 Avg Threat Score: {loc_avg_threat:.1f}/100<br><br>"
                "<b>⚠️ Severity Breakdown:</b><br>"
                f"{severity_section}<br><br>"
                "<b>💊 Top Substances Detected:</b><br>"
                f"{drugs_section}<br><br>"
                "<b>🎯 Recent High-Threat Incidents:</b><br>"
                f"{incidents_section}<br>"
                "<i>Click marker for details • Hover for quick info</i>"
                "</div>"
            )

            # Tooltip (hover text)
            tooltip_text = (
                f"<b>{location_type}: {location_name}</b><br>"
                f"Total Incidents: {incident_count}<br>"
                f"Critical: {critical_count} | Avg Threat: {loc_avg_threat:.1f}"
            )

            # Add marker
            folium.CircleMarker(
                location=[row["lat"], row["lon"]],
                radius=min(incident_count * 2.5 if location_type == "Ward" else incident_count * 3.5, 25),
                color=marker_color,
                fill=True,
                fill_color=marker_color,
                fill_opacity=0.7,
                weight=2,
                popup=folium.Popup(popup_html, max_width=400),
                tooltip=folium.Tooltip(tooltip_text, sticky=True)
            ).add_to(m_unified)

        # Display map. Nothing is read back from the widget, so request no
        # returned objects; this keeps pan/zoom from rerunning the script.
        st_folium(m_unified, width="100%", height=700, returned_objects=[])

        # Hotspot analysis table
        st.subheader("🔥 Top Crime Hotspots")
        (hotspot_col,) = st.columns(1)
        with hotspot_col:
            st.markdown("**High-Activity Wards**")
            if not merged_wards.empty:
                ward_display = merged_wards.sort_values("count", ascending=False).head(10)
                st.dataframe(
                    ward_display[["ward_name", "count"]].rename(columns={
                        "ward_name": "Ward Name",
                        "count": "Incidents"
                    }).reset_index(drop=True),
                    use_container_width=True,
                    height=300
                )
            else:
                st.info("No ward data available")

st.markdown("---")

# --- High-Priority Intelligence Reports
st.subheader("🚨 High-Priority Intelligence Reports")
if not posts_df.empty:
    priority_posts = posts_df[
        (posts_df["severity"].isin(['Critical', 'High'])) |
        (posts_df["threat_score"] >= 50)
    ].sort_values("threat_score", ascending=False)

    if not priority_posts.empty:
        if "id" in priority_posts.columns:
            priority_posts = priority_posts.drop_duplicates(subset=['id'], keep='first')
        display_cols = ["datetime", "title", "severity", "threat_score", "drugs_mentioned", "ward_location", "subreddit"]
        available_cols = [col for col in display_cols if col in priority_posts.columns]

        st.dataframe(
            priority_posts[available_cols].head(50).rename(columns={
                "datetime": "Timestamp",
                "title": "Intelligence Report",
                "severity": "Severity",
                "threat_score": "Threat Score",
                "drugs_mentioned": "Substances",
                "ward_location": "Location",
                "subreddit": "Source"
            }),
            use_container_width=True,
            height=400
        )

        st.download_button(
            label="📥 Download Priority Reports (CSV)",
            data=priority_posts[available_cols].to_csv(index=False).encode("utf-8"),
            file_name=f"priority_intelligence_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )
    else:
        st.info("No high-priority incidents in selected date range")
else:
    st.info("No intelligence data available")

st.markdown("---")

# --- Advanced Analytics Section
st.subheader("🔬 Advanced Crime Analytics")
col1, col2 = st.columns(2)

with col1:
    if "hour" in posts_df.columns and "severity" in posts_df.columns:
        st.markdown("**Crime Patterns by Time of Day**")
        time_severity = posts_df.groupby(["hour", "severity"]).size().reset_index(name="count")
        fig_time = px.bar(
            time_severity,
            x="hour",
            y="count",
            color="severity",
            title="Crime Activity by Hour and Severity",
            labels={"hour": "Hour of Day", "count": "Incidents"},
            color_discrete_map=SEVERITY_COLORS
        )
        st.plotly_chart(fig_time, use_container_width=True)

with col2:
    if "sentiment_score" in posts_df.columns and "severity" in posts_df.columns:
        st.markdown("**Sentiment vs Crime Severity**")
        fig_sentiment_severity = px.box(
            posts_df,
            x="severity",
            y="sentiment_score",
            color="severity",
            title="Sentiment Distribution by Crime Severity",
            labels={"sentiment_score": "Sentiment Score", "severity": "Crime Severity"},
            color_discrete_map=SEVERITY_COLORS
        )
        st.plotly_chart(fig_sentiment_severity, use_container_width=True)

st.markdown("---")

# --- Network Analysis
if "subreddit" in posts_df.columns and "drugs_mentioned" in posts_df.columns:
    st.subheader("🕸️ Source-Substance Network Analysis")
    source_drug = posts_df[posts_df["drugs_mentioned"] != "Unspecified"].groupby(
        ["subreddit", "drugs_mentioned"]
    ).size().reset_index(name="mentions")

    if not source_drug.empty:
        top_relationships = source_drug.nlargest(15, "mentions")
        fig_network = px.bar(
            top_relationships,
            x="mentions",
            y="subreddit",
            color="drugs_mentioned",
            orientation='h',
            title="Top Source-Substance Relationships",
            labels={"mentions": "Number of Mentions", "subreddit": "Source Community"},
            height=500
        )
        st.plotly_chart(fig_network, use_container_width=True)
    st.markdown("---")

# --- Emerging Threats Detection
st.subheader("⚡ Emerging Threats Detection")
if "date" in posts_df.columns and "threat_score" in posts_df.columns:
    # Week arithmetic needs real dates: undated rows and an empty selection
    # would otherwise fail on `max() - timedelta`.
    dated_posts = posts_df[posts_df["date"].notna()]
    if dated_posts.empty:
        st.info("No dated posts available for trend analysis")
    else:
        today = dated_posts["date"].max()
        last_week = today - timedelta(days=7)
        prev_week = last_week - timedelta(days=7)

        in_last_week = dated_posts["date"] >= last_week
        in_prev_week = (dated_posts["date"] >= prev_week) & (dated_posts["date"] < last_week)

        recent_threats = dated_posts[in_last_week]["threat_score"].mean()
        previous_threats = dated_posts[in_prev_week]["threat_score"].mean()
        # NaN (no posts in the previous week) fails the > 0 test and yields 0.
        threat_change = ((recent_threats - previous_threats) / previous_threats * 100) if previous_threats > 0 else 0

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric(
                "Threat Level Trend",
                f"{recent_threats:.1f}",
                f"{threat_change:+.1f}%",
                delta_color="inverse"
            )

        with col2:
            if "ward_location" in dated_posts.columns:
                # _explode_locations drops blank entries, so "no location" is
                # never counted as a new location.
                recent_locs = set(_explode_locations(dated_posts[in_last_week], "ward_location")["ward_location"])
                prev_locs = set(_explode_locations(dated_posts[~in_last_week], "ward_location")["ward_location"])
                new_locations = len(recent_locs - prev_locs)
            else:
                new_locations = 0
            st.metric("New Active Locations", new_locations)

        with col3:
            daily_avg = dated_posts.groupby("date").size().mean()
            recent_avg = dated_posts[in_last_week].groupby("date").size().mean()
            spike = recent_avg > daily_avg * 1.5
            st.metric("Activity Status", "⚠️ SPIKE" if spike else "✅ Normal")

st.markdown("---")

# --- Intelligence Summary Report
st.subheader("📋 Executive Intelligence Summary")
summary_col1, summary_col2 = st.columns(2)

with summary_col1:
    st.markdown("**Key Findings:**")
    if not posts_df.empty:
        if "ward_location" in posts_df.columns and "threat_score" in posts_df.columns:
            ward_exploded_threat = _explode_locations(posts_df, "ward_location")
            if not ward_exploded_threat.empty:
                ward_threat = (
                    ward_exploded_threat.groupby("ward_location")["threat_score"]
                    .mean()
                    .sort_values(ascending=False)
                )
                if not ward_threat.empty:
                    st.markdown(f"🎯 **Highest Threat Zone:** {ward_threat.index[0].title()} (Score: {ward_threat.iloc[0]:.1f})")

        if "drugs_mentioned" in posts_df.columns:
            drug_mentions = posts_df["drugs_mentioned"].str.split(", ").explode()
            # Rank real substances only; otherwise a majority of "Unspecified"
            # posts hides the primary substance.
            top_drug = drug_mentions[drug_mentions != "Unspecified"].value_counts()
            if len(top_drug) > 0:
                st.markdown(f"💊 **Primary Substance:** {top_drug.index[0]} ({top_drug.iloc[0]} mentions)")

        if "hour" in posts_df.columns:
            hour_modes = posts_df["hour"].mode()
            if not hour_modes.empty:
                # Hours are floats when any timestamp is missing; show whole
                # hours and wrap 23 -> 0.
                peak_hour = int(hour_modes.iloc[0])
                st.markdown(f"🕐 **Peak Activity Time:** {peak_hour}:00 - {(peak_hour + 1) % 24}:00")

        if "subreddit" in posts_df.columns:
            source_counts = posts_df["subreddit"].value_counts()
            if not source_counts.empty:
                top_source = source_counts.index[0]
                st.markdown(f"📱 **Primary Intelligence Source:** r/{top_source}")

with summary_col2:
    st.markdown("**Risk Assessment:**")
    if not posts_df.empty and "severity" in posts_df.columns:
        critical_pct = (len(posts_df[posts_df["severity"] == "Critical"]) / len(posts_df) * 100)

        if critical_pct > 30:
            risk_level = "🔴 CRITICAL"
            risk_desc = "Immediate action required"
        elif critical_pct > 15:
            risk_level = "🟠 HIGH"
            risk_desc = "Enhanced monitoring recommended"
        elif critical_pct > 5:
            risk_level = "🟡 MODERATE"
            risk_desc = "Standard surveillance protocols"
        else:
            risk_level = "🟢 LOW"
            risk_desc = "Routine monitoring sufficient"

        st.markdown(f"**Overall Risk Level:** {risk_level}")
        st.markdown(f"*{risk_desc}*")
        st.markdown(f"- Critical incidents: {critical_pct:.1f}%")
        st.markdown(f"- Total monitored incidents: {len(posts_df)}")
        if "date" in posts_df.columns:
            known_dates = posts_df["date"].dropna()
            if not known_dates.empty:
                st.markdown(f"- Date range: {known_dates.min()} to {known_dates.max()}")

st.markdown("---")

# --- Export Options
st.subheader("📤 Export Intelligence Reports")
export_col1, export_col2, export_col3 = st.columns(3)

with export_col1:
    if not posts_df.empty:
        full_export = posts_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="📊 Full Dataset",
            data=full_export,
            file_name=f"intelligence_full_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )

with export_col2:
    if "severity" in posts_df.columns:
        critical_data = posts_df[posts_df["severity"] == "Critical"]
        if not critical_data.empty:
            critical_export = critical_data.to_csv(index=False).encode("utf-8")
            st.download_button(
                label="🚨 Critical Incidents",
                data=critical_export,
                file_name=f"critical_incidents_{datetime.now().strftime('%Y%m%d')}.csv",
                mime="text/csv"
            )

with export_col3:
    if not merged_wards.empty:
        location_export = merged_wards.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="🗺️ Location Analysis",
            data=location_export,
            file_name=f"location_analysis_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )

st.markdown("---")

# --- System Status Footer
st.markdown("**🔒 Intelligence System Status:**")
status_cols = st.columns(4)
with status_cols[0]:
    st.write("📄 Posts:", "✅ Online" if data_status["posts"] else "❌ Offline")
with status_cols[1]:
    st.write("💬 Comments:", "✅ Online" if data_status["comments"] else "❌ Offline")
with status_cols[2]:
    st.write("🏘️ Wards:", "✅ Online" if data_status["wards"] else "❌ Offline")
with status_cols[3]:
    st.write("🌍 Districts:", "✅ Online" if data_status["districts"] else "❌ Offline")

try:
    file_mod_time = datetime.fromtimestamp(os.path.getmtime(POSTS_FILE))
except OSError:
    # Posts file missing or unreadable: the timestamp line is simply omitted.
    file_mod_time = None
if file_mod_time is not None:
    st.markdown(f"*Intelligence data last updated: {file_mod_time.strftime('%Y-%m-%d %H:%M:%S')}*")

st.markdown("---")