Spaces:
Sleeping
Sleeping
import html
import os
import re
from datetime import datetime, timedelta

import folium
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import st_folium
from textblob import TextBlob
from wordcloud import WordCloud
# ------------------------
# Config
# ------------------------
# Page-level Streamlit settings for the dashboard.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_title="Reddit based Drug Crime Intelligence Dashboard",
)

# Paths to data files
POSTS_FILE = "data/processed/reddit_posts_filtered.csv"
COMMENTS_FILE = "data/processed/reddit_comments_filtered.csv"
WARD_COORDS_FILE = "data/bangalore_wards_coordinates.csv"
DISTRICT_COORDS_FILE = "data/karnataka_districts_coordinates.csv"

# Drug-related keywords for classification, grouped by category.
# classify_crime_severity() weights a hit in each category differently.
DRUG_KEYWORDS = dict(
    high_risk=[
        'dealing', 'dealer', 'supply', 'trafficking', 'smuggling',
        'cartel', 'seized', 'arrest', 'raid',
    ],
    substance=[
        'cocaine', 'heroin', 'mdma', 'meth', 'cannabis',
        'marijuana', 'ganja', 'weed', 'lsd', 'ecstasy',
    ],
    activity=[
        'selling', 'buying', 'distribution', 'possession',
        'consumption', 'overdose', 'addiction',
    ],
)
| # ------------------------ | |
| # Enhanced Data Loading | |
| # ------------------------ | |
def load_data(posts_file, comments_file, ward_file, district_file):
    """Read the four CSV inputs, reporting the outcome of each in the sidebar.

    Every source is loaded independently: a missing file produces a sidebar
    warning, any other failure a sidebar error, and in both cases an empty
    DataFrame is returned for that source so the rest of the app can run.

    Args:
        posts_file: Path of the posts CSV (read with every column as ``str``;
            de-duplicated on ``id``).
        comments_file: Path of the comments CSV (de-duplicated on ``id`` when
            that column exists).
        ward_file: Path of the ward coordinates CSV; a ``name`` column is
            renamed to ``ward_name`` when ``ward_name`` is absent.
        district_file: Path of the district coordinates CSV; a ``name`` column
            is renamed to ``district_name`` when ``district_name`` is absent.

    Returns:
        ``(posts, comments, wards, districts, data_status)`` where
        ``data_status`` maps each source name to ``True`` if it loaded.
    """
    data_status = dict.fromkeys(("posts", "comments", "wards", "districts"), False)

    def _attempt(key, reader, loaded_text, missing_text, failure_prefix):
        # Shared load/report routine; the reader runs inside the try so that
        # post-processing failures are reported like read failures.
        try:
            frame = reader()
            data_status[key] = True
            st.sidebar.success(loaded_text(frame))
        except FileNotFoundError:
            frame = pd.DataFrame()
            st.sidebar.warning(missing_text)
        except Exception as exc:
            frame = pd.DataFrame()
            st.sidebar.error(f"{failure_prefix}{exc}")
        return frame

    def _read_posts():
        raw = pd.read_csv(posts_file, dtype=str)
        return raw.drop_duplicates(subset=['id'], keep='first')

    def _read_comments():
        raw = pd.read_csv(comments_file)
        if 'id' not in raw.columns:
            return raw
        return raw.drop_duplicates(subset=['id'], keep='first')

    def _read_coordinates(path, canonical_column):
        raw = pd.read_csv(path)
        if canonical_column not in raw.columns and 'name' in raw.columns:
            raw = raw.rename(columns={'name': canonical_column})
        return raw

    # Load posts
    posts = _attempt(
        "posts",
        _read_posts,
        lambda frame: f"β Posts loaded: {len(frame)} records",
        "β οΈ Reddit posts file not found",
        "β Error loading posts: ",
    )

    # Load comments
    comments = _attempt(
        "comments",
        _read_comments,
        lambda frame: f"β Comments loaded: {len(frame)} records",
        "β οΈ Reddit comments file not found",
        "β Error loading comments: ",
    )

    # Load ward coordinates
    wards = _attempt(
        "wards",
        lambda: _read_coordinates(ward_file, 'ward_name'),
        lambda frame: f"β Wards loaded: {len(frame)} wards",
        "β οΈ Ward coordinates file not found",
        "β Error loading wards: ",
    )

    # Load district coordinates
    districts = _attempt(
        "districts",
        lambda: _read_coordinates(district_file, 'district_name'),
        lambda frame: f"β Districts loaded: {len(frame)} districts",
        "β οΈ District coordinates file not found",
        "β Error loading districts: ",
    )

    return posts, comments, wards, districts, data_status
| # ------------------------ | |
| # Crime Analysis Functions | |
| # ------------------------ | |
def classify_crime_severity(text):
    """Map free text to 'Critical', 'High', 'Medium' or 'Low'.

    Each keyword found as a case-insensitive substring of ``text`` adds its
    category weight (high_risk=3, substance=2, activity=1) to a running
    total; the total is then bucketed at thresholds 5, 3 and 1.
    """
    haystack = str(text).lower()
    category_weights = (('high_risk', 3), ('substance', 2), ('activity', 1))

    total = sum(
        weight
        for category, weight in category_weights
        for term in DRUG_KEYWORDS[category]
        if term in haystack
    )

    # Thresholds are checked from the most to the least severe.
    for floor, label in ((5, 'Critical'), (3, 'High'), (1, 'Medium')):
        if total >= floor:
            return label
    return 'Low'
def extract_drug_mentions(text):
    """Return the capitalised substance keywords found in ``text``.

    Matching is a case-insensitive substring test against
    ``DRUG_KEYWORDS['substance']``. Hits are joined with ", " in keyword-list
    order; 'Unspecified' is returned when nothing matches.
    """
    haystack = str(text).lower()
    hits = [
        name.capitalize()
        for name in DRUG_KEYWORDS['substance']
        if name in haystack
    ]
    if not hits:
        return 'Unspecified'
    return ', '.join(hits)
def calculate_threat_score(row):
    """Compute a threat score (capped at 100) for one post.

    Components:
      * +10 for every ``DRUG_KEYWORDS['high_risk']`` keyword found in the
        combined ``text`` and ``title`` (case-insensitive substring match);
      * ``score / 10`` and ``num_comments / 5``, each capped at 5, when the
        respective field is present;
      * +5 when the TextBlob polarity of the combined text is below -0.2.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) supporting ``in``
            and ``.get``.

    Returns:
        The numeric score, never greater than 100.
    """

    def _engagement(field, divisor):
        # Posts are loaded as strings, so a field may be NaN, blank or a
        # float-formatted string such as "12.0"; those count as 0 / are
        # truncated instead of aborting the whole scoring pass.
        if field not in row:
            return 0
        try:
            count = int(float(row.get(field, 0)))
        except (TypeError, ValueError, OverflowError):
            return 0
        return min(count / divisor, 5)

    score = 0
    text = str(row.get('text', '')) + ' ' + str(row.get('title', ''))
    text_lower = text.lower()

    for keyword in DRUG_KEYWORDS['high_risk']:
        if keyword in text_lower:
            score += 10

    score += _engagement('score', 10)
    score += _engagement('num_comments', 5)

    sentiment = TextBlob(text).sentiment.polarity
    if sentiment < -0.2:
        score += 5

    return min(score, 100)
| # ------------------------ | |
| # Load All Data | |
| # ------------------------ | |
# Load every source once; data_status records which ones loaded.
posts_df, comments_df, wards_df, districts_df, data_status = load_data(
    posts_file=POSTS_FILE,
    comments_file=COMMENTS_FILE,
    ward_file=WARD_COORDS_FILE,
    district_file=DISTRICT_COORDS_FILE,
)
| # ------------------------ | |
| # Data Processing | |
| # ------------------------ | |
def process_datetime(df, datetime_col='created_utc'):
    """Add ``datetime``, ``date``, ``hour`` and ``day_of_week`` columns to ``df``.

    The frame is modified in place and also returned. When ``datetime_col``
    is not a column of ``df`` the frame is returned untouched. Values that
    cannot be parsed become NaT.
    """
    if datetime_col in df.columns:
        # NOTE(review): values are parsed as-is; if the column holds numeric
        # epoch seconds this may not yield the intended timestamps - confirm
        # the format written by the collection step.
        df["datetime"] = pd.to_datetime(df[datetime_col], errors='coerce')
        parts = df["datetime"].dt
        df["date"] = parts.date
        df["hour"] = parts.hour
        df["day_of_week"] = parts.day_name()
    return df
# Normalize coordinate names
def _usable_location_names(frame, column):
    """Return the distinct, lower-cased names in ``frame[column]``.

    Blank names and the "nan" placeholder produced by stringifying missing
    values are dropped. Returns an empty list when the column is absent.
    """
    if frame.empty or column not in frame.columns:
        return []
    names = frame[column].astype(str).str.strip().str.lower()
    return [name for name in names.unique() if name and name != "nan"]


def _whole_word_pattern(names):
    """Build a ``\\b(a|b|...)\\b`` regex for ``names``, or None if empty.

    Longer names are listed first: regex alternation takes the first
    alternative that matches, so a short name must not shadow a longer name
    that starts with it.
    """
    distinct = set(names)
    if not distinct:
        return None
    ordered = sorted(distinct, key=lambda name: (-len(name), name))
    return r'\b(' + '|'.join(re.escape(name) for name in ordered) + r')\b'


if not wards_df.empty and "ward_name" in wards_df.columns:
    wards_df["ward_name"] = wards_df["ward_name"].astype(str).str.strip().str.lower()
if not districts_df.empty and "district_name" in districts_df.columns:
    districts_df["district_name"] = districts_df["district_name"].astype(str).str.strip().str.lower()

# District mapping: alternative spellings -> canonical district name
district_mapping = {
    "bangalore": "bengaluru",
    "blr": "bengaluru",
    "mysore": "mysuru",
}

# Create patterns (None when no usable names are available)
ward_pattern = _whole_word_pattern(_usable_location_names(wards_df, "ward_name"))

_district_names = _usable_location_names(districts_df, "district_name")
# An alias is only worth matching when its canonical district is known;
# without the aliases in the pattern the mapping above could never apply.
_district_aliases = [
    alias for alias, canonical in district_mapping.items()
    if canonical in _district_names
]
district_pattern = _whole_word_pattern(_district_names + _district_aliases)
def extract_locations(text_series, patterns):
    """Find location names in each text and join them with ", ".

    Args:
        text_series: Series of texts; missing values are treated as "".
        patterns: Iterable of regexes (each with one capturing group) that
            are applied to the lower-cased text.

    Returns:
        A Series aligned with ``text_series.index`` whose values are the
        distinct matches in order of first appearance ("" when none).
    """
    # Compile once instead of once per row.
    compiled = [re.compile(pattern) for pattern in patterns]

    joined = []
    for raw in text_series.fillna(""):
        lowered = str(raw).lower()
        hits = [match for regex in compiled for match in regex.findall(lowered)]
        # dict.fromkeys de-duplicates while keeping a stable order; a plain
        # set would make the joined string vary between runs.
        joined.append(", ".join(dict.fromkeys(hits)))

    # Explicit dtype keeps an empty result usable with the .str accessor.
    return pd.Series(joined, index=text_series.index, dtype=object)
# Process posts
if not posts_df.empty:
    posts_df = process_datetime(posts_df)

    def _post_text_part(column):
        """Return ``posts_df[column]`` as strings with missing values as ""."""
        if column in posts_df.columns:
            return posts_df[column].fillna("").astype(str)
        return pd.Series("", index=posts_df.index, dtype=object)

    # Blank out missing values BEFORE concatenating: NaN + str is NaN, which
    # would otherwise discard the title of every post without body text.
    post_text = _post_text_part("title") + " " + _post_text_part("text")

    if ward_pattern:
        posts_df["ward_location"] = extract_locations(post_text, [ward_pattern])
    else:
        posts_df["ward_location"] = ""

    if district_pattern:
        posts_df["district_location"] = extract_locations(post_text, [district_pattern])
    else:
        posts_df["district_location"] = ""

    def _canonical_districts(cell):
        """Map each district in a ", "-joined cell to its canonical name."""
        names = [district_mapping.get(name, name) for name in str(cell).split(", ") if name]
        # Two aliases of the same district collapse into one entry.
        return ", ".join(dict.fromkeys(names))

    # Applied per name: a whole-cell replace would miss multi-district cells.
    posts_df["district_location"] = posts_df["district_location"].apply(_canonical_districts)

    posts_df["severity"] = post_text.apply(classify_crime_severity)
    posts_df["drugs_mentioned"] = post_text.apply(extract_drug_mentions)
    posts_df["threat_score"] = posts_df.apply(calculate_threat_score, axis=1)
    posts_df["sentiment_score"] = post_text.apply(lambda x: TextBlob(str(x)).sentiment.polarity)
    posts_df["sentiment"] = posts_df["sentiment_score"].apply(
        lambda x: "Positive" if x > 0 else ("Negative" if x < 0 else "Neutral")
    )

# Process comments
if not comments_df.empty:
    comments_df = process_datetime(comments_df)
# ------------------------
# Dashboard Header
# ------------------------
st.title("π¨ Reddit based Drug Crime Intelligence Dashboard")
st.markdown("**Real-time intelligence analysis of drug-related criminal activities from Reddit social media monitoring**")

# ------------------------
# Sidebar Filters
# ------------------------
# Each filter narrows posts_df in turn; later sections only see what is left.
st.sidebar.title("π§ Intelligence Controls")

if st.sidebar.button("π Refresh Data"):
    st.cache_data.clear()
    st.rerun()

# Severity filter
if not posts_df.empty and "severity" in posts_df.columns:
    severity_filter = st.sidebar.multiselect(
        "β οΈ Crime Severity Level",
        options=['Critical', 'High', 'Medium', 'Low'],
        default=['Critical', 'High']
    )
    if severity_filter:
        posts_df = posts_df[posts_df["severity"].isin(severity_filter)]

# Date range filter
if not posts_df.empty and "datetime" in posts_df.columns:
    # Bounds come from posts with a parsed timestamp only; with none there is
    # no meaningful range to offer, so the widget is skipped.
    dated_posts = posts_df[posts_df["datetime"].notna()]
    if not dated_posts.empty:
        min_date = dated_posts["datetime"].min().date()
        max_date = dated_posts["datetime"].max().date()

        date_range = st.sidebar.date_input(
            "π Select Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date
        )

        # The widget yields fewer than two dates while a range is being picked.
        if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
            # Compare on the dated subset so missing dates never reach the
            # comparison; undated posts cannot fall inside a range anyway.
            posts_df = dated_posts[
                (dated_posts["date"] >= date_range[0]) &
                (dated_posts["date"] <= date_range[1])
            ]

# Subreddit filter
if not posts_df.empty and "subreddit" in posts_df.columns:
    subreddits = st.sidebar.multiselect(
        "π± Filter by Subreddits",
        options=posts_df["subreddit"].unique(),
        default=posts_df["subreddit"].value_counts().head(5).index.tolist()
    )
    if subreddits:
        posts_df = posts_df[posts_df["subreddit"].isin(subreddits)]

# Keyword search
search_keyword = st.sidebar.text_input("π Search Keywords in Content")
if search_keyword and not posts_df.empty:
    keyword_hits = pd.Series(False, index=posts_df.index)
    for searched_column in ("text", "title"):
        if searched_column in posts_df.columns:
            # regex=False: the keyword is user input and must be matched
            # literally (a stray "(" would otherwise raise re.error).
            keyword_hits = keyword_hits | posts_df[searched_column].str.contains(
                search_keyword, case=False, na=False, regex=False
            )
    posts_df = posts_df[keyword_hits]

# ------------------------
# Main Dashboard Content
# ------------------------
if posts_df.empty and comments_df.empty:
    st.error("π« No intelligence data available. Please ensure data collection is operational.")
    st.stop()
# --- Crime Intelligence Metrics
# Four headline numbers computed from the currently filtered posts.
st.subheader("π Crime Intelligence Overview")

col1, col2, col3, col4 = st.columns(4)

with col1:
    critical_posts = len(posts_df[posts_df["severity"] == "Critical"]) if "severity" in posts_df.columns else 0
    st.metric(
        label="Critical Threats",
        value=critical_posts,
        delta=f"{(critical_posts/len(posts_df)*100):.1f}%" if len(posts_df) > 0 else "0%"
    )

with col2:
    avg_threat = posts_df["threat_score"].mean() if "threat_score" in posts_df.columns else 0
    if pd.isna(avg_threat):
        # Mean of an empty selection is NaN; show 0.0 rather than "nan".
        avg_threat = 0.0
    st.metric(
        label="Avg Threat Score",
        value=f"{avg_threat:.1f}",
        delta="High" if avg_threat > 50 else "Moderate"
    )

with col3:
    unique_locations = 0
    if "ward_location" in posts_df.columns:
        ward_names = posts_df["ward_location"].fillna("")
        # One entry per mentioned ward, then count the distinct ones.
        unique_locations = ward_names[ward_names != ""].str.split(", ").explode().nunique()
    st.metric(
        label="Active Locations",
        value=unique_locations
    )

with col4:
    drug_types = 0
    if "drugs_mentioned" in posts_df.columns:
        mentioned = posts_df["drugs_mentioned"].str.split(", ").explode()
        # "Unspecified" marks posts without a recognised substance and must
        # not be counted as a drug type.
        drug_types = mentioned[mentioned != "Unspecified"].nunique()
    st.metric(
        label="Drug Types Identified",
        value=drug_types
    )

st.markdown("---")
# --- Crime Severity Distribution
# Left: share of posts per severity. Right: histogram of threat scores with
# a dashed marker at 50.
if "severity" in posts_df.columns:
    st.subheader("β οΈ Crime Severity Analysis")

    severity_palette = {
        'Critical': '#FF0000',
        'High': '#FF6B00',
        'Medium': '#FFD700',
        'Low': '#90EE90',
    }
    pie_slot, histogram_slot = st.columns(2)

    with pie_slot:
        severity_counts = posts_df["severity"].value_counts()
        severity_pie = px.pie(
            values=severity_counts.values,
            names=severity_counts.index,
            title="Crime Severity Distribution",
            color=severity_counts.index,
            color_discrete_map=severity_palette,
        )
        st.plotly_chart(severity_pie, use_container_width=True)

    with histogram_slot:
        threat_histogram = px.histogram(
            posts_df,
            x="threat_score",
            nbins=20,
            title="Threat Score Distribution",
            labels={"threat_score": "Threat Score", "count": "Number of Posts"},
        )
        threat_histogram.add_vline(
            x=50,
            line_dash="dash",
            line_color="red",
            annotation_text="High Threat Threshold",
        )
        st.plotly_chart(threat_histogram, use_container_width=True)

    st.markdown("---")
# --- Drug Type Analysis
# Horizontal bar chart of the ten most-mentioned substances; posts tagged
# "Unspecified" are left out.
if "drugs_mentioned" in posts_df.columns:
    st.subheader("π Substance Intelligence")

    mentions = posts_df["drugs_mentioned"].str.split(", ").explode()
    named_mentions = mentions[mentions != "Unspecified"]
    drug_counts = named_mentions.value_counts().head(10)

    if not drug_counts.empty:
        substances_chart = px.bar(
            x=drug_counts.values,
            y=drug_counts.index,
            orientation='h',
            title="Top 10 Substances Mentioned",
            labels={"x": "Mentions", "y": "Substance"},
            color=drug_counts.values,
            color_continuous_scale="Reds",
        )
        st.plotly_chart(substances_chart, use_container_width=True)

    st.markdown("---")
# --- Timeline Analysis
# Left: posts per day, one line per severity. Right: day-of-week by hour
# heatmap of post counts.
if "date" in posts_df.columns:
    st.subheader("π Crime Activity Timeline")

    timeline_palette = {
        'Critical': '#FF0000',
        'High': '#FF6B00',
        'Medium': '#FFD700',
        'Low': '#90EE90',
    }
    daily_slot, hourly_slot = st.columns(2)

    with daily_slot:
        per_day = posts_df.groupby(["date", "severity"]).size()
        daily_data = per_day.reset_index(name="count")
        daily_chart = px.line(
            daily_data,
            x="date",
            y="count",
            color="severity",
            title="Daily Crime Activity by Severity",
            labels={"count": "Number of Incidents", "date": "Date"},
            color_discrete_map=timeline_palette,
        )
        st.plotly_chart(daily_chart, use_container_width=True)

    with hourly_slot:
        has_time_parts = "hour" in posts_df.columns and "day_of_week" in posts_df.columns
        if has_time_parts:
            per_slot = posts_df.groupby(["day_of_week", "hour"]).size()
            hourly_activity = per_slot.reset_index(name="count")
            hourly_chart = px.density_heatmap(
                hourly_activity,
                x="hour",
                y="day_of_week",
                z="count",
                title="Activity Heatmap - High-Risk Hours",
                labels={"hour": "Hour of Day", "day_of_week": "Day", "count": "Incidents"},
                color_continuous_scale="Reds",
            )
            st.plotly_chart(hourly_chart, use_container_width=True)

    st.markdown("---")
# --- Geographic Intelligence - COMBINED MAP
st.subheader("πΊοΈ Geographic Crime Intelligence")


def _coords_usable(coords, name_column):
    """True when ``coords`` has rows and the name/lat/lon columns the map needs."""
    return not coords.empty and {name_column, "lat", "lon"}.issubset(coords.columns)


def _mentions_by_location(frame, column):
    """Return one row per (post, location) for posts that name a location."""
    located = frame[frame[column].fillna("") != ""].copy()
    located[column] = located[column].str.split(", ")
    located = located.explode(column)
    located[column] = located[column].str.strip().str.lower()
    return located


def _popup_bullets(pairs):
    """Render (label, value) pairs as escaped, <br>-separated bullet lines."""
    return "<br>".join(
        f" β’ {html.escape(str(label))}: {value}" for label, value in pairs
    )


def _popup_incident(record):
    """Render one post as a popup line; the title is untrusted and escaped."""
    title = "" if pd.isna(record["title"]) else str(record["title"])
    return (
        f" β’ <b>[{html.escape(str(record['severity']))}]</b> "
        f"{html.escape(title[:50])}... "
        f"<i>(Score: {record['threat_score']:.0f})</i>"
    )


# Process both ward and district data
ward_data_available = _coords_usable(wards_df, "ward_name") and "ward_location" in posts_df.columns
district_data_available = _coords_usable(districts_df, "district_name") and "district_location" in posts_df.columns

if ward_data_available or district_data_available:
    st.markdown("**Crime hotspot analysis across Karnataka (Wards & Districts)**")

    # Prepare ward data
    merged_wards = pd.DataFrame()
    ward_exploded = None
    if ward_data_available:
        ward_exploded = _mentions_by_location(posts_df, "ward_location")
        loc_counts = ward_exploded.groupby("ward_location").size().reset_index(name="count")
        merged_wards = pd.merge(loc_counts, wards_df, left_on="ward_location", right_on="ward_name", how="inner")
        merged_wards["location_type"] = "Ward"
        merged_wards["location_name"] = merged_wards["ward_name"]

    # Prepare district data
    merged_districts = pd.DataFrame()
    district_exploded = None
    if district_data_available:
        district_exploded = _mentions_by_location(posts_df, "district_location")
        district_counts = district_exploded.groupby("district_location").size().reset_index(name="count")
        merged_districts = pd.merge(district_counts, districts_df, left_on="district_location", right_on="district_name", how="inner")
        merged_districts["location_type"] = "District"
        merged_districts["location_name"] = merged_districts["district_name"]

    # Combine both datasets (skipping empty frames)
    location_frames = [frame for frame in (merged_wards, merged_districts) if not frame.empty]
    all_locations = pd.concat(location_frames, ignore_index=True) if location_frames else pd.DataFrame()
    if not all_locations.empty:
        # A location without coordinates cannot be placed on the map.
        all_locations = all_locations.dropna(subset=["lat", "lon"]).copy()

    if not all_locations.empty:
        # Determine center of map
        center_lat = all_locations["lat"].mean()
        center_lon = all_locations["lon"].mean()

        # Create unified map
        m_unified = folium.Map(
            location=[center_lat, center_lon],
            zoom_start=9 if ward_data_available else 7,
            tiles="OpenStreetMap"
        )

        # Add heatmap layer weighted by incident count
        heat_data = [[row["lat"], row["lon"], row["count"]] for _, row in all_locations.iterrows()]
        HeatMap(heat_data, radius=20, blur=15, max_zoom=13, gradient={
            0.0: 'blue', 0.5: 'yellow', 0.75: 'orange', 1.0: 'red'
        }).add_to(m_unified)

        # Hotspots are locations at or above the 70th percentile of counts
        threshold = all_locations["count"].quantile(0.70)
        all_locations["is_hotspot"] = all_locations["count"] >= threshold

        # Add markers for each location
        for _, row in all_locations.iterrows():
            location_name = html.escape(str(row["location_name"]).title())
            location_type = row["location_type"]
            incident_count = row["count"]

            # Location-specific posts. Exact match on the per-location rows:
            # a substring/regex search would also pick up other places whose
            # name merely contains this one, contradicting incident_count.
            if location_type == "Ward":
                loc_data = ward_exploded[ward_exploded["ward_location"] == row["location_name"]]
            else:
                loc_data = district_exploded[district_exploded["district_location"] == row["location_name"]]

            # Severity breakdown
            severity_breakdown = loc_data["severity"].value_counts().to_dict()
            severity_html = _popup_bullets(severity_breakdown.items())

            # Critical incidents count
            critical_count = severity_breakdown.get("Critical", 0)

            # Top drugs in this location
            loc_drugs = loc_data["drugs_mentioned"].str.split(", ").explode()
            top_drugs = loc_drugs[loc_drugs != "Unspecified"].value_counts().head(3)
            drugs_html = _popup_bullets(top_drugs.items())

            # Average threat score
            avg_threat = loc_data["threat_score"].mean()

            # Highest-threat incidents; reindex tolerates a missing column
            recent = loc_data.nlargest(3, "threat_score").reindex(
                columns=["title", "severity", "threat_score"]
            )
            incidents_html = "<br>".join(_popup_incident(r) for _, r in recent.iterrows())

            # Marker color: hotspots first, then by incident count
            marker_color = 'darkred' if row["is_hotspot"] else ('red' if incident_count >= 5 else ('orange' if incident_count >= 3 else 'blue'))

            # Icon based on type
            icon_symbol = 'home' if location_type == "Ward" else 'map'

            # Create detailed popup
            popup_html = f"""
            <div style='width: 350px; font-family: Arial, sans-serif;'>
            <h3 style='color: {marker_color}; margin-bottom: 8px; border-bottom: 2px solid {marker_color}; padding-bottom: 5px;'>
            {location_type}: {location_name}
            </h3>
            <div style='margin: 10px 0;'>
            <b>π Total Incidents:</b> <span style='font-size: 18px; color: {marker_color};'>{incident_count}</span><br>
            <b>π¨ Critical Threats:</b> <span style='font-size: 18px; color: darkred;'>{critical_count}</span><br>
            <b>π Avg Threat Score:</b> <span style='font-size: 16px;'>{avg_threat:.1f}/100</span>
            </div>
            <hr style='border: 1px solid #ddd;'>
            <div style='margin: 10px 0;'>
            <b>β οΈ Severity Breakdown:</b><br>
            {severity_html if severity_html else ' No data'}
            </div>
            <hr style='border: 1px solid #ddd;'>
            <div style='margin: 10px 0;'>
            <b>π Top Substances Detected:</b><br>
            {drugs_html if not top_drugs.empty else ' None identified'}
            </div>
            <hr style='border: 1px solid #ddd;'>
            <div style='margin: 10px 0;'>
            <b>π― Recent High-Threat Incidents:</b><br>
            {incidents_html if not recent.empty else ' None'}
            </div>
            <div style='margin-top: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 5px; text-align: center; font-size: 11px;'>
            <i>Click marker for details β’ Hover for quick info</i>
            </div>
            </div>
            """

            # Tooltip (hover text)
            tooltip_text = f"""
            <b>{location_type}: {location_name}</b><br>
            Total Incidents: {incident_count}<br>
            Critical: {critical_count} | Avg Threat: {avg_threat:.1f}
            """

            # Add marker; radius grows with the count and is capped at 25
            folium.CircleMarker(
                location=[row["lat"], row["lon"]],
                radius=min(incident_count * 2.5 if location_type == "Ward" else incident_count * 3.5, 25),
                color=marker_color,
                fill=True,
                fill_color=marker_color,
                fill_opacity=0.7,
                weight=2,
                popup=folium.Popup(popup_html, max_width=400),
                tooltip=folium.Tooltip(tooltip_text, sticky=True)
            ).add_to(m_unified)

        # Display map
        st_folium(m_unified, width="100%", height=700)

    # Hotspot analysis table
    st.subheader("π₯ Top Crime Hotspots")
    (hotspot_slot,) = st.columns(1)

    with hotspot_slot:
        st.markdown("**High-Activity Wards**")
        if not merged_wards.empty:
            ward_display = merged_wards.sort_values("count", ascending=False).head(10)
            st.dataframe(
                ward_display[["ward_name", "count"]].rename(columns={
                    "ward_name": "Ward Name",
                    "count": "Incidents"
                }).reset_index(drop=True),
                use_container_width=True,
                height=300
            )
        else:
            st.info("No ward data available")

    st.markdown("---")
# --- High-Priority Intelligence Reports
# Table (top 50) and CSV download of posts that are Critical/High or have a
# threat score of at least 50, highest score first.
st.subheader("π¨ High-Priority Intelligence Reports")

if posts_df.empty:
    st.info("No intelligence data available")
else:
    is_severe = posts_df["severity"].isin(['Critical', 'High'])
    is_high_scoring = posts_df["threat_score"] >= 50
    priority_posts = posts_df[is_severe | is_high_scoring].sort_values("threat_score", ascending=False)

    if priority_posts.empty:
        st.info("No high-priority incidents in selected date range")
    else:
        priority_posts = priority_posts.drop_duplicates(subset=['id'], keep='first')

        column_titles = {
            "datetime": "Timestamp",
            "title": "Intelligence Report",
            "severity": "Severity",
            "threat_score": "Threat Score",
            "drugs_mentioned": "Substances",
            "ward_location": "Location",
            "subreddit": "Source",
        }
        # Only columns actually present are shown and exported.
        available_cols = [name for name in column_titles if name in priority_posts.columns]
        report_view = priority_posts[available_cols]

        st.dataframe(
            report_view.head(50).rename(columns=column_titles),
            use_container_width=True,
            height=400,
        )

        st.download_button(
            label="π₯ Download Priority Reports (CSV)",
            data=report_view.to_csv(index=False).encode("utf-8"),
            file_name=f"priority_intelligence_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv",
        )

st.markdown("---")
# --- Advanced Analytics Section
# Left: stacked bars of posts per hour by severity. Right: box plot of
# sentiment scores per severity.
st.subheader("π¬ Advanced Crime Analytics")

analytics_palette = {
    'Critical': '#FF0000',
    'High': '#FF6B00',
    'Medium': '#FFD700',
    'Low': '#90EE90',
}
hourly_slot, sentiment_slot = st.columns(2)
post_columns = posts_df.columns

with hourly_slot:
    if "hour" in post_columns and "severity" in post_columns:
        st.markdown("**Crime Patterns by Time of Day**")
        per_hour = posts_df.groupby(["hour", "severity"]).size()
        time_severity = per_hour.reset_index(name="count")
        hour_chart = px.bar(
            time_severity,
            x="hour",
            y="count",
            color="severity",
            title="Crime Activity by Hour and Severity",
            labels={"hour": "Hour of Day", "count": "Incidents"},
            color_discrete_map=analytics_palette,
        )
        st.plotly_chart(hour_chart, use_container_width=True)

with sentiment_slot:
    if "sentiment_score" in post_columns and "severity" in post_columns:
        st.markdown("**Sentiment vs Crime Severity**")
        sentiment_chart = px.box(
            posts_df,
            x="severity",
            y="sentiment_score",
            color="severity",
            title="Sentiment Distribution by Crime Severity",
            labels={"sentiment_score": "Sentiment Score", "severity": "Crime Severity"},
            color_discrete_map=analytics_palette,
        )
        st.plotly_chart(sentiment_chart, use_container_width=True)

st.markdown("---")
# --- Network Analysis
# Bar chart of the 15 most frequent (subreddit, drugs_mentioned) pairs.
# The full "drugs_mentioned" string of a post is used as one category.
if "subreddit" in posts_df.columns and "drugs_mentioned" in posts_df.columns:
    st.subheader("πΈοΈ Source-Substance Network Analysis")

    with_substance = posts_df[posts_df["drugs_mentioned"] != "Unspecified"]
    pair_sizes = with_substance.groupby(["subreddit", "drugs_mentioned"]).size()
    source_drug = pair_sizes.reset_index(name="mentions")

    if not source_drug.empty:
        top_relationships = source_drug.nlargest(15, "mentions")
        relationship_chart = px.bar(
            top_relationships,
            x="mentions",
            y="subreddit",
            color="drugs_mentioned",
            orientation='h',
            title="Top Source-Substance Relationships",
            labels={"mentions": "Number of Mentions", "subreddit": "Source Community"},
            height=500,
        )
        st.plotly_chart(relationship_chart, use_container_width=True)

    st.markdown("---")
# --- Emerging Threats Detection
# Compares the 7 days up to the newest post with the 7 days before that.
st.subheader("β‘ Emerging Threats Detection")

if "date" in posts_df.columns and "threat_score" in posts_df.columns:
    # Posts without a parsed date cannot be placed in either window.
    dated_posts = posts_df[posts_df["date"].notna()]

    if dated_posts.empty:
        # Without this guard the window arithmetic below would fail when the
        # filters leave no dated posts.
        st.info("No dated posts available for trend analysis")
    else:
        today = dated_posts["date"].max()
        last_week = today - timedelta(days=7)
        prev_week = last_week - timedelta(days=7)

        in_recent_window = dated_posts["date"] >= last_week
        in_previous_window = (dated_posts["date"] >= prev_week) & ~in_recent_window

        recent_threats = dated_posts.loc[in_recent_window, "threat_score"].mean()
        previous_threats = dated_posts.loc[in_previous_window, "threat_score"].mean()

        # NaN (no posts in the previous window) compares False and yields 0.
        threat_change = ((recent_threats - previous_threats) / previous_threats * 100) if previous_threats > 0 else 0

        def _ward_names(frame):
            """Return the set of non-empty ward names mentioned in ``frame``."""
            if "ward_location" not in frame.columns:
                return set()
            names = frame["ward_location"].dropna().str.split(", ").explode()
            # Drop the empty string that marks "no location detected".
            return {name for name in names if isinstance(name, str) and name}

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric(
                "Threat Level Trend",
                f"{recent_threats:.1f}",
                f"{threat_change:+.1f}%",
                delta_color="inverse"
            )

        with col2:
            recent_locs = _ward_names(dated_posts[in_recent_window])
            prev_locs = _ward_names(dated_posts[~in_recent_window])
            new_locations = len(recent_locs - prev_locs)
            st.metric("New Active Locations", new_locations)

        with col3:
            daily_avg = dated_posts.groupby("date").size().mean()
            recent_avg = dated_posts[in_recent_window].groupby("date").size().mean()
            spike = recent_avg > daily_avg * 1.5
            st.metric("Activity Status", "β οΈ SPIKE" if spike else "β Normal")

    st.markdown("---")
# --- Intelligence Summary Report
# Left: headline findings. Right: overall risk level derived from the share
# of Critical posts (>30% critical, >15% high, >5% moderate, else low).
st.subheader("π Executive Intelligence Summary")

summary_col1, summary_col2 = st.columns(2)

with summary_col1:
    st.markdown("**Key Findings:**")

    if not posts_df.empty:
        if "ward_location" in posts_df.columns and "threat_score" in posts_df.columns:
            ward_posts_with_location = posts_df[posts_df["ward_location"].fillna("") != ""]
            if not ward_posts_with_location.empty:
                # One row per mentioned ward so every ward gets its own mean.
                ward_exploded_threat = ward_posts_with_location.assign(
                    ward_location=ward_posts_with_location["ward_location"].str.split(", ")
                ).explode("ward_location").reset_index(drop=True)
                ward_threat = ward_exploded_threat.groupby("ward_location")["threat_score"].mean().sort_values(ascending=False)
                if not ward_threat.empty:
                    st.markdown(f"π― **Highest Threat Zone:** {ward_threat.index[0].title()} (Score: {ward_threat.iloc[0]:.1f})")

        if "drugs_mentioned" in posts_df.columns:
            drug_mentions = posts_df["drugs_mentioned"].str.split(", ").explode()
            # Rank named substances only; otherwise the finding disappears
            # whenever "Unspecified" happens to be the most common value.
            top_drug = drug_mentions[drug_mentions != "Unspecified"].value_counts()
            if len(top_drug) > 0:
                st.markdown(f"π **Primary Substance:** {top_drug.index[0]} ({top_drug.iloc[0]} mentions)")

        if "hour" in posts_df.columns:
            hour_modes = posts_df["hour"].dropna().mode()
            if not hour_modes.empty:
                # The column is float when some timestamps are missing; cast
                # so the label reads "14:00", not "14.0:00".
                peak_hour = int(hour_modes.iloc[0])
                st.markdown(f"π **Peak Activity Time:** {peak_hour}:00 - {peak_hour+1}:00")

        if "subreddit" in posts_df.columns:
            source_counts = posts_df["subreddit"].value_counts()
            if not source_counts.empty:
                top_source = source_counts.index[0]
                st.markdown(f"π± **Primary Intelligence Source:** r/{top_source}")

with summary_col2:
    st.markdown("**Risk Assessment:**")

    if not posts_df.empty and "severity" in posts_df.columns:
        critical_pct = (len(posts_df[posts_df["severity"] == "Critical"]) / len(posts_df) * 100)

        if critical_pct > 30:
            risk_level = "π΄ CRITICAL"
            risk_desc = "Immediate action required"
        elif critical_pct > 15:
            risk_level = "π HIGH"
            risk_desc = "Enhanced monitoring recommended"
        elif critical_pct > 5:
            risk_level = "π‘ MODERATE"
            risk_desc = "Standard surveillance protocols"
        else:
            risk_level = "π’ LOW"
            risk_desc = "Routine monitoring sufficient"

        st.markdown(f"**Overall Risk Level:** {risk_level}")
        st.markdown(f"*{risk_desc}*")
        st.markdown(f"- Critical incidents: {critical_pct:.1f}%")
        st.markdown(f"- Total monitored incidents: {len(posts_df)}")

        # The date column only exists when timestamps were processed, and
        # missing dates are skipped before taking min/max.
        if "date" in posts_df.columns:
            known_dates = posts_df["date"].dropna()
            if not known_dates.empty:
                st.markdown(f"- Date range: {known_dates.min()} to {known_dates.max()}")

st.markdown("---")
# --- Export Options
# Three CSV downloads: all filtered posts, Critical posts only, and the
# per-ward location table (when the map section produced one).
st.subheader("π€ Export Intelligence Reports")

export_col1, export_col2, export_col3 = st.columns(3)


def _csv_bytes(frame):
    """Serialise ``frame`` to UTF-8 encoded CSV without the index."""
    return frame.to_csv(index=False).encode("utf-8")


with export_col1:
    if not posts_df.empty:
        full_export = _csv_bytes(posts_df)
        st.download_button(
            label="π Full Dataset",
            data=full_export,
            file_name=f"intelligence_full_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv",
        )

with export_col2:
    if "severity" in posts_df.columns:
        critical_data = posts_df[posts_df["severity"] == "Critical"]
        if not critical_data.empty:
            critical_export = _csv_bytes(critical_data)
            st.download_button(
                label="π¨ Critical Incidents",
                data=critical_export,
                file_name=f"critical_incidents_{datetime.now().strftime('%Y%m%d')}.csv",
                mime="text/csv",
            )

with export_col3:
    # merged_wards is only assigned when the geographic section ran.
    if 'merged_wards' in locals() and not merged_wards.empty:
        location_export = _csv_bytes(merged_wards)
        st.download_button(
            label="πΊοΈ Location Analysis",
            data=location_export,
            file_name=f"location_analysis_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv",
        )

st.markdown("---")
# --- System Status Footer
# One column per data source showing whether it loaded, followed by the
# modification time of the posts file when it can be read.
st.markdown("**π Intelligence System Status:**")
status_cols = st.columns(4)

status_labels = (
    ("π Posts:", "posts"),
    ("π¬ Comments:", "comments"),
    ("ποΈ Wards:", "wards"),
    ("π Districts:", "districts"),
)
for status_col, (status_label, status_key) in zip(status_cols, status_labels):
    with status_col:
        st.write(status_label, "β Online" if data_status[status_key] else "β Offline")

# Best effort: the footer line is simply omitted when the file is missing or
# its timestamp cannot be converted. Only those failures are suppressed, so
# unrelated errors are no longer hidden by a bare except.
try:
    file_mod_time = datetime.fromtimestamp(os.path.getmtime(POSTS_FILE))
except (OSError, OverflowError, ValueError):
    file_mod_time = None

if file_mod_time is not None:
    st.markdown(f"*Intelligence data last updated: {file_mod_time.strftime('%Y-%m-%d %H:%M:%S')}*")

st.markdown("---")