lawlevisan committed on
Commit
228c79c
Β·
verified Β·
1 Parent(s): 84d37de

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +902 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,904 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from wordcloud import WordCloud
6
+ import matplotlib.pyplot as plt
7
+ import folium
8
+ from folium.plugins import HeatMap, MarkerCluster
9
+ from streamlit_folium import st_folium
10
+ from datetime import datetime, timedelta
11
+ import re
12
+ import os
13
+ from textblob import TextBlob
14
+
15
# ------------------------
# Config
# ------------------------
st.set_page_config(
    page_title="Reddit based Drug Crime Intelligence Dashboard",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Paths to data files (relative to the app's working directory)
POSTS_FILE = "data/processed/reddit_posts_filtered.csv"
COMMENTS_FILE = "data/processed/reddit_comments_filtered.csv"
WARD_COORDS_FILE = "data/bangalore_wards_coordinates.csv"
DISTRICT_COORDS_FILE = "data/karnataka_districts_coordinates.csv"

# Keyword groups driving the classifiers below:
#   'high_risk'  — trade/enforcement terms (weighted highest),
#   'substance'  — specific drug names,
#   'activity'   — generic drug-related behaviour (weighted lowest).
DRUG_KEYWORDS = {
    'high_risk': ['dealing', 'dealer', 'supply', 'trafficking', 'smuggling', 'cartel', 'seized', 'arrest', 'raid'],
    'substance': ['cocaine', 'heroin', 'mdma', 'meth', 'cannabis', 'marijuana', 'ganja', 'weed', 'lsd', 'ecstasy'],
    'activity': ['selling', 'buying', 'distribution', 'possession', 'consumption', 'overdose', 'addiction'],
}
36
+
37
# ------------------------
# Enhanced Data Loading
# ------------------------
@st.cache_data
def load_data(posts_file, comments_file, ward_file, district_file):
    """Load the posts/comments/ward/district CSVs with per-file error handling.

    Args:
        posts_file, comments_file, ward_file, district_file: CSV paths.

    Returns:
        (posts, comments, wards, districts, data_status): four DataFrames
        (empty on any failure) plus a dict of per-dataset success flags.

    NOTE(review): the st.sidebar.* status messages run inside a cached
    function, so they only appear on cache misses — confirm intended.
    """

    def _load(path, warn_name, err_name, **read_kwargs):
        """Read one CSV; return (df, ok), reporting failures in the sidebar."""
        try:
            return pd.read_csv(path, **read_kwargs), True
        except FileNotFoundError:
            st.sidebar.warning(f"⚠️ {warn_name} file not found")
        except Exception as e:
            st.sidebar.error(f"❌ Error loading {err_name}: {str(e)}")
        return pd.DataFrame(), False

    data_status = {"posts": False, "comments": False, "wards": False, "districts": False}

    # Posts are read as strings so Reddit IDs/scores survive untouched.
    posts, ok = _load(posts_file, "Reddit posts", "posts", dtype=str)
    if ok:
        # Guard the dedupe: a malformed export may lack the 'id' column
        # (the comments branch already guarded; posts now matches it).
        if "id" in posts.columns:
            posts = posts.drop_duplicates(subset=["id"], keep="first")
        data_status["posts"] = True
        st.sidebar.success(f"✅ Posts loaded: {len(posts)} records")

    comments, ok = _load(comments_file, "Reddit comments", "comments")
    if ok:
        if "id" in comments.columns:
            comments = comments.drop_duplicates(subset=["id"], keep="first")
        data_status["comments"] = True
        st.sidebar.success(f"✅ Comments loaded: {len(comments)} records")

    wards, ok = _load(ward_file, "Ward coordinates", "wards")
    if ok:
        # Accept either 'ward_name' or a generic 'name' column.
        if "ward_name" not in wards.columns and "name" in wards.columns:
            wards.rename(columns={"name": "ward_name"}, inplace=True)
        data_status["wards"] = True
        st.sidebar.success(f"✅ Wards loaded: {len(wards)} wards")

    districts, ok = _load(district_file, "District coordinates", "districts")
    if ok:
        if "district_name" not in districts.columns and "name" in districts.columns:
            districts.rename(columns={"name": "district_name"}, inplace=True)
        data_status["districts"] = True
        st.sidebar.success(f"✅ Districts loaded: {len(districts)} districts")

    return posts, comments, wards, districts, data_status
101
+
102
# ------------------------
# Crime Analysis Functions
# ------------------------
def classify_crime_severity(text):
    """Bucket *text* into a severity tier from weighted keyword hits.

    'high_risk' terms count 3 points, 'substance' terms 2, 'activity'
    terms 1; the summed score maps to Critical (>=5), High (>=3),
    Medium (>=1), else Low.
    """
    lowered = str(text).lower()
    weights = (("high_risk", 3), ("substance", 2), ("activity", 1))
    total = sum(
        weight
        for group, weight in weights
        for term in DRUG_KEYWORDS[group]
        if term in lowered
    )
    if total >= 5:
        return 'Critical'
    if total >= 3:
        return 'High'
    if total >= 1:
        return 'Medium'
    return 'Low'
130
+
131
def extract_drug_mentions(text):
    """Return a comma-joined list of known substances appearing in *text*.

    Matching is case-insensitive substring search over the 'substance'
    keyword list; 'Unspecified' is returned when nothing matches.
    """
    lowered = str(text).lower()
    hits = [drug.capitalize() for drug in DRUG_KEYWORDS['substance'] if drug in lowered]
    return ', '.join(hits) if hits else 'Unspecified'
139
+
140
+ def calculate_threat_score(row):
141
+ """Calculate threat score based on multiple factors"""
142
+ score = 0
143
+ text = str(row.get('text', '')) + ' ' + str(row.get('title', ''))
144
+ text_lower = text.lower()
145
+
146
+ for keyword in DRUG_KEYWORDS['high_risk']:
147
+ if keyword in text_lower:
148
+ score += 10
149
+
150
+ if 'score' in row:
151
+ score += min(int(row.get('score', 0)) / 10, 5)
152
+
153
+ if 'num_comments' in row:
154
+ score += min(int(row.get('num_comments', 0)) / 5, 5)
155
+
156
+ sentiment = TextBlob(text).sentiment.polarity
157
+ if sentiment < -0.2:
158
+ score += 5
159
+
160
+ return min(score, 100)
161
+
162
# ------------------------
# Load All Data
# ------------------------
posts_df, comments_df, wards_df, districts_df, data_status = load_data(
    POSTS_FILE,
    COMMENTS_FILE,
    WARD_COORDS_FILE,
    DISTRICT_COORDS_FILE,
)
168
+
169
# ------------------------
# Data Processing
# ------------------------
def process_datetime(df, datetime_col='created_utc'):
    """Attach datetime/date/hour/day_of_week columns parsed from *datetime_col*.

    Returns *df* unchanged when the column is absent; unparseable
    timestamps become NaT (errors='coerce') rather than raising.
    Mutates and returns the same DataFrame.
    """
    if datetime_col not in df.columns:
        return df

    parsed = pd.to_datetime(df[datetime_col], errors='coerce')
    df["datetime"] = parsed
    df["date"] = parsed.dt.date
    df["hour"] = parsed.dt.hour
    df["day_of_week"] = parsed.dt.day_name()
    return df
182
+
183
# Normalize coordinate names for case-insensitive joins later on
if not wards_df.empty and "ward_name" in wards_df.columns:
    wards_df["ward_name"] = wards_df["ward_name"].astype(str).str.strip().str.lower()

if not districts_df.empty and "district_name" in districts_df.columns:
    districts_df["district_name"] = districts_df["district_name"].astype(str).str.strip().str.lower()

# Common aliases folded onto canonical district names
district_mapping = {
    "bangalore": "bengaluru",
    "blr": "bengaluru",
    "mysore": "mysuru",
}

# Word-boundary alternation regexes built from the known location names
ward_pattern = None
district_pattern = None

if not wards_df.empty:
    ward_names = wards_df["ward_name"].str.lower().tolist()
    ward_pattern = r'\b(' + '|'.join(re.escape(name) for name in ward_names) + r')\b'

if not districts_df.empty:
    district_names = districts_df["district_name"].str.lower().tolist()
    district_pattern = r'\b(' + '|'.join(re.escape(name) for name in district_names) + r')\b'
208
+
209
def extract_locations(text_series, patterns):
    """Extract known location names from each text in *text_series*.

    Args:
        text_series: Series of free text; NaN is treated as empty.
        patterns: list of regex patterns whose capture group yields the
            location names to look for.

    Returns:
        Series (same index) of comma-joined unique matches per text;
        empty string when nothing matched.

    Fix: de-duplication previously used ``list(set(matches))``, whose
    ordering depends on the per-process string hash seed, so identical
    data could yield "a, b" on one run and "b, a" on the next.
    ``dict.fromkeys`` dedupes deterministically in first-seen order.
    """
    locations = []
    for text in text_series.fillna(""):
        matches = []
        for pattern in patterns:
            matches.extend(re.findall(pattern, str(text).lower()))
        unique_matches = list(dict.fromkeys(matches))
        locations.append(", ".join(unique_matches))
    return pd.Series(locations, index=text_series.index)
219
+
220
# Process posts: timestamps, locations, severity, substances, sentiment
if not posts_df.empty:
    posts_df = process_datetime(posts_df)

    # One searchable text per post: title + body
    post_text = (posts_df.get("title", "") + " " + posts_df.get("text", "")).fillna("")

    posts_df["ward_location"] = (
        extract_locations(post_text, [ward_pattern]) if ward_pattern else ""
    )
    posts_df["district_location"] = (
        extract_locations(post_text, [district_pattern]) if district_pattern else ""
    )

    # Fold aliases (e.g. 'bangalore') onto canonical district names
    posts_df["district_location"] = posts_df["district_location"].replace(district_mapping)

    posts_df["severity"] = post_text.apply(classify_crime_severity)
    posts_df["drugs_mentioned"] = post_text.apply(extract_drug_mentions)
    posts_df["threat_score"] = posts_df.apply(calculate_threat_score, axis=1)

    # TextBlob polarity in [-1, 1], bucketed into a categorical label
    posts_df["sentiment_score"] = post_text.apply(lambda x: TextBlob(str(x)).sentiment.polarity)
    posts_df["sentiment"] = posts_df["sentiment_score"].apply(
        lambda x: "Positive" if x > 0 else ("Negative" if x < 0 else "Neutral")
    )

# Process comments: timestamps only
if not comments_df.empty:
    comments_df = process_datetime(comments_df)
250
+
251
# ------------------------
# Dashboard Header
# ------------------------
st.title("🚨 Reddit based Drug Crime Intelligence Dashboard")
st.markdown("**Real-time intelligence analysis of drug-related criminal activities from Reddit social media monitoring**")

# ------------------------
# Sidebar Filters
# ------------------------
st.sidebar.title("🔧 Intelligence Controls")

# Manual cache bust + full rerun
if st.sidebar.button("🔄 Refresh Data"):
    st.cache_data.clear()
    st.rerun()

# Severity filter — defaults to the two most actionable tiers
if not posts_df.empty and "severity" in posts_df.columns:
    severity_filter = st.sidebar.multiselect(
        "⚠️ Crime Severity Level",
        options=['Critical', 'High', 'Medium', 'Low'],
        default=['Critical', 'High'],
    )
    if severity_filter:
        posts_df = posts_df[posts_df["severity"].isin(severity_filter)]

# Date range filter over the observed posting window
if not posts_df.empty and "datetime" in posts_df.columns:
    min_date = posts_df["datetime"].min().date()
    max_date = posts_df["datetime"].max().date()

    date_range = st.sidebar.date_input(
        "📅 Select Date Range",
        value=(min_date, max_date),
        min_value=min_date,
        max_value=max_date,
    )

    # date_input yields a single value while the user is mid-selection
    if len(date_range) == 2:
        start, end = date_range
        posts_df = posts_df[(posts_df["date"] >= start) & (posts_df["date"] <= end)]

# Subreddit filter, defaulting to the five busiest communities
if not posts_df.empty and "subreddit" in posts_df.columns:
    subreddits = st.sidebar.multiselect(
        "📱 Filter by Subreddits",
        options=posts_df["subreddit"].unique(),
        default=posts_df["subreddit"].value_counts().head(5).index.tolist(),
    )
    if subreddits:
        posts_df = posts_df[posts_df["subreddit"].isin(subreddits)]

# Free-text keyword search across body and title
search_keyword = st.sidebar.text_input("🔍 Search Keywords in Content")
if search_keyword:
    body_hits = posts_df["text"].str.contains(search_keyword, case=False, na=False)
    title_hits = posts_df["title"].str.contains(search_keyword, case=False, na=False)
    posts_df = posts_df[body_hits | title_hits]
311
+
312
# ------------------------
# Main Dashboard Content
# ------------------------

if posts_df.empty and comments_df.empty:
    st.error("🚫 No intelligence data available. Please ensure data collection is operational.")
    st.stop()

# --- Crime Intelligence Metrics
st.subheader("📊 Crime Intelligence Overview")
col1, col2, col3, col4 = st.columns(4)

with col1:
    # Critical-tier count, with its share of all (filtered) posts as delta
    critical_posts = 0
    if "severity" in posts_df.columns:
        critical_posts = len(posts_df[posts_df["severity"] == "Critical"])
    share = f"{(critical_posts / len(posts_df) * 100):.1f}%" if len(posts_df) > 0 else "0%"
    st.metric(label="Critical Threats", value=critical_posts, delta=share)

with col2:
    avg_threat = posts_df["threat_score"].mean() if "threat_score" in posts_df.columns else 0
    st.metric(
        label="Avg Threat Score",
        value=f"{avg_threat:.1f}",
        delta="High" if avg_threat > 50 else "Moderate",
    )

with col3:
    if "ward_location" in posts_df.columns:
        # Explode the comma-joined ward lists to count distinct wards
        located = posts_df[posts_df["ward_location"] != ""].copy()
        located["ward_location"] = located["ward_location"].str.split(", ")
        located = located.explode("ward_location")
        st.metric(label="Active Locations", value=located["ward_location"].nunique())

with col4:
    drug_types = 0
    if "drugs_mentioned" in posts_df.columns:
        drug_types = posts_df["drugs_mentioned"].str.split(", ").explode().nunique()
    st.metric(label="Drug Types Identified", value=drug_types)

st.markdown("---")
359
+
360
# --- Crime Severity Distribution
if "severity" in posts_df.columns:
    st.subheader("⚠️ Crime Severity Analysis")

    col1, col2 = st.columns(2)

    # Shared palette for the four severity tiers
    severity_palette = {
        'Critical': '#FF0000',
        'High': '#FF6B00',
        'Medium': '#FFD700',
        'Low': '#90EE90',
    }

    with col1:
        severity_counts = posts_df["severity"].value_counts()
        fig_severity = px.pie(
            values=severity_counts.values,
            names=severity_counts.index,
            title="Crime Severity Distribution",
            color=severity_counts.index,
            color_discrete_map=severity_palette,
        )
        st.plotly_chart(fig_severity, use_container_width=True)

    with col2:
        fig_threat = px.histogram(
            posts_df,
            x="threat_score",
            nbins=20,
            title="Threat Score Distribution",
            labels={"threat_score": "Threat Score", "count": "Number of Posts"},
        )
        # Visual cue at the 50-point "high threat" cut-off
        fig_threat.add_vline(x=50, line_dash="dash", line_color="red", annotation_text="High Threat Threshold")
        st.plotly_chart(fig_threat, use_container_width=True)

st.markdown("---")
394
+
395
# --- Drug Type Analysis
if "drugs_mentioned" in posts_df.columns:
    st.subheader("💊 Substance Intelligence")

    # Explode multi-substance posts and drop the 'Unspecified' placeholder
    all_drugs = posts_df["drugs_mentioned"].str.split(", ").explode()
    drug_counts = all_drugs[all_drugs != "Unspecified"].value_counts().head(10)

    if not drug_counts.empty:
        fig_drugs = px.bar(
            x=drug_counts.values,
            y=drug_counts.index,
            orientation='h',
            title="Top 10 Substances Mentioned",
            labels={"x": "Mentions", "y": "Substance"},
            color=drug_counts.values,
            color_continuous_scale="Reds",
        )
        st.plotly_chart(fig_drugs, use_container_width=True)

st.markdown("---")
415
+
416
# --- Timeline Analysis
if "date" in posts_df.columns:
    st.subheader("📈 Crime Activity Timeline")

    col1, col2 = st.columns(2)

    with col1:
        # Incidents per day, one line per severity tier
        daily_data = posts_df.groupby(["date", "severity"]).size().reset_index(name="count")
        fig_daily = px.line(
            daily_data,
            x="date",
            y="count",
            color="severity",
            title="Daily Crime Activity by Severity",
            labels={"count": "Number of Incidents", "date": "Date"},
            color_discrete_map={
                'Critical': '#FF0000',
                'High': '#FF6B00',
                'Medium': '#FFD700',
                'Low': '#90EE90',
            },
        )
        st.plotly_chart(fig_daily, use_container_width=True)

    with col2:
        if "hour" in posts_df.columns and "day_of_week" in posts_df.columns:
            # Day-of-week x hour-of-day density of incidents
            hourly_activity = posts_df.groupby(["day_of_week", "hour"]).size().reset_index(name="count")
            fig_hourly = px.density_heatmap(
                hourly_activity,
                x="hour",
                y="day_of_week",
                z="count",
                title="Activity Heatmap - High-Risk Hours",
                labels={"hour": "Hour of Day", "day_of_week": "Day", "count": "Incidents"},
                color_continuous_scale="Reds",
            )
            st.plotly_chart(fig_hourly, use_container_width=True)

st.markdown("---")
455
+
456
# --- Geographic Intelligence - COMBINED MAP
st.subheader("🗺️ Geographic Crime Intelligence")

# Process both ward and district data
ward_data_available = not wards_df.empty and "ward_location" in posts_df.columns
district_data_available = not districts_df.empty and "district_location" in posts_df.columns

if ward_data_available or district_data_available:
    st.markdown("**Crime hotspot analysis across Karnataka (Wards & Districts)**")

    # Prepare ward data: explode comma-joined ward lists, count incidents,
    # and join coordinates from the ward lookup table.
    merged_wards = pd.DataFrame()
    if ward_data_available:
        ward_posts = posts_df[posts_df["ward_location"] != ""].copy()
        ward_exploded = ward_posts.copy()
        ward_exploded["ward_location"] = ward_posts["ward_location"].str.split(", ")
        ward_exploded = ward_exploded.explode("ward_location")
        ward_exploded["ward_location"] = ward_exploded["ward_location"].str.strip().str.lower()

        loc_counts = ward_exploded.groupby("ward_location").size().reset_index(name="count")
        merged_wards = pd.merge(loc_counts, wards_df, left_on="ward_location", right_on="ward_name", how="inner")
        merged_wards["location_type"] = "Ward"
        merged_wards["location_name"] = merged_wards["ward_name"]

    # Prepare district data the same way
    merged_districts = pd.DataFrame()
    if district_data_available:
        district_posts = posts_df[posts_df["district_location"] != ""].copy()
        district_exploded = district_posts.copy()
        district_exploded["district_location"] = district_posts["district_location"].str.split(", ")
        district_exploded = district_exploded.explode("district_location")
        district_exploded["district_location"] = district_exploded["district_location"].str.strip().str.lower()

        district_counts = district_exploded.groupby("district_location").size().reset_index(name="count")
        merged_districts = pd.merge(district_counts, districts_df, left_on="district_location", right_on="district_name", how="inner")
        merged_districts["location_type"] = "District"
        merged_districts["location_name"] = merged_districts["district_name"]

    # Combine both datasets
    all_locations = pd.concat([merged_wards, merged_districts], ignore_index=True)

    if not all_locations.empty:
        # Centre the map on the mean of all plotted points
        # (assumes the coordinate CSVs provide 'lat'/'lon' columns — TODO confirm)
        center_lat = all_locations["lat"].mean()
        center_lon = all_locations["lon"].mean()

        m_unified = folium.Map(
            location=[center_lat, center_lon],
            zoom_start=9 if ward_data_available else 7,
            tiles="OpenStreetMap",
        )

        # Heatmap layer weighted by incident count
        heat_data = [[row["lat"], row["lon"], row["count"]] for _, row in all_locations.iterrows()]
        HeatMap(heat_data, radius=20, blur=15, max_zoom=13, gradient={
            0.0: 'blue', 0.5: 'yellow', 0.75: 'orange', 1.0: 'red'
        }).add_to(m_unified)

        # Top ~30% of locations by incident count are flagged as hotspots
        threshold = all_locations["count"].quantile(0.70)
        all_locations["is_hotspot"] = all_locations["count"] >= threshold

        # Add markers for each location
        for _, row in all_locations.iterrows():
            location_name = row["location_name"].title()
            location_type = row["location_type"]
            incident_count = row["count"]

            # Fix: regex=False — location names are plain substrings, and an
            # un-escaped name containing regex metacharacters would otherwise
            # mis-match or raise inside str.contains.
            if location_type == "Ward":
                loc_data = posts_df[posts_df["ward_location"].str.contains(row["location_name"], case=False, na=False, regex=False)]
            else:
                loc_data = posts_df[posts_df["district_location"].str.contains(row["location_name"], case=False, na=False, regex=False)]

            # Severity breakdown for the popup
            severity_breakdown = loc_data["severity"].value_counts().to_dict()
            severity_html = "<br>".join([f"&nbsp;&nbsp;• {sev}: {count}" for sev, count in severity_breakdown.items()])

            critical_count = severity_breakdown.get("Critical", 0)

            # Top substances seen at this location
            loc_drugs = loc_data["drugs_mentioned"].str.split(", ").explode()
            top_drugs = loc_drugs[loc_drugs != "Unspecified"].value_counts().head(3)
            drugs_html = "<br>".join([f"&nbsp;&nbsp;• {drug}: {count}" for drug, count in top_drugs.items()])

            avg_threat = loc_data["threat_score"].mean()

            # Three highest-threat incidents for the popup
            recent = loc_data.nlargest(3, "threat_score")[["title", "severity", "threat_score"]]
            incidents_html = "<br>".join([
                f"&nbsp;&nbsp;• <b>[{r['severity']}]</b> {r['title'][:50]}... <i>(Score: {r['threat_score']:.0f})</i>"
                for _, r in recent.iterrows()
            ])

            # Marker colour: hotspots darkest, then graded by raw count
            marker_color = 'darkred' if row["is_hotspot"] else ('red' if incident_count >= 5 else ('orange' if incident_count >= 3 else 'blue'))

            # Create detailed popup
            popup_html = f"""
            <div style='width: 350px; font-family: Arial, sans-serif;'>
                <h3 style='color: {marker_color}; margin-bottom: 8px; border-bottom: 2px solid {marker_color}; padding-bottom: 5px;'>
                    {location_type}: {location_name}
                </h3>
                <div style='margin: 10px 0;'>
                    <b>📊 Total Incidents:</b> <span style='font-size: 18px; color: {marker_color};'>{incident_count}</span><br>
                    <b>🚨 Critical Threats:</b> <span style='font-size: 18px; color: darkred;'>{critical_count}</span><br>
                    <b>📈 Avg Threat Score:</b> <span style='font-size: 16px;'>{avg_threat:.1f}/100</span>
                </div>
                <hr style='border: 1px solid #ddd;'>
                <div style='margin: 10px 0;'>
                    <b>⚠️ Severity Breakdown:</b><br>
                    {severity_html if severity_html else '&nbsp;&nbsp;No data'}
                </div>
                <hr style='border: 1px solid #ddd;'>
                <div style='margin: 10px 0;'>
                    <b>💊 Top Substances Detected:</b><br>
                    {drugs_html if not top_drugs.empty else '&nbsp;&nbsp;None identified'}
                </div>
                <hr style='border: 1px solid #ddd;'>
                <div style='margin: 10px 0;'>
                    <b>🎯 Recent High-Threat Incidents:</b><br>
                    {incidents_html if not recent.empty else '&nbsp;&nbsp;None'}
                </div>
                <div style='margin-top: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 5px; text-align: center; font-size: 11px;'>
                    <i>Click marker for details • Hover for quick info</i>
                </div>
            </div>
            """

            # Tooltip (hover text)
            tooltip_text = f"""
            <b>{location_type}: {location_name}</b><br>
            Total Incidents: {incident_count}<br>
            Critical: {critical_count} | Avg Threat: {avg_threat:.1f}
            """

            folium.CircleMarker(
                location=[row["lat"], row["lon"]],
                radius=min(incident_count * 2.5 if location_type == "Ward" else incident_count * 3.5, 25),
                color=marker_color,
                fill=True,
                fill_color=marker_color,
                fill_opacity=0.7,
                weight=2,
                popup=folium.Popup(popup_html, max_width=400),
                tooltip=folium.Tooltip(tooltip_text, sticky=True),
            ).add_to(m_unified)

        # Display map
        st_folium(m_unified, width="100%", height=700)

        # Hotspot analysis table
        st.subheader("🔥 Top Crime Hotspots")

        col1 = st.columns(1)

        with col1[0]:
            st.markdown("**High-Activity Wards**")
            if not merged_wards.empty:
                ward_display = merged_wards.sort_values("count", ascending=False).head(10)
                st.dataframe(
                    ward_display[["ward_name", "count"]].rename(columns={
                        "ward_name": "Ward Name",
                        "count": "Incidents",
                    }).reset_index(drop=True),
                    use_container_width=True,
                    height=300,
                )
            else:
                st.info("No ward data available")

st.markdown("---")
635
+
636
# --- High-Priority Intelligence Reports
st.subheader("🚨 High-Priority Intelligence Reports")

if not posts_df.empty:
    # Critical/High severity, or anything scoring >= 50, highest threat first
    priority_posts = posts_df[
        (posts_df["severity"].isin(['Critical', 'High'])) |
        (posts_df["threat_score"] >= 50)
    ].sort_values("threat_score", ascending=False)

    if not priority_posts.empty:
        # Fix: guard the dedupe — drop_duplicates(subset=['id']) raises
        # KeyError when the 'id' column is missing (the comments loader
        # already guards this way).
        if 'id' in priority_posts.columns:
            priority_posts = priority_posts.drop_duplicates(subset=['id'], keep='first')

        display_cols = ["datetime", "title", "severity", "threat_score", "drugs_mentioned", "ward_location", "subreddit"]
        available_cols = [col for col in display_cols if col in priority_posts.columns]

        st.dataframe(
            priority_posts[available_cols].head(50).rename(columns={
                "datetime": "Timestamp",
                "title": "Intelligence Report",
                "severity": "Severity",
                "threat_score": "Threat Score",
                "drugs_mentioned": "Substances",
                "ward_location": "Location",
                "subreddit": "Source",
            }),
            use_container_width=True,
            height=400,
        )

        st.download_button(
            label="📥 Download Priority Reports (CSV)",
            data=priority_posts[available_cols].to_csv(index=False).encode("utf-8"),
            file_name=f"priority_intelligence_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv",
        )
    else:
        st.info("No high-priority incidents in selected date range")
else:
    st.info("No intelligence data available")

st.markdown("---")
677
+
678
# --- Advanced Analytics Section
st.subheader("🔬 Advanced Crime Analytics")

col1, col2 = st.columns(2)

with col1:
    if "hour" in posts_df.columns and "severity" in posts_df.columns:
        st.markdown("**Crime Patterns by Time of Day**")
        # Incidents per (hour, severity) bucket, stacked by severity
        time_severity = posts_df.groupby(["hour", "severity"]).size().reset_index(name="count")
        fig_time = px.bar(
            time_severity,
            x="hour",
            y="count",
            color="severity",
            title="Crime Activity by Hour and Severity",
            labels={"hour": "Hour of Day", "count": "Incidents"},
            color_discrete_map={
                'Critical': '#FF0000',
                'High': '#FF6B00',
                'Medium': '#FFD700',
                'Low': '#90EE90',
            },
        )
        st.plotly_chart(fig_time, use_container_width=True)

with col2:
    if "sentiment_score" in posts_df.columns and "severity" in posts_df.columns:
        st.markdown("**Sentiment vs Crime Severity**")
        fig_sentiment_severity = px.box(
            posts_df,
            x="severity",
            y="sentiment_score",
            color="severity",
            title="Sentiment Distribution by Crime Severity",
            labels={"sentiment_score": "Sentiment Score", "severity": "Crime Severity"},
            color_discrete_map={
                'Critical': '#FF0000',
                'High': '#FF6B00',
                'Medium': '#FFD700',
                'Low': '#90EE90',
            },
        )
        st.plotly_chart(fig_sentiment_severity, use_container_width=True)

st.markdown("---")
723
+
724
# --- Network Analysis
if "subreddit" in posts_df.columns and "drugs_mentioned" in posts_df.columns:
    st.subheader("🕸️ Source-Substance Network Analysis")

    # Mention counts per (community, substance) pair, excluding placeholders
    source_drug = posts_df[posts_df["drugs_mentioned"] != "Unspecified"].groupby(
        ["subreddit", "drugs_mentioned"]
    ).size().reset_index(name="mentions")

    if not source_drug.empty:
        top_relationships = source_drug.nlargest(15, "mentions")

        fig_network = px.bar(
            top_relationships,
            x="mentions",
            y="subreddit",
            color="drugs_mentioned",
            orientation='h',
            title="Top Source-Substance Relationships",
            labels={"mentions": "Number of Mentions", "subreddit": "Source Community"},
            height=500,
        )
        st.plotly_chart(fig_network, use_container_width=True)

st.markdown("---")
748
+
749
# --- Emerging Threats Detection
st.subheader("⚡ Emerging Threats Detection")

if "date" in posts_df.columns and "threat_score" in posts_df.columns:
    # Compare the most recent 7-day window against the 7 days before it
    today = posts_df["date"].max()
    last_week = today - timedelta(days=7)
    prev_week = last_week - timedelta(days=7)

    recent_threats = posts_df[posts_df["date"] >= last_week]["threat_score"].mean()
    previous_threats = posts_df[(posts_df["date"] >= prev_week) & (posts_df["date"] < last_week)]["threat_score"].mean()

    threat_change = ((recent_threats - previous_threats) / previous_threats * 100) if previous_threats > 0 else 0

    col1, col2, col3 = st.columns(3)

    with col1:
        # delta_color="inverse": a rising threat score is shown as bad
        st.metric(
            "Threat Level Trend",
            f"{recent_threats:.1f}",
            f"{threat_change:+.1f}%",
            delta_color="inverse",
        )

    with col2:
        # Locations seen this week but never before
        recent_locs = set(posts_df[posts_df["date"] >= last_week]["ward_location"].str.split(", ").explode())
        prev_locs = set(posts_df[posts_df["date"] < last_week]["ward_location"].str.split(", ").explode())
        st.metric("New Active Locations", len(recent_locs - prev_locs))

    with col3:
        # Spike = recent daily volume exceeds the overall average by 50%
        daily_avg = posts_df.groupby("date").size().mean()
        recent_avg = posts_df[posts_df["date"] >= last_week].groupby("date").size().mean()
        spike = recent_avg > daily_avg * 1.5
        st.metric("Activity Status", "⚠️ SPIKE" if spike else "✅ Normal")

st.markdown("---")
785
+
786
# --- Intelligence Summary Report
st.subheader("πŸ“‹ Executive Intelligence Summary")

summary_col1, summary_col2 = st.columns(2)

with summary_col1:
    st.markdown("**Key Findings:**")

    if not posts_df.empty:
        # Highest-threat ward: explode comma-separated ward lists, then
        # average the threat score per individual ward.
        if "ward_location" in posts_df.columns and "threat_score" in posts_df.columns:
            ward_posts_with_location = posts_df[posts_df["ward_location"] != ""].copy()
            if not ward_posts_with_location.empty:
                ward_exploded_threat = ward_posts_with_location.copy()
                ward_exploded_threat["ward_location"] = ward_posts_with_location["ward_location"].str.split(", ")
                ward_exploded_threat = ward_exploded_threat.explode("ward_location").reset_index(drop=True)

                ward_threat = (
                    ward_exploded_threat.groupby("ward_location")["threat_score"]
                    .mean()
                    .sort_values(ascending=False)
                )

                if not ward_threat.empty:
                    st.markdown(f"🎯 **Highest Threat Zone:** {ward_threat.index[0].title()} (Score: {ward_threat.iloc[0]:.1f})")

        if "drugs_mentioned" in posts_df.columns:
            top_drug = posts_df["drugs_mentioned"].str.split(", ").explode().value_counts()
            if len(top_drug) > 0 and top_drug.index[0] != "Unspecified":
                st.markdown(f"πŸ’Š **Primary Substance:** {top_drug.index[0]} ({top_drug.iloc[0]} mentions)")

        if "hour" in posts_df.columns:
            # mode() returns an empty Series when the column is all-NaN;
            # indexing [0] would then raise IndexError, so check first.
            peak_hours = posts_df["hour"].mode()
            if not peak_hours.empty:
                peak_hour = peak_hours[0]
                st.markdown(f"πŸ• **Peak Activity Time:** {peak_hour}:00 - {peak_hour+1}:00")

        if "subreddit" in posts_df.columns:
            top_source = posts_df["subreddit"].value_counts().index[0]
            st.markdown(f"πŸ“± **Primary Intelligence Source:** r/{top_source}")

with summary_col2:
    st.markdown("**Risk Assessment:**")

    if not posts_df.empty and "severity" in posts_df.columns:
        critical_pct = (len(posts_df[posts_df["severity"] == "Critical"]) / len(posts_df) * 100)

        # Threshold bands over the share of "Critical" incidents.
        if critical_pct > 30:
            risk_level = "πŸ”΄ CRITICAL"
            risk_desc = "Immediate action required"
        elif critical_pct > 15:
            risk_level = "🟠 HIGH"
            risk_desc = "Enhanced monitoring recommended"
        elif critical_pct > 5:
            risk_level = "🟑 MODERATE"
            risk_desc = "Standard surveillance protocols"
        else:
            risk_level = "🟒 LOW"
            risk_desc = "Routine monitoring sufficient"

        st.markdown(f"**Overall Risk Level:** {risk_level}")
        st.markdown(f"*{risk_desc}*")
        st.markdown(f"- Critical incidents: {critical_pct:.1f}%")
        st.markdown(f"- Total monitored incidents: {len(posts_df)}")
        # Guard: the severity check above does not guarantee a "date" column,
        # so accessing it unconditionally could raise KeyError.
        if "date" in posts_df.columns:
            st.markdown(f"- Date range: {posts_df['date'].min()} to {posts_df['date'].max()}")

st.markdown("---")
# --- Export Options
st.subheader("πŸ“€ Export Intelligence Reports")

export_col1, export_col2, export_col3 = st.columns(3)

# Column 1: every monitored post as a single CSV download.
with export_col1:
    if not posts_df.empty:
        st.download_button(
            label="πŸ“Š Full Dataset",
            data=posts_df.to_csv(index=False).encode("utf-8"),
            file_name=f"intelligence_full_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )

# Column 2: only the rows flagged as Critical severity.
with export_col2:
    if "severity" in posts_df.columns:
        critical_rows = posts_df[posts_df["severity"] == "Critical"]
        if not critical_rows.empty:
            st.download_button(
                label="🚨 Critical Incidents",
                data=critical_rows.to_csv(index=False).encode("utf-8"),
                file_name=f"critical_incidents_{datetime.now().strftime('%Y%m%d')}.csv",
                mime="text/csv"
            )

# Column 3: the ward-level aggregation, when it was built earlier in the page.
with export_col3:
    if 'merged_wards' in locals() and not merged_wards.empty:
        st.download_button(
            label="πŸ—ΊοΈ Location Analysis",
            data=merged_wards.to_csv(index=False).encode("utf-8"),
            file_name=f"location_analysis_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )

st.markdown("---")
# --- System Status Footer
st.markdown("**πŸ”’ Intelligence System Status:**")
status_cols = st.columns(4)

# One online/offline indicator per data feed, driven by the data_status flags.
_status_items = [
    ("πŸ“„ Posts:", "posts"),
    ("πŸ’¬ Comments:", "comments"),
    ("🏘️ Wards:", "wards"),
    ("🌍 Districts:", "districts"),
]
for _col, (_label, _key) in zip(status_cols, _status_items):
    with _col:
        st.write(_label, "βœ… Online" if data_status[_key] else "❌ Offline")

try:
    file_mod_time = datetime.fromtimestamp(os.path.getmtime(POSTS_FILE))
    st.markdown(f"*Intelligence data last updated: {file_mod_time.strftime('%Y-%m-%d %H:%M:%S')}*")
except OSError:
    # Narrowed from a bare `except: pass`, which also swallowed unrelated
    # errors (even KeyboardInterrupt). os.path.getmtime raises OSError when
    # the file is missing or inaccessible; only that case should be ignored.
    pass

st.markdown("---")