# Reddit-Analysis / src/streamlit_app.py
# lawlevisan's picture
# Update src/streamlit_app.py
# e8b553c verified
import html
import os
import re
from datetime import datetime, timedelta

import folium
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import st_folium
from textblob import TextBlob
from wordcloud import WordCloud
# ------------------------
# Config
# ------------------------
# Page-level Streamlit settings: browser tab title, wide layout, sidebar open on load.
st.set_page_config(
    page_title="Reddit based Drug Crime Intelligence Dashboard",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Paths to data files
# (relative paths; they are passed unchanged to pd.read_csv by load_data)
POSTS_FILE = "data/processed/reddit_posts_filtered.csv"
COMMENTS_FILE = "data/processed/reddit_comments_filtered.csv"
WARD_COORDS_FILE = "data/bangalore_wards_coordinates.csv"
DISTRICT_COORDS_FILE = "data/karnataka_districts_coordinates.csv"
# Drug-related keywords for classification
# Each list is matched as a lowercase substring of the post text.
# classify_crime_severity weights the groups 3 / 2 / 1 (high_risk / substance / activity);
# extract_drug_mentions reads only the 'substance' list;
# calculate_threat_score reads only the 'high_risk' list.
DRUG_KEYWORDS = {
    'high_risk': ['dealing', 'dealer', 'supply', 'trafficking', 'smuggling', 'cartel', 'seized', 'arrest', 'raid'],
    'substance': ['cocaine', 'heroin', 'mdma', 'meth', 'cannabis', 'marijuana', 'ganja', 'weed', 'lsd', 'ecstasy'],
    'activity': ['selling', 'buying', 'distribution', 'possession', 'consumption', 'overdose', 'addiction']
}
# ------------------------
# Enhanced Data Loading
# ------------------------
def _drop_duplicate_ids(frame):
    """Return *frame* without repeated ``id`` values; unchanged if there is no ``id`` column."""
    if 'id' in frame.columns:
        return frame.drop_duplicates(subset=['id'], keep='first')
    return frame


def _standardise_name_column(target):
    """Build a callback that renames a generic ``name`` column to *target* when *target* is absent."""
    def rename(frame):
        if target not in frame.columns and 'name' in frame.columns:
            return frame.rename(columns={'name': target})
        return frame
    return rename


def _read_csv_with_status(path, *, key, label, unit, missing_name, prepare, **read_kwargs):
    """Read one CSV, report the outcome in the sidebar, and return ``(frame, loaded_ok)``.

    Any failure yields an empty DataFrame and ``False`` so the dashboard can
    keep running with whatever data is available.
    """
    try:
        frame = prepare(pd.read_csv(path, **read_kwargs))
    except FileNotFoundError:
        st.sidebar.warning(f"⚠️ {missing_name} file not found")
        return pd.DataFrame(), False
    except Exception as e:
        # Best-effort loader: surface the problem instead of crashing the app.
        st.sidebar.error(f"❌ Error loading {key}: {str(e)}")
        return pd.DataFrame(), False
    st.sidebar.success(f"βœ… {label} loaded: {len(frame)} {unit}")
    return frame, True


@st.cache_data
def load_data(posts_file, comments_file, ward_file, district_file):
    """Load all data files with comprehensive error handling.

    Args:
        posts_file: CSV of Reddit posts (every column is read as ``str``).
        comments_file: CSV of Reddit comments.
        ward_file: CSV of ward coordinates; a ``name`` column is renamed to
            ``ward_name`` when ``ward_name`` is missing.
        district_file: CSV of district coordinates; a ``name`` column is
            renamed to ``district_name`` when ``district_name`` is missing.

    Returns:
        ``(posts, comments, wards, districts, data_status)`` where each frame
        is empty if its file could not be loaded and ``data_status`` maps
        ``"posts"``/``"comments"``/``"wards"``/``"districts"`` to a bool.
    """
    # Posts and comments share the same "de-duplicate by id if present" rule;
    # previously the posts path raised KeyError when the column was missing.
    posts, posts_ok = _read_csv_with_status(
        posts_file, key="posts", label="Posts", unit="records",
        missing_name="Reddit posts", prepare=_drop_duplicate_ids, dtype=str,
    )
    comments, comments_ok = _read_csv_with_status(
        comments_file, key="comments", label="Comments", unit="records",
        missing_name="Reddit comments", prepare=_drop_duplicate_ids,
    )
    wards, wards_ok = _read_csv_with_status(
        ward_file, key="wards", label="Wards", unit="wards",
        missing_name="Ward coordinates", prepare=_standardise_name_column('ward_name'),
    )
    districts, districts_ok = _read_csv_with_status(
        district_file, key="districts", label="Districts", unit="districts",
        missing_name="District coordinates", prepare=_standardise_name_column('district_name'),
    )
    data_status = {
        "posts": posts_ok,
        "comments": comments_ok,
        "wards": wards_ok,
        "districts": districts_ok,
    }
    return posts, comments, wards, districts, data_status
# ------------------------
# Crime Analysis Functions
# ------------------------
def classify_crime_severity(text):
    """Map free text to 'Critical', 'High', 'Medium' or 'Low'.

    Every keyword from DRUG_KEYWORDS found as a substring of the lowercased
    text adds its group weight (high_risk=3, substance=2, activity=1); the
    total is then bucketed at 5 / 3 / 1.
    """
    haystack = str(text).lower()
    group_weights = (('high_risk', 3), ('substance', 2), ('activity', 1))
    total = sum(
        weight
        for group, weight in group_weights
        for term in DRUG_KEYWORDS[group]
        if term in haystack
    )
    if total >= 5:
        return 'Critical'
    if total >= 3:
        return 'High'
    if total >= 1:
        return 'Medium'
    return 'Low'
def extract_drug_mentions(text):
    """Return the capitalised substance keywords found in *text*, comma-joined.

    Keywords come from DRUG_KEYWORDS['substance'] and are matched as
    substrings of the lowercased text, in list order. Returns 'Unspecified'
    when none are present.
    """
    lowered = str(text).lower()
    hits = [name.capitalize() for name in DRUG_KEYWORDS['substance'] if name in lowered]
    if not hits:
        return 'Unspecified'
    return ', '.join(hits)
def _text_or_blank(value):
    """Return *value* as text, treating None/NaN as an empty string."""
    if value is None or (isinstance(value, float) and value != value):
        return ''
    return str(value)


def _nonnegative_int(value):
    """Parse an engagement counter that may be a str, float, NaN or None; invalid values count as 0."""
    try:
        return max(int(float(value)), 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def calculate_threat_score(row):
    """Calculate threat score based on multiple factors.

    Args:
        row: Mapping-like record (dict or pandas Series) that may carry
            'text', 'title', 'score' and 'num_comments'.

    Returns:
        A number in the range 0-100 built from:
        * +10 for each DRUG_KEYWORDS['high_risk'] keyword found in the text,
        * up to +5 from the post score (score / 10),
        * up to +5 from the comment count (num_comments / 5),
        * +5 when TextBlob polarity is below -0.2.
    """
    score = 0
    # Missing cells arrive as NaN; str(NaN) would inject the word "nan".
    text = _text_or_blank(row.get('text', '')) + ' ' + _text_or_blank(row.get('title', ''))
    text_lower = text.lower()
    for keyword in DRUG_KEYWORDS['high_risk']:
        if keyword in text_lower:
            score += 10
    # Counters are read from CSV as strings and may be missing or "12.0";
    # negative karma must not reduce the threat score.
    score += min(_nonnegative_int(row.get('score', 0)) / 10, 5)
    score += min(_nonnegative_int(row.get('num_comments', 0)) / 5, 5)
    sentiment = TextBlob(text).sentiment.polarity
    if sentiment < -0.2:
        score += 5
    return min(score, 100)
# ------------------------
# Load All Data
# ------------------------
# load_data is wrapped in st.cache_data, and returns the four frames plus a
# dict of per-source load flags that the status footer reads.
posts_df, comments_df, wards_df, districts_df, data_status = load_data(
    POSTS_FILE, COMMENTS_FILE, WARD_COORDS_FILE, DISTRICT_COORDS_FILE
)
# ------------------------
# Data Processing
# ------------------------
def process_datetime(df, datetime_col='created_utc'):
    """Process datetime column with robust error handling.

    Adds ``datetime``, ``date``, ``hour`` and ``day_of_week`` columns to *df*
    (in place) and returns it. If *datetime_col* is absent the frame is
    returned untouched.

    Values that parse as numbers are interpreted as Unix epoch **seconds**
    (the format Reddit uses for ``created_utc``); otherwise the column is
    parsed as date strings. Unparseable entries become NaT.
    """
    if datetime_col not in df.columns:
        return df
    raw = df[datetime_col]
    epoch_seconds = None
    # Skip the numeric probe for real datetime columns: to_numeric would turn
    # them into nanosecond integers.
    if not pd.api.types.is_datetime64_any_dtype(raw):
        candidate = pd.to_numeric(raw, errors='coerce')
        if candidate.notna().any():
            epoch_seconds = candidate
    if epoch_seconds is not None:
        # Without unit='s' pandas treats numbers as nanoseconds (dates in 1970),
        # and numeric strings fail to parse at all.
        stamps = pd.to_datetime(epoch_seconds, unit='s', errors='coerce')
    else:
        stamps = pd.to_datetime(raw, errors='coerce')
    df["datetime"] = stamps
    df["date"] = df["datetime"].dt.date
    df["hour"] = df["datetime"].dt.hour
    df["day_of_week"] = df["datetime"].dt.day_name()
    return df
# Normalize coordinate names
if not wards_df.empty and "ward_name" in wards_df.columns:
    wards_df["ward_name"] = wards_df["ward_name"].astype(str).str.strip().str.lower()
if not districts_df.empty and "district_name" in districts_df.columns:
    districts_df["district_name"] = districts_df["district_name"].astype(str).str.strip().str.lower()
# District mapping
# Alternate spellings mapped to the canonical district name.
district_mapping = {
    "bangalore": "bengaluru",
    "blr": "bengaluru",
    "mysore": "mysuru",
}


def _build_location_pattern(frame, name_column):
    """Return a word-bounded alternation regex over the names in *frame*, or None.

    None is returned when the frame is empty, the column is missing, or no
    usable names remain, so callers can simply test the result for truth.
    """
    if frame.empty or name_column not in frame.columns:
        return None
    names = frame[name_column].dropna().astype(str).str.strip().str.lower()
    # "nan" is what astype(str) leaves behind for missing cells; an empty
    # alternative would match at every word boundary.
    usable = {name for name in names if name and name != "nan"}
    if not usable:
        return None
    # Regex alternation is tried left to right, so list longer names first;
    # otherwise a short name that prefixes a longer one would always win.
    ordered = sorted(usable, key=lambda name: (-len(name), name))
    return r'\b(' + '|'.join(re.escape(name) for name in ordered) + r')\b'


# Create patterns
ward_pattern = _build_location_pattern(wards_df, "ward_name")
district_pattern = _build_location_pattern(districts_df, "district_name")
def extract_locations(text_series, patterns):
    """Extract locations from text using regex patterns.

    Args:
        text_series: pandas Series of free text; missing values are treated
            as empty strings.
        patterns: iterable of regex patterns (strings or compiled), applied
            to the lowercased text.

    Returns:
        A Series aligned with ``text_series.index`` whose values are the
        distinct matches joined by ", " in first-seen order ("" when nothing
        matched).
    """
    # Compile once instead of on every row.
    compiled = [re.compile(pattern) for pattern in patterns]
    joined = []
    for text in text_series.fillna(""):
        lowered = str(text).lower()
        found = [match for regex in compiled for match in regex.findall(lowered) if match]
        # dict.fromkeys de-duplicates while keeping order; a set would make
        # the joined string vary between runs.
        joined.append(", ".join(dict.fromkeys(found)))
    return pd.Series(joined, index=text_series.index)
# Process posts
if not posts_df.empty:
    posts_df = process_datetime(posts_df)
    # Fill each column BEFORE concatenating: NaN + str is NaN, so a post with
    # a title but no body (typical for link posts) used to lose its title.
    blank_text = pd.Series("", index=posts_df.index)
    title_text = posts_df["title"].fillna("").astype(str) if "title" in posts_df.columns else blank_text
    body_text = posts_df["text"].fillna("").astype(str) if "text" in posts_df.columns else blank_text
    post_text = title_text + " " + body_text
    # Location columns hold comma-joined names, or "" when nothing matched.
    if ward_pattern:
        posts_df["ward_location"] = extract_locations(post_text, [ward_pattern])
    else:
        posts_df["ward_location"] = ""
    if district_pattern:
        posts_df["district_location"] = extract_locations(post_text, [district_pattern])
    else:
        posts_df["district_location"] = ""
    # Whole-cell replacement: only cells that are exactly an alias are renamed.
    posts_df["district_location"] = posts_df["district_location"].replace(district_mapping)
    posts_df["severity"] = post_text.apply(classify_crime_severity)
    posts_df["drugs_mentioned"] = post_text.apply(extract_drug_mentions)
    posts_df["threat_score"] = posts_df.apply(calculate_threat_score, axis=1)
    posts_df["sentiment_score"] = post_text.apply(lambda x: TextBlob(str(x)).sentiment.polarity)
    posts_df["sentiment"] = posts_df["sentiment_score"].apply(
        lambda x: "Positive" if x > 0 else ("Negative" if x < 0 else "Neutral")
    )
# Process comments
if not comments_df.empty:
    comments_df = process_datetime(comments_df)
# ------------------------
# Dashboard Header
# ------------------------
st.title("🚨 Reddit based Drug Crime Intelligence Dashboard")
st.markdown("**Real-time intelligence analysis of drug-related criminal activities from Reddit social media monitoring**")
# ------------------------
# Sidebar Filters
# ------------------------
# Each filter narrows posts_df in place, so later filters and every chart
# below see only the rows that passed the earlier ones.
st.sidebar.title("πŸ”§ Intelligence Controls")
if st.sidebar.button("πŸ”„ Refresh Data"):
    # Drop cached CSV contents and rerun the script so files are re-read.
    st.cache_data.clear()
    st.rerun()
# Severity filter
if not posts_df.empty and "severity" in posts_df.columns:
    severity_filter = st.sidebar.multiselect(
        "⚠️ Crime Severity Level",
        options=['Critical', 'High', 'Medium', 'Low'],
        default=['Critical', 'High']
    )
    # An empty selection means "no filter", not "hide everything".
    if severity_filter:
        posts_df = posts_df[posts_df["severity"].isin(severity_filter)]
# Date range filter
if not posts_df.empty and "datetime" in posts_df.columns:
    # min()/max() of an all-NaT column is NaT, which has no usable .date().
    valid_stamps = posts_df["datetime"].dropna()
    if not valid_stamps.empty:
        min_date = valid_stamps.min().date()
        max_date = valid_stamps.max().date()
        date_range = st.sidebar.date_input(
            "πŸ“… Select Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date
        )
        # While the user is still picking the end date the widget returns a
        # shorter tuple; wait for both ends before filtering.
        if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
            posts_df = posts_df[
                (posts_df["date"] >= date_range[0]) &
                (posts_df["date"] <= date_range[1])
            ]
# Subreddit filter
if not posts_df.empty and "subreddit" in posts_df.columns:
    subreddits = st.sidebar.multiselect(
        "πŸ“± Filter by Subreddits",
        options=posts_df["subreddit"].unique(),
        default=posts_df["subreddit"].value_counts().head(5).index.tolist()
    )
    if subreddits:
        posts_df = posts_df[posts_df["subreddit"].isin(subreddits)]
# Keyword search
search_keyword = st.sidebar.text_input("πŸ” Search Keywords in Content")
if search_keyword and not posts_df.empty:
    keyword_hit = pd.Series(False, index=posts_df.index)
    for searchable in ("text", "title"):
        if searchable in posts_df.columns:
            # regex=False: the box takes plain keywords, and input such as "("
            # would otherwise raise re.error.
            keyword_hit = keyword_hit | posts_df[searchable].fillna("").astype(str).str.contains(
                search_keyword, case=False, na=False, regex=False
            )
    posts_df = posts_df[keyword_hit]
# ------------------------
# Main Dashboard Content
# ------------------------
if posts_df.empty and comments_df.empty:
    st.error("🚫 No intelligence data available. Please ensure data collection is operational.")
    st.stop()
# --- Crime Intelligence Metrics
st.subheader("πŸ“Š Crime Intelligence Overview")
col1, col2, col3, col4 = st.columns(4)
with col1:
    critical_posts = len(posts_df[posts_df["severity"] == "Critical"]) if "severity" in posts_df.columns else 0
    st.metric(
        label="Critical Threats",
        value=critical_posts,
        delta=f"{(critical_posts/len(posts_df)*100):.1f}%" if len(posts_df) > 0 else "0%"
    )
with col2:
    avg_threat = posts_df["threat_score"].mean() if "threat_score" in posts_df.columns else 0
    # mean() of an empty selection is NaN; show 0.0 rather than "nan".
    if pd.isna(avg_threat):
        avg_threat = 0
    st.metric(
        label="Avg Threat Score",
        value=f"{avg_threat:.1f}",
        delta="High" if avg_threat > 50 else "Moderate"
    )
with col3:
    # Default to 0 so the metric always renders, even without a location column.
    unique_locations = 0
    if "ward_location" in posts_df.columns:
        ward_exploded_temp = posts_df[posts_df["ward_location"] != ""].copy()
        ward_exploded_temp["ward_location"] = ward_exploded_temp["ward_location"].str.split(", ")
        ward_exploded_temp = ward_exploded_temp.explode("ward_location")
        unique_locations = ward_exploded_temp["ward_location"].nunique()
    st.metric(
        label="Active Locations",
        value=unique_locations
    )
with col4:
    drug_types = 0
    if "drugs_mentioned" in posts_df.columns:
        named_drugs = posts_df["drugs_mentioned"].str.split(", ").explode()
        # "Unspecified" is the placeholder for "no drug named", not a drug type.
        drug_types = named_drugs[named_drugs != "Unspecified"].nunique()
    st.metric(
        label="Drug Types Identified",
        value=drug_types
    )
st.markdown("---")
# --- Crime Severity Distribution
if "severity" in posts_df.columns:
    st.subheader("⚠️ Crime Severity Analysis")
    # Fixed colour per severity level.
    severity_palette = {
        'Critical': '#FF0000',
        'High': '#FF6B00',
        'Medium': '#FFD700',
        'Low': '#90EE90'
    }
    col1, col2 = st.columns(2)
    with col1:
        # Share of posts in each severity bucket.
        severity_counts = posts_df["severity"].value_counts()
        severity_labels = severity_counts.index
        fig_severity = px.pie(
            values=severity_counts.values,
            names=severity_labels,
            title="Crime Severity Distribution",
            color=severity_labels,
            color_discrete_map=severity_palette
        )
        st.plotly_chart(fig_severity, use_container_width=True)
    with col2:
        # Histogram of threat scores with the threshold of 50 marked.
        threat_axis_labels = {"threat_score": "Threat Score", "count": "Number of Posts"}
        fig_threat = px.histogram(
            posts_df,
            x="threat_score",
            nbins=20,
            title="Threat Score Distribution",
            labels=threat_axis_labels
        )
        fig_threat.add_vline(
            x=50,
            line_dash="dash",
            line_color="red",
            annotation_text="High Threat Threshold"
        )
        st.plotly_chart(fig_threat, use_container_width=True)
    st.markdown("---")
# --- Drug Type Analysis
if "drugs_mentioned" in posts_df.columns:
    st.subheader("πŸ’Š Substance Intelligence")
    # One entry per (post, substance) pair, placeholder removed, ten most frequent kept.
    all_drugs = posts_df["drugs_mentioned"].str.split(", ").explode()
    named_only = all_drugs[all_drugs != "Unspecified"]
    drug_counts = named_only.value_counts().head(10)
    if not drug_counts.empty:
        mention_totals = drug_counts.values
        fig_drugs = px.bar(
            x=mention_totals,
            y=drug_counts.index,
            orientation='h',
            title="Top 10 Substances Mentioned",
            labels={"x": "Mentions", "y": "Substance"},
            color=mention_totals,
            color_continuous_scale="Reds"
        )
        st.plotly_chart(fig_drugs, use_container_width=True)
    st.markdown("---")
# --- Timeline Analysis
if "date" in posts_df.columns:
    st.subheader("πŸ“ˆ Crime Activity Timeline")
    timeline_palette = {
        'Critical': '#FF0000',
        'High': '#FF6B00',
        'Medium': '#FFD700',
        'Low': '#90EE90'
    }
    col1, col2 = st.columns(2)
    with col1:
        # Posts per calendar day, one line per severity level.
        per_day = posts_df.groupby(["date", "severity"]).size()
        daily_data = per_day.reset_index(name="count")
        fig_daily = px.line(
            daily_data,
            x="date",
            y="count",
            color="severity",
            title="Daily Crime Activity by Severity",
            labels={"count": "Number of Incidents", "date": "Date"},
            color_discrete_map=timeline_palette
        )
        st.plotly_chart(fig_daily, use_container_width=True)
    with col2:
        has_time_parts = "hour" in posts_df.columns and "day_of_week" in posts_df.columns
        if has_time_parts:
            # Weekday x hour grid of post counts.
            per_slot = posts_df.groupby(["day_of_week", "hour"]).size()
            hourly_activity = per_slot.reset_index(name="count")
            fig_hourly = px.density_heatmap(
                hourly_activity,
                x="hour",
                y="day_of_week",
                z="count",
                title="Activity Heatmap - High-Risk Hours",
                labels={"hour": "Hour of Day", "day_of_week": "Day", "count": "Incidents"},
                color_continuous_scale="Reds"
            )
            st.plotly_chart(fig_hourly, use_container_width=True)
    st.markdown("---")
# --- Geographic Intelligence - COMBINED MAP
st.subheader("πŸ—ΊοΈ Geographic Crime Intelligence")
# Process both ward and district data
# A layer is usable only when its coordinate table loaded AND posts carry the
# matching extracted-location column.
ward_data_available = not wards_df.empty and "ward_location" in posts_df.columns
district_data_available = not districts_df.empty and "district_location" in posts_df.columns
if ward_data_available or district_data_available:
    st.markdown("**Crime hotspot analysis across Karnataka (Wards & Districts)**")
    # location type -> (one-row-per-post-and-location frame, location column).
    # Popups select rows by exact name from these frames, so one location name
    # contained inside another cannot inflate its figures.
    exploded_posts = {}
    # Prepare ward data
    merged_wards = pd.DataFrame()
    if ward_data_available:
        ward_exploded = posts_df[posts_df["ward_location"] != ""].copy()
        ward_exploded["ward_location"] = ward_exploded["ward_location"].str.split(", ")
        ward_exploded = ward_exploded.explode("ward_location")
        ward_exploded["ward_location"] = ward_exploded["ward_location"].str.strip().str.lower()
        exploded_posts["Ward"] = (ward_exploded, "ward_location")
        loc_counts = ward_exploded.groupby("ward_location").size().reset_index(name="count")
        merged_wards = pd.merge(loc_counts, wards_df, left_on="ward_location", right_on="ward_name", how="inner")
        merged_wards["location_type"] = "Ward"
        merged_wards["location_name"] = merged_wards["ward_name"]
    # Prepare district data
    merged_districts = pd.DataFrame()
    if district_data_available:
        district_exploded = posts_df[posts_df["district_location"] != ""].copy()
        district_exploded["district_location"] = district_exploded["district_location"].str.split(", ")
        district_exploded = district_exploded.explode("district_location")
        district_exploded["district_location"] = district_exploded["district_location"].str.strip().str.lower()
        exploded_posts["District"] = (district_exploded, "district_location")
        district_counts = district_exploded.groupby("district_location").size().reset_index(name="count")
        merged_districts = pd.merge(district_counts, districts_df, left_on="district_location", right_on="district_name", how="inner")
        merged_districts["location_type"] = "District"
        merged_districts["location_name"] = merged_districts["district_name"]
    # Combine both datasets
    all_locations = pd.concat([merged_wards, merged_districts], ignore_index=True)
    if not all_locations.empty:
        # A marker cannot be placed without coordinates.
        all_locations = all_locations.dropna(subset=["lat", "lon"])
    if not all_locations.empty:
        # Determine center of map
        center_lat = all_locations["lat"].mean()
        center_lon = all_locations["lon"].mean()
        # Create unified map
        m_unified = folium.Map(
            location=[center_lat, center_lon],
            zoom_start=9 if ward_data_available else 7,
            tiles="OpenStreetMap"
        )
        # Add heatmap layer (weight = incident count)
        heat_data = all_locations[["lat", "lon", "count"]].values.tolist()
        HeatMap(heat_data, radius=20, blur=15, max_zoom=13, gradient={
            0.0: 'blue', 0.5: 'yellow', 0.75: 'orange', 1.0: 'red'
        }).add_to(m_unified)
        # Determine hotspot threshold: locations at or above the 70th percentile.
        threshold = all_locations["count"].quantile(0.70)
        all_locations["is_hotspot"] = all_locations["count"] >= threshold
        # Add markers for each location
        for _, row in all_locations.iterrows():
            # Names and titles originate from CSV/Reddit text: escape before
            # embedding them in popup HTML.
            location_name = html.escape(str(row["location_name"]).title())
            location_type = row["location_type"]
            incident_count = row["count"]
            # Get location-specific crime data
            exploded_frame, location_column = exploded_posts[location_type]
            loc_data = exploded_frame[exploded_frame[location_column] == row["location_name"]]
            # Severity breakdown
            severity_breakdown = loc_data["severity"].value_counts().to_dict()
            severity_html = "<br>".join([f"&nbsp;&nbsp;β€’ {sev}: {count}" for sev, count in severity_breakdown.items()])
            # Critical incidents count
            critical_count = severity_breakdown.get("Critical", 0)
            # Top drugs in this location
            loc_drugs = loc_data["drugs_mentioned"].str.split(", ").explode()
            top_drugs = loc_drugs[loc_drugs != "Unspecified"].value_counts().head(3)
            drugs_html = "<br>".join([f"&nbsp;&nbsp;β€’ {drug}: {count}" for drug, count in top_drugs.items()])
            # Average threat score
            avg_threat = loc_data["threat_score"].mean()
            # The three highest-scoring posts for this location
            recent = loc_data.nlargest(3, "threat_score")
            incident_lines = []
            for _, r in recent.iterrows():
                raw_title = r.get("title")
                # A missing title is NaN (a float), which cannot be sliced.
                title_snippet = html.escape(raw_title[:50]) if isinstance(raw_title, str) else ""
                incident_lines.append(
                    f"&nbsp;&nbsp;β€’ <b>[{r['severity']}]</b> {title_snippet}... <i>(Score: {r['threat_score']:.0f})</i>"
                )
            incidents_html = "<br>".join(incident_lines)
            # Marker color based on hotspot status, then on incident count
            if row["is_hotspot"]:
                marker_color = 'darkred'
            elif incident_count >= 5:
                marker_color = 'red'
            elif incident_count >= 3:
                marker_color = 'orange'
            else:
                marker_color = 'blue'
            # Icon based on type
            icon_symbol = 'home' if location_type == "Ward" else 'map'
            # Create detailed popup
            popup_html = f"""
<div style='width: 350px; font-family: Arial, sans-serif;'>
<h3 style='color: {marker_color}; margin-bottom: 8px; border-bottom: 2px solid {marker_color}; padding-bottom: 5px;'>
{location_type}: {location_name}
</h3>
<div style='margin: 10px 0;'>
<b>πŸ“Š Total Incidents:</b> <span style='font-size: 18px; color: {marker_color};'>{incident_count}</span><br>
<b>🚨 Critical Threats:</b> <span style='font-size: 18px; color: darkred;'>{critical_count}</span><br>
<b>πŸ“ˆ Avg Threat Score:</b> <span style='font-size: 16px;'>{avg_threat:.1f}/100</span>
</div>
<hr style='border: 1px solid #ddd;'>
<div style='margin: 10px 0;'>
<b>⚠️ Severity Breakdown:</b><br>
{severity_html if severity_html else '&nbsp;&nbsp;No data'}
</div>
<hr style='border: 1px solid #ddd;'>
<div style='margin: 10px 0;'>
<b>πŸ’Š Top Substances Detected:</b><br>
{drugs_html if not top_drugs.empty else '&nbsp;&nbsp;None identified'}
</div>
<hr style='border: 1px solid #ddd;'>
<div style='margin: 10px 0;'>
<b>🎯 Recent High-Threat Incidents:</b><br>
{incidents_html if not recent.empty else '&nbsp;&nbsp;None'}
</div>
<div style='margin-top: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 5px; text-align: center; font-size: 11px;'>
<i>Click marker for details β€’ Hover for quick info</i>
</div>
</div>
"""
            # Tooltip (hover text)
            tooltip_text = f"""
<b>{location_type}: {location_name}</b><br>
Total Incidents: {incident_count}<br>
Critical: {critical_count} | Avg Threat: {avg_threat:.1f}
"""
            # Add marker; radius grows with incident count and is capped at 25.
            folium.CircleMarker(
                location=[row["lat"], row["lon"]],
                radius=min(incident_count * 2.5 if location_type == "Ward" else incident_count * 3.5, 25),
                color=marker_color,
                fill=True,
                fill_color=marker_color,
                fill_opacity=0.7,
                weight=2,
                popup=folium.Popup(popup_html, max_width=400),
                tooltip=folium.Tooltip(tooltip_text, sticky=True)
            ).add_to(m_unified)
        # Display map
        st_folium(m_unified, width="100%", height=700)
        # Hotspot analysis table
        st.subheader("πŸ”₯ Top Crime Hotspots")
        col1 = st.columns(1)
        with col1[0]:
            st.markdown("**High-Activity Wards**")
            if not merged_wards.empty:
                ward_display = merged_wards.sort_values("count", ascending=False).head(10)
                st.dataframe(
                    ward_display[["ward_name", "count"]].rename(columns={
                        "ward_name": "Ward Name",
                        "count": "Incidents"
                    }).reset_index(drop=True),
                    use_container_width=True,
                    height=300
                )
            else:
                st.info("No ward data available")
st.markdown("---")
# --- High-Priority Intelligence Reports
st.subheader("🚨 High-Priority Intelligence Reports")
if not posts_df.empty:
    # A post qualifies by severity label OR by a threat score of 50 and above.
    priority_posts = posts_df[
        (posts_df["severity"].isin(['Critical', 'High'])) |
        (posts_df["threat_score"] >= 50)
    ].sort_values("threat_score", ascending=False)
    if not priority_posts.empty:
        # Guard the column the same way the comments loader does, so a
        # dataset without 'id' does not raise KeyError here.
        if 'id' in priority_posts.columns:
            priority_posts = priority_posts.drop_duplicates(subset=['id'], keep='first')
        display_cols = ["datetime", "title", "severity", "threat_score", "drugs_mentioned", "ward_location", "subreddit"]
        available_cols = [col for col in display_cols if col in priority_posts.columns]
        priority_view = priority_posts[available_cols]
        # The table shows the top 50 with friendly headers...
        st.dataframe(
            priority_view.head(50).rename(columns={
                "datetime": "Timestamp",
                "title": "Intelligence Report",
                "severity": "Severity",
                "threat_score": "Threat Score",
                "drugs_mentioned": "Substances",
                "ward_location": "Location",
                "subreddit": "Source"
            }),
            use_container_width=True,
            height=400
        )
        # ...while the download carries every priority row with raw column names.
        st.download_button(
            label="πŸ“₯ Download Priority Reports (CSV)",
            data=priority_view.to_csv(index=False).encode("utf-8"),
            file_name=f"priority_intelligence_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )
    else:
        st.info("No high-priority incidents in selected date range")
else:
    st.info("No intelligence data available")
st.markdown("---")
# --- Advanced Analytics Section
st.subheader("πŸ”¬ Advanced Crime Analytics")
analytics_palette = {
    'Critical': '#FF0000',
    'High': '#FF6B00',
    'Medium': '#FFD700',
    'Low': '#90EE90'
}
col1, col2 = st.columns(2)
with col1:
    can_plot_hours = "hour" in posts_df.columns and "severity" in posts_df.columns
    if can_plot_hours:
        st.markdown("**Crime Patterns by Time of Day**")
        # Post counts per hour of day, split by severity.
        per_hour = posts_df.groupby(["hour", "severity"]).size()
        time_severity = per_hour.reset_index(name="count")
        fig_time = px.bar(
            time_severity,
            x="hour",
            y="count",
            color="severity",
            title="Crime Activity by Hour and Severity",
            labels={"hour": "Hour of Day", "count": "Incidents"},
            color_discrete_map=analytics_palette
        )
        st.plotly_chart(fig_time, use_container_width=True)
with col2:
    can_plot_sentiment = "sentiment_score" in posts_df.columns and "severity" in posts_df.columns
    if can_plot_sentiment:
        st.markdown("**Sentiment vs Crime Severity**")
        # Spread of sentiment polarity within each severity level.
        fig_sentiment_severity = px.box(
            posts_df,
            x="severity",
            y="sentiment_score",
            color="severity",
            title="Sentiment Distribution by Crime Severity",
            labels={"sentiment_score": "Sentiment Score", "severity": "Crime Severity"},
            color_discrete_map=analytics_palette
        )
        st.plotly_chart(fig_sentiment_severity, use_container_width=True)
st.markdown("---")
# --- Network Analysis
if "subreddit" in posts_df.columns and "drugs_mentioned" in posts_df.columns:
    st.subheader("πŸ•ΈοΈ Source-Substance Network Analysis")
    # Count posts per (subreddit, substance string) pair, skipping posts with
    # no named substance.
    with_named_drug = posts_df[posts_df["drugs_mentioned"] != "Unspecified"]
    pair_sizes = with_named_drug.groupby(["subreddit", "drugs_mentioned"]).size()
    source_drug = pair_sizes.reset_index(name="mentions")
    if not source_drug.empty:
        top_relationships = source_drug.nlargest(15, "mentions")
        network_labels = {"mentions": "Number of Mentions", "subreddit": "Source Community"}
        fig_network = px.bar(
            top_relationships,
            x="mentions",
            y="subreddit",
            color="drugs_mentioned",
            orientation='h',
            title="Top Source-Substance Relationships",
            labels=network_labels,
            height=500
        )
        st.plotly_chart(fig_network, use_container_width=True)
    st.markdown("---")
# --- Emerging Threats Detection
st.subheader("⚑ Emerging Threats Detection")
if "date" in posts_df.columns and "threat_score" in posts_df.columns:
    # Trends need real dates. With no dated rows max() is NaN, and
    # NaN - timedelta raises TypeError.
    dated_posts = posts_df[posts_df["date"].notna()]
    if dated_posts.empty:
        st.info("No dated posts available for trend analysis")
    else:
        # Windows are anchored on the newest post, not on the wall clock.
        today = dated_posts["date"].max()
        last_week = today - timedelta(days=7)
        prev_week = last_week - timedelta(days=7)
        in_last_week = dated_posts["date"] >= last_week
        in_prev_week = (dated_posts["date"] >= prev_week) & (dated_posts["date"] < last_week)
        recent_threats = dated_posts[in_last_week]["threat_score"].mean()
        previous_threats = dated_posts[in_prev_week]["threat_score"].mean()
        # NaN > 0 is False, so an empty previous week reports no change.
        threat_change = ((recent_threats - previous_threats) / previous_threats * 100) if previous_threats > 0 else 0
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric(
                "Threat Level Trend",
                f"{recent_threats:.1f}",
                f"{threat_change:+.1f}%",
                delta_color="inverse"
            )
        with col2:
            new_locations = 0
            if "ward_location" in dated_posts.columns:
                recent_tokens = dated_posts[in_last_week]["ward_location"].str.split(", ").explode()
                earlier_tokens = dated_posts[dated_posts["date"] < last_week]["ward_location"].str.split(", ").explode()
                # Drop the "" placeholder (posts with no location) so it is
                # never reported as a newly active location.
                recent_locs = {loc for loc in recent_tokens.dropna() if loc}
                prev_locs = {loc for loc in earlier_tokens.dropna() if loc}
                new_locations = len(recent_locs - prev_locs)
            st.metric("New Active Locations", new_locations)
        with col3:
            # Spike = last week's posts-per-day above 1.5x the overall average.
            daily_avg = dated_posts.groupby("date").size().mean()
            recent_avg = dated_posts[in_last_week].groupby("date").size().mean()
            spike = recent_avg > daily_avg * 1.5
            st.metric("Activity Status", "⚠️ SPIKE" if spike else "βœ… Normal")
st.markdown("---")
# --- Intelligence Summary Report
st.subheader("πŸ“‹ Executive Intelligence Summary")
summary_col1, summary_col2 = st.columns(2)
with summary_col1:
    st.markdown("**Key Findings:**")
    if not posts_df.empty:
        if "ward_location" in posts_df.columns and "threat_score" in posts_df.columns:
            ward_posts_with_location = posts_df[posts_df["ward_location"] != ""].copy()
            if not ward_posts_with_location.empty:
                # One row per (post, ward) so each ward gets its own average.
                ward_exploded_threat = ward_posts_with_location.copy()
                ward_exploded_threat["ward_location"] = ward_posts_with_location["ward_location"].str.split(", ")
                ward_exploded_threat = ward_exploded_threat.explode("ward_location").reset_index(drop=True)
                ward_threat = ward_exploded_threat.groupby("ward_location")["threat_score"].mean().sort_values(ascending=False)
                if not ward_threat.empty:
                    st.markdown(f"🎯 **Highest Threat Zone:** {ward_threat.index[0].title()} (Score: {ward_threat.iloc[0]:.1f})")
        if "drugs_mentioned" in posts_df.columns:
            # Rank named substances only; otherwise a frequent "Unspecified"
            # placeholder hides the finding entirely.
            drug_tokens = posts_df["drugs_mentioned"].str.split(", ").explode()
            top_drug = drug_tokens[drug_tokens != "Unspecified"].value_counts()
            if len(top_drug) > 0:
                st.markdown(f"πŸ’Š **Primary Substance:** {top_drug.index[0]} ({top_drug.iloc[0]} mentions)")
        if "hour" in posts_df.columns:
            known_hours = posts_df["hour"].dropna()
            # All timestamps may be missing; and any NaT makes the column
            # float, which would otherwise print as "14.0:00".
            if not known_hours.empty:
                peak_hour = int(known_hours.mode().iloc[0])
                st.markdown(f"πŸ• **Peak Activity Time:** {peak_hour}:00 - {peak_hour+1}:00")
        if "subreddit" in posts_df.columns:
            source_counts = posts_df["subreddit"].value_counts()
            if not source_counts.empty:
                top_source = source_counts.index[0]
                st.markdown(f"πŸ“± **Primary Intelligence Source:** r/{top_source}")
with summary_col2:
    st.markdown("**Risk Assessment:**")
    if not posts_df.empty and "severity" in posts_df.columns:
        # Overall risk is driven by the share of posts classified Critical.
        critical_pct = (len(posts_df[posts_df["severity"] == "Critical"]) / len(posts_df) * 100)
        if critical_pct > 30:
            risk_level = "πŸ”΄ CRITICAL"
            risk_desc = "Immediate action required"
        elif critical_pct > 15:
            risk_level = "🟠 HIGH"
            risk_desc = "Enhanced monitoring recommended"
        elif critical_pct > 5:
            risk_level = "🟑 MODERATE"
            risk_desc = "Standard surveillance protocols"
        else:
            risk_level = "🟒 LOW"
            risk_desc = "Routine monitoring sufficient"
        st.markdown(f"**Overall Risk Level:** {risk_level}")
        st.markdown(f"*{risk_desc}*")
        st.markdown(f"- Critical incidents: {critical_pct:.1f}%")
        st.markdown(f"- Total monitored incidents: {len(posts_df)}")
        # 'date' exists only when the source had a timestamp column.
        if "date" in posts_df.columns:
            st.markdown(f"- Date range: {posts_df['date'].min()} to {posts_df['date'].max()}")
st.markdown("---")
# --- Export Options
st.subheader("πŸ“€ Export Intelligence Reports")
export_col1, export_col2, export_col3 = st.columns(3)
with export_col1:
    # Everything that survived the sidebar filters.
    if not posts_df.empty:
        full_export = posts_df.to_csv(index=False).encode("utf-8")
        full_name = f"intelligence_full_{datetime.now().strftime('%Y%m%d')}.csv"
        st.download_button(
            label="πŸ“Š Full Dataset",
            data=full_export,
            file_name=full_name,
            mime="text/csv"
        )
with export_col2:
    # Only the posts classified as Critical.
    if "severity" in posts_df.columns:
        critical_data = posts_df[posts_df["severity"] == "Critical"]
        if not critical_data.empty:
            critical_export = critical_data.to_csv(index=False).encode("utf-8")
            critical_name = f"critical_incidents_{datetime.now().strftime('%Y%m%d')}.csv"
            st.download_button(
                label="🚨 Critical Incidents",
                data=critical_export,
                file_name=critical_name,
                mime="text/csv"
            )
with export_col3:
    # merged_wards exists only if the geographic section built it.
    if 'merged_wards' in locals() and not merged_wards.empty:
        location_export = merged_wards.to_csv(index=False).encode("utf-8")
        location_name_for_file = f"location_analysis_{datetime.now().strftime('%Y%m%d')}.csv"
        st.download_button(
            label="πŸ—ΊοΈ Location Analysis",
            data=location_export,
            file_name=location_name_for_file,
            mime="text/csv"
        )
st.markdown("---")
# --- System Status Footer
st.markdown("**πŸ”’ Intelligence System Status:**")
# One column per data source; data_status comes from load_data.
status_cols = st.columns(4)
with status_cols[0]:
    st.write("πŸ“„ Posts:", "βœ… Online" if data_status["posts"] else "❌ Offline")
with status_cols[1]:
    st.write("πŸ’¬ Comments:", "βœ… Online" if data_status["comments"] else "❌ Offline")
with status_cols[2]:
    st.write("🏘️ Wards:", "βœ… Online" if data_status["wards"] else "❌ Offline")
with status_cols[3]:
    st.write("🌍 Districts:", "βœ… Online" if data_status["districts"] else "❌ Offline")
try:
    file_mod_time = datetime.fromtimestamp(os.path.getmtime(POSTS_FILE))
except OSError:
    # Posts file missing or unreadable: omit the "last updated" line. Only
    # filesystem errors are swallowed, so real bugs still surface.
    file_mod_time = None
if file_mod_time is not None:
    st.markdown(f"*Intelligence data last updated: {file_mod_time.strftime('%Y-%m-%d %H:%M:%S')}*")
st.markdown("---")