Spaces:

lawlevisan
/

Reddit-Analysis

Sleeping

App Files Files Community

Reddit-Analysis / src /streamlit_app.py

lawlevisan

Update src/streamlit_app.py

e8b553c verified 4 months ago

raw

history blame contribute delete

35.1 kB

	import html
	import os
	import re
	from datetime import datetime, timedelta

	import folium
	import matplotlib.pyplot as plt
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import streamlit as st
	from folium.plugins import HeatMap, MarkerCluster
	from streamlit_folium import st_folium
	from textblob import TextBlob
	from wordcloud import WordCloud

	# ------------------------
	# Config
	# ------------------------
	# Streamlit page setup: wide layout with the sidebar expanded on first render.
	st.set_page_config(
	    page_title="Reddit based Drug Crime Intelligence Dashboard",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# Input CSV locations (relative paths)
	POSTS_FILE = "data/processed/reddit_posts_filtered.csv"
	COMMENTS_FILE = "data/processed/reddit_comments_filtered.csv"
	WARD_COORDS_FILE = "data/bangalore_wards_coordinates.csv"
	DISTRICT_COORDS_FILE = "data/karnataka_districts_coordinates.csv"

	# Keyword groups consulted by the classification and scoring helpers.
	# Each group is a list of lowercase terms.
	DRUG_KEYWORDS = {
	    "high_risk": [
	        "dealing", "dealer", "supply", "trafficking", "smuggling",
	        "cartel", "seized", "arrest", "raid",
	    ],
	    "substance": [
	        "cocaine", "heroin", "mdma", "meth", "cannabis",
	        "marijuana", "ganja", "weed", "lsd", "ecstasy",
	    ],
	    "activity": [
	        "selling", "buying", "distribution", "possession",
	        "consumption", "overdose", "addiction",
	    ],
	}

	# ------------------------
	# Enhanced Data Loading
	# ------------------------
	def _dedupe_by_id(frame):
	    """Drop rows repeating an ``id`` value; leave the frame alone if there is no ``id`` column."""
	    if "id" in frame.columns:
	        return frame.drop_duplicates(subset=["id"], keep="first")
	    return frame


	def _with_name_column(frame, target):
	    """Rename a generic ``name`` column to *target* when *target* itself is missing."""
	    if target not in frame.columns and "name" in frame.columns:
	        return frame.rename(columns={"name": target})
	    return frame


	def _read_csv_reporting(path, source, loaded_msg, missing_msg, prepare, **read_kwargs):
	    """Read one CSV, report the outcome in the sidebar and return ``(frame, ok)``.

	    Args:
	        path: CSV file to read.
	        source: Short lowercase label used in the error message.
	        loaded_msg: Success message template containing ``{count}``.
	        missing_msg: Warning shown when the file does not exist.
	        prepare: Callable applied to the freshly read frame.
	        **read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

	    Returns:
	        The prepared frame and ``True`` on success, otherwise an empty
	        ``DataFrame`` and ``False``.
	    """
	    try:
	        frame = prepare(pd.read_csv(path, **read_kwargs))
	    except FileNotFoundError:
	        st.sidebar.warning(missing_msg)
	        return pd.DataFrame(), False
	    except Exception as e:
	        # Deliberately broad: any unreadable file degrades to "no data"
	        # instead of taking the whole dashboard down.
	        st.sidebar.error(f"❌ Error loading {source}: {str(e)}")
	        return pd.DataFrame(), False
	    st.sidebar.success(loaded_msg.format(count=len(frame)))
	    return frame, True


	@st.cache_data
	def load_data(posts_file, comments_file, ward_file, district_file):
	    """Load all data files with comprehensive error handling.

	    Every source is loaded independently; a missing or unreadable file
	    yields an empty ``DataFrame`` for that source and a sidebar message.
	    Posts are read with ``dtype=str``. Posts and comments are de-duplicated
	    on ``id`` when that column exists (previously a posts file without an
	    ``id`` column was rejected as a load error).

	    Returns:
	        ``(posts, comments, wards, districts, data_status)`` where
	        ``data_status`` maps ``"posts"``, ``"comments"``, ``"wards"`` and
	        ``"districts"`` to a bool telling whether that file was loaded.
	    """
	    data_status = {"posts": False, "comments": False, "wards": False, "districts": False}

	    # Load posts
	    posts, data_status["posts"] = _read_csv_reporting(
	        posts_file,
	        "posts",
	        "✅ Posts loaded: {count} records",
	        "⚠️ Reddit posts file not found",
	        _dedupe_by_id,
	        dtype=str,
	    )

	    # Load comments
	    comments, data_status["comments"] = _read_csv_reporting(
	        comments_file,
	        "comments",
	        "✅ Comments loaded: {count} records",
	        "⚠️ Reddit comments file not found",
	        _dedupe_by_id,
	    )

	    # Load ward coordinates
	    wards, data_status["wards"] = _read_csv_reporting(
	        ward_file,
	        "wards",
	        "✅ Wards loaded: {count} wards",
	        "⚠️ Ward coordinates file not found",
	        lambda frame: _with_name_column(frame, "ward_name"),
	    )

	    # Load district coordinates
	    districts, data_status["districts"] = _read_csv_reporting(
	        district_file,
	        "districts",
	        "✅ Districts loaded: {count} districts",
	        "⚠️ District coordinates file not found",
	        lambda frame: _with_name_column(frame, "district_name"),
	    )

	    return posts, comments, wards, districts, data_status

	# ------------------------
	# Crime Analysis Functions
	# ------------------------
	def classify_crime_severity(text):
	    """Map free text to 'Critical', 'High', 'Medium' or 'Low'.

	    Each keyword found as a substring of the lowercased text adds its
	    group weight (high_risk 3, substance 2, activity 1); the total is
	    then bucketed at 5, 3 and 1.
	    """
	    haystack = str(text).lower()
	    group_weights = (("high_risk", 3), ("substance", 2), ("activity", 1))

	    total = sum(
	        weight
	        for group, weight in group_weights
	        for term in DRUG_KEYWORDS[group]
	        if term in haystack
	    )

	    # Thresholds are checked from the most to the least severe bucket.
	    for floor, label in ((5, 'Critical'), (3, 'High'), (1, 'Medium')):
	        if total >= floor:
	            return label
	    return 'Low'

	def extract_drug_mentions(text):
	    """Return the capitalised substance keywords found in *text*.

	    The hits are joined with ', ' in keyword-list order; when nothing
	    matches the placeholder 'Unspecified' is returned.
	    """
	    lowered = str(text).lower()
	    hits = [name.capitalize() for name in DRUG_KEYWORDS['substance'] if name in lowered]
	    if not hits:
	        return 'Unspecified'
	    return ', '.join(hits)

	def _text_or_blank(value):
	    """Return *value* as a string, treating None/NaN as an empty string."""
	    return "" if value is None or pd.isna(value) else str(value)


	def _engagement_points(value, divisor, cap=5):
	    """Turn a raw engagement count into score points within ``[0, cap]``.

	    The value may be an int, a float or a numeric string (posts are read
	    with ``dtype=str``); anything missing or unparseable contributes 0.
	    """
	    number = pd.to_numeric(value, errors="coerce")
	    try:
	        whole = int(number)
	    except (TypeError, ValueError, OverflowError):
	        # NaN, None or infinity: no usable engagement figure.
	        return 0
	    return max(0, min(whole / divisor, cap))


	def calculate_threat_score(row):
	    """Calculate threat score based on multiple factors.

	    Args:
	        row: Mapping-like record (a DataFrame row or a dict) that may
	            carry 'text', 'title', 'score' and 'num_comments'.

	    Returns:
	        A number capped at 100, built from:
	        * 10 points per high-risk keyword found in title + text,
	        * up to 5 points from the vote score (score / 10),
	        * up to 5 points from the comment count (num_comments / 5),
	        * 5 points when TextBlob polarity is below -0.2.

	    Missing or non-numeric engagement values count as 0 instead of
	    raising, and negative values no longer subtract from the score.
	    """
	    # Missing text/title are treated as empty rather than the literal "nan".
	    text = _text_or_blank(row.get('text', '')) + ' ' + _text_or_blank(row.get('title', ''))
	    text_lower = text.lower()

	    score = sum(10 for keyword in DRUG_KEYWORDS['high_risk'] if keyword in text_lower)

	    score += _engagement_points(row.get('score', 0), 10)
	    score += _engagement_points(row.get('num_comments', 0), 5)

	    # Strongly negative wording is treated as an extra risk signal.
	    if TextBlob(text).sentiment.polarity < -0.2:
	        score += 5

	    return min(score, 100)

	# ------------------------
	# Load All Data
	# ------------------------
	# Unpack the four frames and the per-source load flags returned by load_data.
	(
	    posts_df,
	    comments_df,
	    wards_df,
	    districts_df,
	    data_status,
	) = load_data(POSTS_FILE, COMMENTS_FILE, WARD_COORDS_FILE, DISTRICT_COORDS_FILE)

	# ------------------------
	# Data Processing
	# ------------------------
	def _epoch_unit(values):
	    """Guess the unit of numeric epoch timestamps, or None if implausible.

	    The smallest value decides: 1e9..1e11 is read as seconds (from 2001
	    onwards), 1e11..1e14 as milliseconds. Anything else returns None so
	    the caller falls back to generic parsing.
	    """
	    present = values.dropna()
	    if present.empty:
	        return None
	    lowest = present.min()
	    if 1e9 <= lowest < 1e11:
	        return "s"
	    if 1e11 <= lowest < 1e14:
	        return "ms"
	    return None


	def process_datetime(df, datetime_col='created_utc'):
	    """Process datetime column with robust error handling.

	    Adds ``datetime``, ``date``, ``hour`` and ``day_of_week`` columns to
	    *df* (in place) and returns it. When *datetime_col* is missing the
	    frame is returned untouched.

	    Values that are entirely numeric (numbers or numeric strings) and in
	    a plausible Unix-epoch range are interpreted as epoch seconds or
	    milliseconds; previously such values were parsed as nanoseconds or
	    became NaT. Everything else is parsed by ``pd.to_datetime``;
	    unparseable entries become NaT.
	    """
	    if datetime_col not in df.columns:
	        return df

	    raw = df[datetime_col]
	    unit = None
	    numeric = None
	    # Columns that already hold datetimes must not take the epoch path.
	    if not pd.api.types.is_datetime64_any_dtype(raw):
	        numeric = pd.to_numeric(raw, errors="coerce")
	        # Epoch parsing only applies when every non-missing value is numeric.
	        if numeric.notna().sum() == raw.notna().sum():
	            unit = _epoch_unit(numeric)

	    if unit is not None:
	        parsed = pd.to_datetime(numeric, unit=unit, errors="coerce")
	    else:
	        parsed = pd.to_datetime(raw, errors="coerce")

	    df["datetime"] = parsed
	    df["date"] = df["datetime"].dt.date
	    df["hour"] = df["datetime"].dt.hour
	    df["day_of_week"] = df["datetime"].dt.day_name()
	    return df

	# Normalize coordinate names
	if not wards_df.empty and "ward_name" in wards_df.columns:
	    wards_df["ward_name"] = wards_df["ward_name"].astype(str).str.strip().str.lower()

	if not districts_df.empty and "district_name" in districts_df.columns:
	    districts_df["district_name"] = districts_df["district_name"].astype(str).str.strip().str.lower()

	# District mapping: alternative spellings -> canonical district name
	district_mapping = {
	    "bangalore": "bengaluru",
	    "blr": "bengaluru",
	    "mysore": "mysuru",
	}


	def _location_pattern(names, aliases=()):
	    """Build a whole-word alternation regex for the given place names.

	    Names are lowercased and stripped; missing values, blanks and the
	    literal "nan" (what ``astype(str)`` makes of a missing name) are
	    ignored. Longer names are listed first so that a short name cannot
	    shadow a longer one that starts with it. Returns None when no
	    usable name remains, instead of a pattern matching the empty string.
	    """
	    cleaned = {
	        str(name).strip().lower()
	        for name in list(names) + list(aliases)
	        if pd.notna(name)
	    }
	    cleaned -= {"", "nan"}
	    if not cleaned:
	        return None
	    ordered = sorted(cleaned, key=lambda name: (-len(name), name))
	    return r'\b(' + '|'.join(re.escape(name) for name in ordered) + r')\b'


	# Create patterns
	ward_pattern = None
	district_pattern = None

	if not wards_df.empty and "ward_name" in wards_df.columns:
	    ward_pattern = _location_pattern(wards_df["ward_name"])

	if not districts_df.empty and "district_name" in districts_df.columns:
	    known_districts = set(districts_df["district_name"].astype(str).str.strip().str.lower())
	    # Alternative spellings are only searchable when their canonical
	    # district is actually present in the coordinates file.
	    usable_aliases = [
	        alias for alias, canonical in district_mapping.items()
	        if canonical in known_districts
	    ]
	    district_pattern = _location_pattern(districts_df["district_name"], usable_aliases)

	def extract_locations(text_series, patterns):
	    """Extract locations from text using regex patterns.

	    Args:
	        text_series: Series of texts; missing values are treated as "".
	        patterns: Iterable of regex strings, each applied with
	            ``findall`` to the lowercased text.

	    Returns:
	        A Series aligned with ``text_series.index`` whose values are the
	        distinct matches joined by ", ". Matches are listed in pattern
	        order, then in order of first appearance, so the result is the
	        same on every run (the previous set-based de-duplication produced
	        an arbitrary order). Empty matches are dropped.
	    """
	    compiled = [re.compile(pattern) for pattern in patterns]
	    locations = []
	    for text in text_series.fillna(""):
	        lowered = str(text).lower()
	        # A dict keeps first-seen order while removing duplicates.
	        seen = {}
	        for matcher in compiled:
	            for match in matcher.findall(lowered):
	                if match:
	                    seen[match] = None
	        locations.append(", ".join(seen))
	    return pd.Series(locations, index=text_series.index)

	# Process posts
	if not posts_df.empty:
	    posts_df = process_datetime(posts_df)

	    # Build "title text" per post. Missing pieces are blanked BEFORE the
	    # concatenation: adding NaN to a string yields NaN, which used to wipe
	    # out the title of every post that has no body text.
	    if "title" in posts_df.columns:
	        title_part = posts_df["title"].fillna("").astype(str)
	    else:
	        title_part = pd.Series("", index=posts_df.index)
	    if "text" in posts_df.columns:
	        body_part = posts_df["text"].fillna("").astype(str)
	    else:
	        body_part = pd.Series("", index=posts_df.index)
	    post_text = title_part + " " + body_part

	    if ward_pattern:
	        posts_df["ward_location"] = extract_locations(post_text, [ward_pattern])
	    else:
	        posts_df["ward_location"] = ""

	    if district_pattern:
	        posts_df["district_location"] = extract_locations(post_text, [district_pattern])
	    else:
	        posts_df["district_location"] = ""

	    # Canonicalise every district in the comma-separated cell, not only
	    # cells that consist of a single alias, and drop the duplicates that
	    # mapping can create (e.g. "bangalore, bengaluru").
	    posts_df["district_location"] = posts_df["district_location"].apply(
	        lambda cell: ", ".join(
	            dict.fromkeys(
	                district_mapping.get(name, name)
	                for name in str(cell).split(", ")
	                if name
	            )
	        )
	    )

	    posts_df["severity"] = post_text.apply(classify_crime_severity)
	    posts_df["drugs_mentioned"] = post_text.apply(extract_drug_mentions)
	    posts_df["threat_score"] = posts_df.apply(calculate_threat_score, axis=1)

	    posts_df["sentiment_score"] = post_text.apply(lambda x: TextBlob(str(x)).sentiment.polarity)
	    posts_df["sentiment"] = posts_df["sentiment_score"].apply(
	        lambda x: "Positive" if x > 0 else ("Negative" if x < 0 else "Neutral")
	    )

	# Process comments
	if not comments_df.empty:
	    comments_df = process_datetime(comments_df)

	# ------------------------
	# Dashboard Header
	# ------------------------
	st.title("🚨 Reddit based Drug Crime Intelligence Dashboard")
	st.markdown("Real-time intelligence analysis of drug-related criminal activities from Reddit social media monitoring")

	# ------------------------
	# Sidebar Filters
	# ------------------------
	st.sidebar.title("🔧 Intelligence Controls")

	# Clearing the data cache and rerunning forces the CSV files to be re-read.
	refresh_requested = st.sidebar.button("🔄 Refresh Data")
	if refresh_requested:
	    st.cache_data.clear()
	    st.rerun()

	# Severity filter
	if not posts_df.empty and "severity" in posts_df.columns:
	    severity_filter = st.sidebar.multiselect(
	        "⚠️ Crime Severity Level",
	        options=['Critical', 'High', 'Medium', 'Low'],
	        default=['Critical', 'High']
	    )
	    # An empty selection means "no severity filtering".
	    if severity_filter:
	        posts_df = posts_df[posts_df["severity"].isin(severity_filter)]

	# Date range filter (skipped when no post has a parsable timestamp, which
	# would otherwise fail on NaT.date())
	if (
	    not posts_df.empty
	    and "datetime" in posts_df.columns
	    and posts_df["datetime"].notna().any()
	):
	    min_date = posts_df["datetime"].min().date()
	    max_date = posts_df["datetime"].max().date()

	    date_range = st.sidebar.date_input(
	        "📅 Select Date Range",
	        value=(min_date, max_date),
	        min_value=min_date,
	        max_value=max_date
	    )

	    # While the user is still picking the end date only one value is
	    # returned; filtering waits until both ends are known.
	    if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
	        range_start, range_end = date_range
	        # Rows without a date are excluded explicitly instead of relying
	        # on how NaT compares with datetime.date objects.
	        in_range = posts_df["date"].apply(
	            lambda day: pd.notna(day) and range_start <= day <= range_end
	        )
	        posts_df = posts_df[in_range.astype(bool)]

	# Subreddit filter
	if not posts_df.empty and "subreddit" in posts_df.columns:
	    subreddits = st.sidebar.multiselect(
	        "📱 Filter by Subreddits",
	        options=posts_df["subreddit"].dropna().unique(),
	        default=posts_df["subreddit"].value_counts().head(5).index.tolist()
	    )
	    if subreddits:
	        posts_df = posts_df[posts_df["subreddit"].isin(subreddits)]

	# Keyword search
	search_keyword = st.sidebar.text_input("🔍 Search Keywords in Content")
	if search_keyword and not posts_df.empty:
	    keyword_hit = pd.Series(False, index=posts_df.index)
	    for searched_column in ("text", "title"):
	        if searched_column in posts_df.columns:
	            # regex=False: the keyword is user input and must be matched
	            # literally; characters such as "(" or "+" would otherwise be
	            # read as regex syntax and could raise.
	            keyword_hit = keyword_hit | posts_df[searched_column].astype(str).str.contains(
	                search_keyword, case=False, na=False, regex=False
	            )
	    posts_df = posts_df[keyword_hit]

	# ------------------------
	# Main Dashboard Content
	# ------------------------

	# Nothing to show at all: stop the script run here.
	if posts_df.empty and comments_df.empty:
	    st.error("🚫 No intelligence data available. Please ensure data collection is operational.")
	    st.stop()

	# --- Crime Intelligence Metrics
	st.subheader("📊 Crime Intelligence Overview")
	col1, col2, col3, col4 = st.columns(4)

	with col1:
	    critical_posts = int((posts_df["severity"] == "Critical").sum()) if "severity" in posts_df.columns else 0
	    st.metric(
	        label="Critical Threats",
	        value=critical_posts,
	        delta=f"{(critical_posts/len(posts_df)*100):.1f}%" if len(posts_df) > 0 else "0%"
	    )

	with col2:
	    avg_threat = posts_df["threat_score"].mean() if "threat_score" in posts_df.columns else 0
	    # The mean of an empty selection is NaN; show 0.0 rather than "nan".
	    if pd.isna(avg_threat):
	        avg_threat = 0
	    st.metric(
	        label="Avg Threat Score",
	        value=f"{avg_threat:.1f}",
	        delta="High" if avg_threat > 50 else "Moderate"
	    )

	with col3:
	    # Default keeps the metric defined when no ward column exists.
	    unique_locations = 0
	    if "ward_location" in posts_df.columns:
	        ward_exploded_temp = posts_df[posts_df["ward_location"] != ""].copy()
	        ward_exploded_temp["ward_location"] = ward_exploded_temp["ward_location"].str.split(", ")
	        ward_exploded_temp = ward_exploded_temp.explode("ward_location")
	        unique_locations = ward_exploded_temp["ward_location"].nunique()
	    st.metric(
	        label="Active Locations",
	        value=unique_locations
	    )

	with col4:
	    drug_types = 0
	    if "drugs_mentioned" in posts_df.columns:
	        mentioned = posts_df["drugs_mentioned"].str.split(", ").explode()
	        # "Unspecified" is the placeholder for "no drug found", not a drug type.
	        drug_types = mentioned[mentioned != "Unspecified"].nunique()
	    st.metric(
	        label="Drug Types Identified",
	        value=drug_types
	    )

	st.markdown("---")

	# --- Crime Severity Distribution
	if "severity" in posts_df.columns:
	    st.subheader("⚠️ Crime Severity Analysis")

	    col1, col2 = st.columns(2)

	    # Left: share of posts per severity level, one fixed colour per level.
	    with col1:
	        severity_counts = posts_df["severity"].value_counts()
	        severity_palette = {
	            'Critical': '#FF0000',
	            'High': '#FF6B00',
	            'Medium': '#FFD700',
	            'Low': '#90EE90',
	        }
	        fig_severity = px.pie(
	            values=severity_counts.values,
	            names=severity_counts.index,
	            title="Crime Severity Distribution",
	            color=severity_counts.index,
	            color_discrete_map=severity_palette,
	        )
	        st.plotly_chart(fig_severity, use_container_width=True)

	    # Right: histogram of threat scores with a dashed marker at 50.
	    with col2:
	        histogram_labels = {"threat_score": "Threat Score", "count": "Number of Posts"}
	        fig_threat = px.histogram(
	            posts_df,
	            x="threat_score",
	            nbins=20,
	            title="Threat Score Distribution",
	            labels=histogram_labels,
	        )
	        fig_threat.add_vline(
	            x=50,
	            line_dash="dash",
	            line_color="red",
	            annotation_text="High Threat Threshold",
	        )
	        st.plotly_chart(fig_threat, use_container_width=True)

	st.markdown("---")

	# --- Drug Type Analysis
	if "drugs_mentioned" in posts_df.columns:
	    st.subheader("💊 Substance Intelligence")

	    # One row per mentioned substance, placeholder rows removed, top ten kept.
	    all_drugs = posts_df["drugs_mentioned"].str.split(", ").explode()
	    named_drugs = all_drugs[all_drugs != "Unspecified"]
	    drug_counts = named_drugs.value_counts().head(10)

	    if not drug_counts.empty:
	        mention_totals = drug_counts.values
	        fig_drugs = px.bar(
	            x=mention_totals,
	            y=drug_counts.index,
	            orientation='h',
	            title="Top 10 Substances Mentioned",
	            labels={"x": "Mentions", "y": "Substance"},
	            color=mention_totals,
	            color_continuous_scale="Reds",
	        )
	        st.plotly_chart(fig_drugs, use_container_width=True)

	st.markdown("---")

	# --- Timeline Analysis
	if "date" in posts_df.columns:
	    st.subheader("📈 Crime Activity Timeline")

	    col1, col2 = st.columns(2)

	    # Left: posts per day, one line per severity level.
	    with col1:
	        per_day = posts_df.groupby(["date", "severity"]).size()
	        daily_data = per_day.reset_index(name="count")
	        timeline_palette = {
	            'Critical': '#FF0000',
	            'High': '#FF6B00',
	            'Medium': '#FFD700',
	            'Low': '#90EE90',
	        }
	        fig_daily = px.line(
	            daily_data,
	            x="date",
	            y="count",
	            color="severity",
	            title="Daily Crime Activity by Severity",
	            labels={"count": "Number of Incidents", "date": "Date"},
	            color_discrete_map=timeline_palette,
	        )
	        st.plotly_chart(fig_daily, use_container_width=True)

	    # Right: weekday-by-hour heatmap of post counts.
	    with col2:
	        has_time_parts = "hour" in posts_df.columns and "day_of_week" in posts_df.columns
	        if has_time_parts:
	            per_slot = posts_df.groupby(["day_of_week", "hour"]).size()
	            hourly_activity = per_slot.reset_index(name="count")
	            fig_hourly = px.density_heatmap(
	                hourly_activity,
	                x="hour",
	                y="day_of_week",
	                z="count",
	                title="Activity Heatmap - High-Risk Hours",
	                labels={"hour": "Hour of Day", "day_of_week": "Day", "count": "Incidents"},
	                color_continuous_scale="Reds",
	            )
	            st.plotly_chart(fig_hourly, use_container_width=True)

	st.markdown("---")

	# --- Geographic Intelligence - COMBINED MAP
	st.subheader("🗺️ Geographic Crime Intelligence")

	# Process both ward and district data. The name columns are required for
	# the merge below, so they are part of the availability check.
	ward_data_available = (
	    not wards_df.empty
	    and "ward_name" in wards_df.columns
	    and "ward_location" in posts_df.columns
	)
	district_data_available = (
	    not districts_df.empty
	    and "district_name" in districts_df.columns
	    and "district_location" in posts_df.columns
	)

	if ward_data_available or district_data_available:
	    st.markdown("Crime hotspot analysis across Karnataka (Wards & Districts)")

	    # Prepare ward data: one row per (post, ward) pair, then counts per ward
	    # joined with the ward coordinates.
	    merged_wards = pd.DataFrame()
	    ward_exploded = pd.DataFrame()
	    if ward_data_available:
	        ward_exploded = posts_df[posts_df["ward_location"] != ""].copy()
	        ward_exploded["ward_location"] = ward_exploded["ward_location"].str.split(", ")
	        ward_exploded = ward_exploded.explode("ward_location")
	        ward_exploded["ward_location"] = ward_exploded["ward_location"].str.strip().str.lower()

	        loc_counts = ward_exploded.groupby("ward_location").size().reset_index(name="count")
	        merged_wards = pd.merge(loc_counts, wards_df, left_on="ward_location", right_on="ward_name", how="inner")
	        merged_wards["location_type"] = "Ward"
	        merged_wards["location_name"] = merged_wards["ward_name"]

	    # Prepare district data the same way.
	    merged_districts = pd.DataFrame()
	    district_exploded = pd.DataFrame()
	    if district_data_available:
	        district_exploded = posts_df[posts_df["district_location"] != ""].copy()
	        district_exploded["district_location"] = district_exploded["district_location"].str.split(", ")
	        district_exploded = district_exploded.explode("district_location")
	        district_exploded["district_location"] = district_exploded["district_location"].str.strip().str.lower()

	        district_counts = district_exploded.groupby("district_location").size().reset_index(name="count")
	        merged_districts = pd.merge(district_counts, districts_df, left_on="district_location", right_on="district_name", how="inner")
	        merged_districts["location_type"] = "District"
	        merged_districts["location_name"] = merged_districts["district_name"]

	    # Combine both datasets
	    all_locations = pd.concat([merged_wards, merged_districts], ignore_index=True)

	    # Locations without coordinates cannot be drawn.
	    if not all_locations.empty:
	        all_locations = all_locations.dropna(subset=["lat", "lon"])

	    if not all_locations.empty:
	        # Determine center of map
	        center_lat = all_locations["lat"].mean()
	        center_lon = all_locations["lon"].mean()

	        # Create unified map
	        m_unified = folium.Map(
	            location=[center_lat, center_lon],
	            zoom_start=9 if ward_data_available else 7,
	            tiles="OpenStreetMap"
	        )

	        # Add heatmap layer weighted by the incident count
	        heat_data = all_locations[["lat", "lon", "count"]].values.tolist()
	        HeatMap(heat_data, radius=20, blur=15, max_zoom=13, gradient={
	            0.0: 'blue', 0.5: 'yellow', 0.75: 'orange', 1.0: 'red'
	        }).add_to(m_unified)

	        # Determine hotspot threshold: top 30% of locations by count
	        threshold = all_locations["count"].quantile(0.70)
	        all_locations["is_hotspot"] = all_locations["count"] >= threshold

	        # Add markers for each location
	        for _, row in all_locations.iterrows():
	            location_type = row["location_type"]
	            incident_count = row["count"]
	            # Names come from a CSV file and end up in HTML: escape them.
	            location_name = html.escape(str(row["location_name"]).title())

	            # Get location-specific crime data by exact name match on the
	            # exploded frames. The earlier substring/regex search also hit
	            # longer names containing this one and broke on names with
	            # regex metacharacters.
	            if location_type == "Ward":
	                loc_data = ward_exploded[ward_exploded["ward_location"] == row["location_name"]]
	            else:
	                loc_data = district_exploded[district_exploded["district_location"] == row["location_name"]]

	            # Severity breakdown
	            severity_breakdown = loc_data["severity"].value_counts().to_dict()
	            severity_html = "<br>".join([f"  • {sev}: {count}" for sev, count in severity_breakdown.items()])

	            # Critical incidents count
	            critical_count = severity_breakdown.get("Critical", 0)

	            # Top drugs in this location
	            loc_drugs = loc_data["drugs_mentioned"].str.split(", ").explode()
	            top_drugs = loc_drugs[loc_drugs != "Unspecified"].value_counts().head(3)
	            drugs_html = "<br>".join([f"  • {drug}: {count}" for drug, count in top_drugs.items()])

	            # Average threat score (0 when there is nothing to average)
	            avg_threat = loc_data["threat_score"].mean()
	            if pd.isna(avg_threat):
	                avg_threat = 0

	            # Highest-threat incidents. Titles are untrusted Reddit text, so
	            # they are escaped before being embedded in the popup HTML; a
	            # missing title is shown as empty instead of crashing the slice.
	            recent = loc_data.nlargest(3, "threat_score")
	            incident_lines = []
	            for _, r in recent.iterrows():
	                raw_title = r.get("title", "")
	                title = "" if pd.isna(raw_title) else str(raw_title)
	                incident_lines.append(
	                    f"  • <b>[{html.escape(str(r['severity']))}]</b> "
	                    f"{html.escape(title[:50])}... "
	                    f"<i>(Score: {r['threat_score']:.0f})</i>"
	                )
	            incidents_html = "<br>".join(incident_lines)

	            # Marker color: hotspots first, then by raw incident count
	            if row["is_hotspot"]:
	                marker_color = 'darkred'
	            elif incident_count >= 5:
	                marker_color = 'red'
	            elif incident_count >= 3:
	                marker_color = 'orange'
	            else:
	                marker_color = 'blue'

	            # Create detailed popup
	            popup_html = (
	                "<div style='width: 350px; font-family: Arial, sans-serif;'>"
	                f"<h3 style='color: {marker_color}; margin-bottom: 8px; border-bottom: 2px solid {marker_color}; padding-bottom: 5px;'>"
	                f"{location_type}: {location_name}"
	                "</h3>"
	                "<div style='margin: 10px 0;'>"
	                f"<b>📊 Total Incidents:</b> <span style='font-size: 18px; color: {marker_color};'>{incident_count}</span><br>"
	                f"<b>🚨 Critical Threats:</b> <span style='font-size: 18px; color: darkred;'>{critical_count}</span><br>"
	                f"<b>📈 Avg Threat Score:</b> <span style='font-size: 16px;'>{avg_threat:.1f}/100</span>"
	                "</div>"
	                "<hr style='border: 1px solid #ddd;'>"
	                "<div style='margin: 10px 0;'>"
	                "<b>⚠️ Severity Breakdown:</b><br>"
	                f"{severity_html if severity_html else '  No data'}"
	                "</div>"
	                "<hr style='border: 1px solid #ddd;'>"
	                "<div style='margin: 10px 0;'>"
	                "<b>💊 Top Substances Detected:</b><br>"
	                f"{drugs_html if not top_drugs.empty else '  None identified'}"
	                "</div>"
	                "<hr style='border: 1px solid #ddd;'>"
	                "<div style='margin: 10px 0;'>"
	                "<b>🎯 Recent High-Threat Incidents:</b><br>"
	                f"{incidents_html if not recent.empty else '  None'}"
	                "</div>"
	                "<div style='margin-top: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 5px; text-align: center; font-size: 11px;'>"
	                "<i>Click marker for details • Hover for quick info</i>"
	                "</div>"
	                "</div>"
	            )

	            # Tooltip (hover text)
	            tooltip_text = (
	                f"<b>{location_type}: {location_name}</b><br>"
	                f"Total Incidents: {incident_count}<br>"
	                f"Critical: {critical_count} | Avg Threat: {avg_threat:.1f}"
	            )

	            # Add marker; the radius grows with the count and is capped at 25
	            radius_factor = 2.5 if location_type == "Ward" else 3.5
	            folium.CircleMarker(
	                location=[row["lat"], row["lon"]],
	                radius=min(incident_count * radius_factor, 25),
	                color=marker_color,
	                fill=True,
	                fill_color=marker_color,
	                fill_opacity=0.7,
	                weight=2,
	                popup=folium.Popup(popup_html, max_width=400),
	                tooltip=folium.Tooltip(tooltip_text, sticky=True)
	            ).add_to(m_unified)

	        # Display map
	        st_folium(m_unified, width="100%", height=700)

	        # Hotspot analysis table
	        st.subheader("🔥 Top Crime Hotspots")

	        col1 = st.columns(1)

	        with col1[0]:
	            st.markdown("High-Activity Wards")
	            if not merged_wards.empty:
	                ward_display = merged_wards.sort_values("count", ascending=False).head(10)
	                st.dataframe(
	                    ward_display[["ward_name", "count"]].rename(columns={
	                        "ward_name": "Ward Name",
	                        "count": "Incidents"
	                    }).reset_index(drop=True),
	                    use_container_width=True,
	                    height=300
	                )
	            else:
	                st.info("No ward data available")

	st.markdown("---")

	# --- High-Priority Intelligence Reports
	st.subheader("🚨 High-Priority Intelligence Reports")

	if not posts_df.empty and {"severity", "threat_score"}.issubset(posts_df.columns):
	    # A post is high priority when its severity is Critical/High OR its
	    # threat score reaches 50; highest scores are listed first.
	    priority_posts = posts_df[
	        (posts_df["severity"].isin(['Critical', 'High'])) |
	        (posts_df["threat_score"] >= 50)
	    ].sort_values("threat_score", ascending=False)

	    if not priority_posts.empty:
	        # De-duplicate on the post id only when that column is present.
	        if "id" in priority_posts.columns:
	            priority_posts = priority_posts.drop_duplicates(subset=['id'], keep='first')

	        display_cols = ["datetime", "title", "severity", "threat_score", "drugs_mentioned", "ward_location", "subreddit"]
	        available_cols = [col for col in display_cols if col in priority_posts.columns]

	        # The table shows the first 50 rows; the download contains all of them.
	        st.dataframe(
	            priority_posts[available_cols].head(50).rename(columns={
	                "datetime": "Timestamp",
	                "title": "Intelligence Report",
	                "severity": "Severity",
	                "threat_score": "Threat Score",
	                "drugs_mentioned": "Substances",
	                "ward_location": "Location",
	                "subreddit": "Source"
	            }),
	            use_container_width=True,
	            height=400
	        )

	        st.download_button(
	            label="📥 Download Priority Reports (CSV)",
	            data=priority_posts[available_cols].to_csv(index=False).encode("utf-8"),
	            file_name=f"priority_intelligence_{datetime.now().strftime('%Y%m%d')}.csv",
	            mime="text/csv"
	        )
	    else:
	        st.info("No high-priority incidents in selected date range")
	else:
	    st.info("No intelligence data available")

	st.markdown("---")

	# --- Advanced Analytics Section
	st.subheader("🔬 Advanced Crime Analytics")

	col1, col2 = st.columns(2)

	# Left: stacked bars of post counts per hour, split by severity.
	with col1:
	    can_plot_hours = "hour" in posts_df.columns and "severity" in posts_df.columns
	    if can_plot_hours:
	        st.markdown("Crime Patterns by Time of Day")
	        per_hour = posts_df.groupby(["hour", "severity"]).size()
	        time_severity = per_hour.reset_index(name="count")
	        hour_palette = {
	            'Critical': '#FF0000',
	            'High': '#FF6B00',
	            'Medium': '#FFD700',
	            'Low': '#90EE90',
	        }
	        fig_time = px.bar(
	            time_severity,
	            x="hour",
	            y="count",
	            color="severity",
	            title="Crime Activity by Hour and Severity",
	            labels={"hour": "Hour of Day", "count": "Incidents"},
	            color_discrete_map=hour_palette,
	        )
	        st.plotly_chart(fig_time, use_container_width=True)

	# Right: box plot of sentiment scores for each severity level.
	with col2:
	    can_plot_sentiment = "sentiment_score" in posts_df.columns and "severity" in posts_df.columns
	    if can_plot_sentiment:
	        st.markdown("Sentiment vs Crime Severity")
	        box_palette = {
	            'Critical': '#FF0000',
	            'High': '#FF6B00',
	            'Medium': '#FFD700',
	            'Low': '#90EE90',
	        }
	        fig_sentiment_severity = px.box(
	            posts_df,
	            x="severity",
	            y="sentiment_score",
	            color="severity",
	            title="Sentiment Distribution by Crime Severity",
	            labels={"sentiment_score": "Sentiment Score", "severity": "Crime Severity"},
	            color_discrete_map=box_palette,
	        )
	        st.plotly_chart(fig_sentiment_severity, use_container_width=True)

	st.markdown("---")

	# --- Network Analysis
	if "subreddit" in posts_df.columns and "drugs_mentioned" in posts_df.columns:
	    st.subheader("🕸️ Source-Substance Network Analysis")

	    # Count posts per (subreddit, substance string), ignoring posts where
	    # no substance was identified.
	    with_substance = posts_df[posts_df["drugs_mentioned"] != "Unspecified"]
	    pair_sizes = with_substance.groupby(["subreddit", "drugs_mentioned"]).size()
	    source_drug = pair_sizes.reset_index(name="mentions")

	    if not source_drug.empty:
	        # Keep the 15 most frequent pairs for the chart.
	        top_relationships = source_drug.nlargest(15, "mentions")

	        fig_network = px.bar(
	            top_relationships,
	            x="mentions",
	            y="subreddit",
	            color="drugs_mentioned",
	            orientation='h',
	            title="Top Source-Substance Relationships",
	            labels={"mentions": "Number of Mentions", "subreddit": "Source Community"},
	            height=500,
	        )
	        st.plotly_chart(fig_network, use_container_width=True)

	st.markdown("---")

	# --- Emerging Threats Detection
	st.subheader("⚡ Emerging Threats Detection")

	# The comparison needs at least one post with a valid date; without one
	# there is no reference day to anchor the two weekly windows.
	if (
	    "date" in posts_df.columns
	    and "threat_score" in posts_df.columns
	    and posts_df["date"].notna().any()
	):
	    dated_posts = posts_df[posts_df["date"].notna()]

	    # "today" is the newest date in the data, not the wall-clock date.
	    today = dated_posts["date"].max()
	    last_week = today - timedelta(days=7)
	    prev_week = last_week - timedelta(days=7)

	    is_recent = dated_posts["date"] >= last_week
	    is_previous = (dated_posts["date"] >= prev_week) & (dated_posts["date"] < last_week)

	    recent_threats = dated_posts[is_recent]["threat_score"].mean()
	    previous_threats = dated_posts[is_previous]["threat_score"].mean()

	    # No (or zero) baseline in the previous week: report no change.
	    threat_change = ((recent_threats - previous_threats) / previous_threats * 100) if previous_threats > 0 else 0

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.metric(
	            "Threat Level Trend",
	            f"{recent_threats:.1f}",
	            f"{threat_change:+.1f}%",
	            delta_color="inverse"
	        )

	    with col2:
	        new_locations = 0
	        if "ward_location" in dated_posts.columns:
	            # Posts without any detected ward carry "" as location; that
	            # blank must not be counted as a newly active location.
	            recent_locs = set(
	                dated_posts[is_recent]["ward_location"].str.split(", ").explode().dropna()
	            ) - {""}
	            prev_locs = set(
	                dated_posts[dated_posts["date"] < last_week]["ward_location"].str.split(", ").explode().dropna()
	            ) - {""}
	            new_locations = len(recent_locs - prev_locs)
	        st.metric("New Active Locations", new_locations)

	    with col3:
	        # Spike: the recent daily average exceeds 1.5x the overall average.
	        daily_avg = dated_posts.groupby("date").size().mean()
	        recent_avg = dated_posts[is_recent].groupby("date").size().mean()
	        spike = recent_avg > daily_avg * 1.5
	        st.metric("Activity Status", "⚠️ SPIKE" if spike else "✅ Normal")

	st.markdown("---")

	# --- Intelligence Summary Report
	st.subheader("📋 Executive Intelligence Summary")

	summary_col1, summary_col2 = st.columns(2)

	with summary_col1:
	    st.markdown("Key Findings:")

	    if not posts_df.empty:
	        # Ward with the highest mean threat score across its posts.
	        if "ward_location" in posts_df.columns and "threat_score" in posts_df.columns:
	            ward_posts_with_location = posts_df[posts_df["ward_location"] != ""].copy()
	            if not ward_posts_with_location.empty:
	                ward_exploded_threat = ward_posts_with_location.copy()
	                ward_exploded_threat["ward_location"] = ward_posts_with_location["ward_location"].str.split(", ")
	                ward_exploded_threat = ward_exploded_threat.explode("ward_location").reset_index(drop=True)

	                ward_threat = ward_exploded_threat.groupby("ward_location")["threat_score"].mean().sort_values(ascending=False)

	                if not ward_threat.empty:
	                    st.markdown(f"🎯 Highest Threat Zone: {ward_threat.index[0].title()} (Score: {ward_threat.iloc[0]:.1f})")

	        if "drugs_mentioned" in posts_df.columns:
	            drug_mentions = posts_df["drugs_mentioned"].str.split(", ").explode()
	            # Rank only real substances. Previously the finding vanished
	            # whenever the "Unspecified" placeholder was the most frequent
	            # value, even though substances had been identified.
	            top_drug = drug_mentions[drug_mentions != "Unspecified"].value_counts()
	            if len(top_drug) > 0:
	                st.markdown(f"💊 Primary Substance: {top_drug.index[0]} ({top_drug.iloc[0]} mentions)")

	        if "hour" in posts_df.columns:
	            known_hours = posts_df["hour"].dropna()
	            if not known_hours.empty:
	                # The hour column turns float when some timestamps are
	                # missing; cast so the label reads "14:00", not "14.0:00".
	                peak_hour = int(known_hours.mode()[0])
	                st.markdown(f"🕐 Peak Activity Time: {peak_hour}:00 - {peak_hour+1}:00")

	        if "subreddit" in posts_df.columns:
	            subreddit_counts = posts_df["subreddit"].value_counts()
	            if not subreddit_counts.empty:
	                top_source = subreddit_counts.index[0]
	                st.markdown(f"📱 Primary Intelligence Source: r/{top_source}")

	with summary_col2:
	    st.markdown("Risk Assessment:")

	    if not posts_df.empty and "severity" in posts_df.columns:
	        critical_pct = (len(posts_df[posts_df["severity"] == "Critical"]) / len(posts_df) * 100)

	        # Risk level is driven solely by the share of Critical posts.
	        if critical_pct > 30:
	            risk_level = "🔴 CRITICAL"
	            risk_desc = "Immediate action required"
	        elif critical_pct > 15:
	            risk_level = "🟠 HIGH"
	            risk_desc = "Enhanced monitoring recommended"
	        elif critical_pct > 5:
	            risk_level = "🟡 MODERATE"
	            risk_desc = "Standard surveillance protocols"
	        else:
	            risk_level = "🟢 LOW"
	            risk_desc = "Routine monitoring sufficient"

	        st.markdown(f"Overall Risk Level: {risk_level}")
	        st.markdown(f"{risk_desc}")
	        st.markdown(f"- Critical incidents: {critical_pct:.1f}%")
	        st.markdown(f"- Total monitored incidents: {len(posts_df)}")
	        if "date" in posts_df.columns:
	            # Undated posts are left out so min/max only see real dates.
	            known_dates = posts_df["date"].dropna()
	            if not known_dates.empty:
	                st.markdown(f"- Date range: {known_dates.min()} to {known_dates.max()}")

	st.markdown("---")

	# --- Export Options
	st.subheader("📤 Export Intelligence Reports")

	export_col1, export_col2, export_col3 = st.columns(3)

	# Column 1: every post that survived the sidebar filters.
	with export_col1:
	    if not posts_df.empty:
	        full_export = posts_df.to_csv(index=False).encode("utf-8")
	        full_name = f"intelligence_full_{datetime.now().strftime('%Y%m%d')}.csv"
	        st.download_button(
	            label="📊 Full Dataset",
	            data=full_export,
	            file_name=full_name,
	            mime="text/csv",
	        )

	# Column 2: only the posts classified as Critical.
	with export_col2:
	    if "severity" in posts_df.columns:
	        critical_data = posts_df[posts_df["severity"] == "Critical"]
	        if not critical_data.empty:
	            critical_export = critical_data.to_csv(index=False).encode("utf-8")
	            critical_name = f"critical_incidents_{datetime.now().strftime('%Y%m%d')}.csv"
	            st.download_button(
	                label="🚨 Critical Incidents",
	                data=critical_export,
	                file_name=critical_name,
	                mime="text/csv",
	            )

	# Column 3: ward-level counts; merged_wards only exists when the map
	# section prepared it, hence the name lookup before use.
	with export_col3:
	    if 'merged_wards' in locals() and not merged_wards.empty:
	        location_export = merged_wards.to_csv(index=False).encode("utf-8")
	        location_name_csv = f"location_analysis_{datetime.now().strftime('%Y%m%d')}.csv"
	        st.download_button(
	            label="🗺️ Location Analysis",
	            data=location_export,
	            file_name=location_name_csv,
	            mime="text/csv",
	        )

	st.markdown("---")

	# --- System Status Footer
	st.markdown("🔒 Intelligence System Status:")
	status_cols = st.columns(4)
	# One column per data source, in the same order as the data_status keys.
	status_labels = (
	    ("📄 Posts:", "posts"),
	    ("💬 Comments:", "comments"),
	    ("🏘️ Wards:", "wards"),
	    ("🌍 Districts:", "districts"),
	)
	for status_col, (status_label, status_key) in zip(status_cols, status_labels):
	    with status_col:
	        st.write(status_label, "✅ Online" if data_status[status_key] else "❌ Offline")

	# Best effort: show the modification time of the posts file. Only
	# filesystem errors (e.g. the file does not exist) are ignored; anything
	# else is a real bug and should surface instead of being swallowed.
	try:
	    file_mod_time = datetime.fromtimestamp(os.path.getmtime(POSTS_FILE))
	except OSError:
	    file_mod_time = None
	if file_mod_time is not None:
	    st.markdown(f"Intelligence data last updated: {file_mod_time.strftime('%Y-%m-%d %H:%M:%S')}")

	st.markdown("---")