Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +902 -38
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,904 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
""
|
| 7 |
-
# Welcome to Streamlit!
|
| 8 |
-
|
| 9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
| 10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 11 |
-
forums](https://discuss.streamlit.io).
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
import plotly.graph_objects as go
|
| 5 |
+
from wordcloud import WordCloud
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import folium
|
| 8 |
+
from folium.plugins import HeatMap, MarkerCluster
|
| 9 |
+
from streamlit_folium import st_folium
|
| 10 |
+
from datetime import datetime, timedelta
|
| 11 |
+
import re
|
| 12 |
+
import os
|
| 13 |
+
from textblob import TextBlob
|
| 14 |
+
|
| 15 |
+
# ------------------------
|
| 16 |
+
# Config
|
| 17 |
+
# ------------------------
|
| 18 |
+
st.set_page_config(
|
| 19 |
+
page_title="Reddit based Drug Crime Intelligence Dashboard",
|
| 20 |
+
layout="wide",
|
| 21 |
+
initial_sidebar_state="expanded"
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
# Paths to data files
|
| 25 |
+
POSTS_FILE = "data/processed/reddit_posts_filtered.csv"
|
| 26 |
+
COMMENTS_FILE = "data/processed/reddit_comments_filtered.csv"
|
| 27 |
+
WARD_COORDS_FILE = "data/bangalore_wards_coordinates.csv"
|
| 28 |
+
DISTRICT_COORDS_FILE = "data/karnataka_districts_coordinates.csv"
|
| 29 |
+
|
| 30 |
+
# Drug-related keywords for classification
|
| 31 |
+
DRUG_KEYWORDS = {
|
| 32 |
+
'high_risk': ['dealing', 'dealer', 'supply', 'trafficking', 'smuggling', 'cartel', 'seized', 'arrest', 'raid'],
|
| 33 |
+
'substance': ['cocaine', 'heroin', 'mdma', 'meth', 'cannabis', 'marijuana', 'ganja', 'weed', 'lsd', 'ecstasy'],
|
| 34 |
+
'activity': ['selling', 'buying', 'distribution', 'possession', 'consumption', 'overdose', 'addiction']
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
# ------------------------
|
| 38 |
+
# Enhanced Data Loading
|
| 39 |
+
# ------------------------
|
| 40 |
+
def _read_table(path, title, unit, missing_label, postprocess=None, **read_csv_kwargs):
    """Read one CSV and report the outcome in the sidebar.

    Args:
        path: Location of the CSV file.
        title: Capitalised dataset name used in the success/error messages.
        unit: Noun printed after the row count in the success message.
        missing_label: Dataset description used in the "file not found" warning.
        postprocess: Optional callable applied to the freshly read frame
            (de-duplication, column renames). Errors it raises are reported
            the same way as read errors.
        **read_csv_kwargs: Forwarded unchanged to ``pd.read_csv``.

    Returns:
        ``(frame, loaded)``; ``frame`` is an empty DataFrame when loading failed.
    """
    try:
        frame = pd.read_csv(path, **read_csv_kwargs)
        if postprocess is not None:
            frame = postprocess(frame)
        st.sidebar.success(f"✅ {title} loaded: {len(frame)} {unit}")
        return frame, True
    except FileNotFoundError:
        st.sidebar.warning(f"⚠️ {missing_label} file not found")
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable file must not take
        # the whole dashboard down, so report it and continue with no data.
        st.sidebar.error(f"❌ Error loading {title.lower()}: {str(e)}")
    return pd.DataFrame(), False


@st.cache_data
def load_data(posts_file, comments_file, ward_file, district_file):
    """Load the four dashboard CSV files, tolerating missing or broken files.

    Posts are read with every column as ``str``; the other files use pandas'
    default type inference. Posts and comments are de-duplicated on ``id``
    when that column exists. A ``name`` column in the ward/district files is
    renamed to ``ward_name``/``district_name`` when the expected column is
    absent.

    Returns:
        ``(posts, comments, wards, districts, data_status)`` where
        ``data_status`` maps ``"posts"``, ``"comments"``, ``"wards"`` and
        ``"districts"`` to whether that file loaded successfully. Any file
        that failed to load is represented by an empty DataFrame.
    """

    def drop_duplicate_ids(frame):
        # Guarded so a file without an ``id`` column is still usable instead
        # of being discarded because of a KeyError.
        if 'id' in frame.columns:
            return frame.drop_duplicates(subset=['id'], keep='first')
        return frame

    def ensure_name_column(expected):
        def rename(frame):
            if expected not in frame.columns and 'name' in frame.columns:
                return frame.rename(columns={'name': expected})
            return frame
        return rename

    data_status = {"posts": False, "comments": False, "wards": False, "districts": False}

    posts, data_status["posts"] = _read_table(
        posts_file, "Posts", "records", "Reddit posts",
        postprocess=drop_duplicate_ids, dtype=str,
    )
    comments, data_status["comments"] = _read_table(
        comments_file, "Comments", "records", "Reddit comments",
        postprocess=drop_duplicate_ids,
    )
    wards, data_status["wards"] = _read_table(
        ward_file, "Wards", "wards", "Ward coordinates",
        postprocess=ensure_name_column('ward_name'),
    )
    districts, data_status["districts"] = _read_table(
        district_file, "Districts", "districts", "District coordinates",
        postprocess=ensure_name_column('district_name'),
    )

    return posts, comments, wards, districts, data_status
|
| 101 |
+
|
| 102 |
+
# ------------------------
|
| 103 |
+
# Crime Analysis Functions
|
| 104 |
+
# ------------------------
|
| 105 |
+
def classify_crime_severity(text):
    """Bucket a piece of text into a severity label from keyword hits.

    Every keyword found as a substring of the lower-cased text adds its
    category weight (high_risk 3, substance 2, activity 1) once. The total
    maps to 'Critical' (>= 5), 'High' (>= 3), 'Medium' (>= 1) or 'Low'.
    """
    haystack = str(text).lower()
    category_weights = (('high_risk', 3), ('substance', 2), ('activity', 1))

    total = sum(
        weight
        for category, weight in category_weights
        for term in DRUG_KEYWORDS[category]
        if term in haystack
    )

    # Thresholds are checked from the most to the least severe.
    for floor, label in ((5, 'Critical'), (3, 'High'), (1, 'Medium')):
        if total >= floor:
            return label
    return 'Low'
|
| 130 |
+
|
| 131 |
+
def extract_drug_mentions(text):
    """List the substance keywords that occur in the text.

    Matching is a case-insensitive substring test against
    ``DRUG_KEYWORDS['substance']``. Hits are capitalised and joined with
    ", " in keyword-list order; 'Unspecified' is returned when none occur.
    """
    haystack = str(text).lower()
    hits = [name.capitalize() for name in DRUG_KEYWORDS['substance'] if name in haystack]
    if not hits:
        return 'Unspecified'
    return ', '.join(hits)
|
| 139 |
+
|
| 140 |
+
def calculate_threat_score(row):
    """Compute a heuristic threat score (capped at 100) for one post.

    The score is the sum of:
      * 10 for every ``DRUG_KEYWORDS['high_risk']`` keyword found in the
        post's text and title (case-insensitive substring match),
      * ``score / 10`` capped at 5, when the row has a ``score`` field,
      * ``num_comments / 5`` capped at 5, when the row has a
        ``num_comments`` field,
      * 5 when the TextBlob sentiment polarity is below -0.2.

    Args:
        row: Mapping-like record (a DataFrame row or a dict) that may carry
            ``text``, ``title``, ``score`` and ``num_comments``.

    Returns:
        The numeric score, never greater than 100.
    """

    def is_missing(value):
        return value is None or (pd.api.types.is_scalar(value) and pd.isna(value))

    def field_text(name):
        # Missing values contribute nothing rather than the literal "nan".
        value = row.get(name, '')
        return '' if is_missing(value) else str(value)

    def engagement(name, divisor):
        # The posts CSV is loaded with dtype=str, so counts arrive as text
        # ("12", "12.0") or NaN. A blank or malformed count contributes 0
        # instead of raising and aborting the whole DataFrame.apply call.
        if name not in row:
            return 0
        value = row.get(name, 0)
        if is_missing(value):
            return 0
        try:
            count = int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0
        return min(count / divisor, 5)

    score = 0
    text = field_text('text') + ' ' + field_text('title')
    text_lower = text.lower()

    for keyword in DRUG_KEYWORDS['high_risk']:
        if keyword in text_lower:
            score += 10

    score += engagement('score', 10)
    score += engagement('num_comments', 5)

    sentiment = TextBlob(text).sentiment.polarity
    if sentiment < -0.2:
        score += 5

    return min(score, 100)
|
| 161 |
+
|
| 162 |
+
# ------------------------
|
| 163 |
+
# Load All Data
|
| 164 |
+
# ------------------------
|
| 165 |
+
posts_df, comments_df, wards_df, districts_df, data_status = load_data(
|
| 166 |
+
POSTS_FILE, COMMENTS_FILE, WARD_COORDS_FILE, DISTRICT_COORDS_FILE
|
| 167 |
+
)
|
| 168 |
+
|
| 169 |
+
# ------------------------
|
| 170 |
+
# Data Processing
|
| 171 |
+
# ------------------------
|
| 172 |
+
def process_datetime(df, datetime_col='created_utc'):
    """Add ``datetime``, ``date``, ``hour`` and ``day_of_week`` columns to *df*.

    The frame is modified in place and also returned. When *datetime_col* is
    absent the frame is returned untouched.

    Values are parsed with ``pd.to_datetime(..., errors='coerce')``, so
    unparseable entries become NaT. One exception: when every non-null value
    is a number in the plausible Unix-seconds range, the column is read as
    epoch seconds. Without this, numeric epochs would be interpreted as
    nanoseconds (dates in 1970) and numeric strings could fail to parse.
    NOTE(review): the epoch path assumes Reddit-style ``created_utc`` in
    seconds — confirm against the data-collection step.

    Args:
        df: Frame to annotate.
        datetime_col: Name of the column holding the timestamps.

    Returns:
        The same DataFrame object.
    """
    if datetime_col not in df.columns:
        return df

    # 1e9 s is 2001-09-09 and 1e10 s is 2286-11-20. Compact date strings such
    # as "20230101" or "20230101120000" fall outside this window and keep
    # going through the ordinary date parser.
    epoch_floor, epoch_ceiling = 1_000_000_000, 10_000_000_000

    raw = df[datetime_col]
    numeric = pd.to_numeric(raw, errors='coerce')
    present = raw.notna()
    looks_like_epoch = bool(
        present.any()
        and numeric[present].notna().all()
        and numeric[present].between(epoch_floor, epoch_ceiling).all()
    )

    if looks_like_epoch:
        df["datetime"] = pd.to_datetime(numeric, unit='s', errors='coerce')
    else:
        df["datetime"] = pd.to_datetime(raw, errors='coerce')

    df["date"] = df["datetime"].dt.date
    df["hour"] = df["datetime"].dt.hour
    df["day_of_week"] = df["datetime"].dt.day_name()
    return df
|
| 182 |
+
|
| 183 |
+
def _normalize_names(frame, column):
    """Strip and lower-case *column* in place, leaving missing values missing."""
    names = frame[column]
    # ``where`` keeps NaN as NaN; a plain astype(str) would turn it into the
    # literal name "nan", which would later match the word "nan" in posts.
    frame[column] = names.where(names.isna(), names.astype(str).str.strip().str.lower())


def _build_location_pattern(names):
    """Return a whole-word regex matching any of *names*, or None if there are none."""
    unique = {
        name
        for name in names.dropna().astype(str).str.strip().str.lower()
        if name
    }
    if not unique:
        return None
    # Longest first: regex alternation takes the first alternative that
    # matches, so a short name must not shadow a longer one it prefixes.
    ordered = sorted(unique, key=lambda name: (-len(name), name))
    return r'\b(' + '|'.join(re.escape(name) for name in ordered) + r')\b'


# Normalize coordinate names
if not wards_df.empty and "ward_name" in wards_df.columns:
    _normalize_names(wards_df, "ward_name")

if not districts_df.empty and "district_name" in districts_df.columns:
    _normalize_names(districts_df, "district_name")

# District mapping: common aliases -> the name used for districts
district_mapping = {
    "bangalore": "bengaluru",
    "blr": "bengaluru",
    "mysore": "mysuru",
}

# Create patterns (left as None when the coordinate file is missing, lacks
# the name column, or contains no usable names)
ward_pattern = None
district_pattern = None

if not wards_df.empty and "ward_name" in wards_df.columns:
    ward_pattern = _build_location_pattern(wards_df["ward_name"])

if not districts_df.empty and "district_name" in districts_df.columns:
    district_pattern = _build_location_pattern(districts_df["district_name"])
|
| 208 |
+
|
| 209 |
+
def extract_locations(text_series, patterns):
    """Find location names in each text using the given regex patterns.

    Each text is lower-cased before matching, and missing texts are treated
    as empty. Matches from all patterns are de-duplicated and joined with
    ", " in order of first appearance, so the result is the same on every
    run (joining a ``set`` is not). Empty matches are dropped.

    Args:
        text_series: pandas Series of texts to scan.
        patterns: Iterable of regex pattern strings. ``re.findall`` semantics
            apply, so each pattern should have at most one capture group.

    Returns:
        A Series of comma-separated location names ("" when nothing matched)
        that shares the index of *text_series*.
    """
    # Compile once rather than on every row.
    compiled = [re.compile(pattern) for pattern in patterns]

    locations = []
    for text in text_series.fillna(""):
        lowered = str(text).lower()
        # dict.fromkeys removes duplicates while keeping insertion order.
        found = dict.fromkeys(
            hit
            for regex in compiled
            for hit in regex.findall(lowered)
            if hit
        )
        locations.append(", ".join(found))
    return pd.Series(locations, index=text_series.index)
|
| 219 |
+
|
| 220 |
+
# Process posts
|
| 221 |
+
if not posts_df.empty:
    posts_df = process_datetime(posts_df)

    # Fill each column before concatenating. Concatenating first makes the
    # combined text NaN whenever either part is missing, which silently
    # discards the title of every post that has no body text.
    title_text, body_text = (
        posts_df[column].fillna("").astype(str)
        if column in posts_df.columns
        else pd.Series("", index=posts_df.index)
        for column in ("title", "text")
    )
    post_text = title_text + " " + body_text

    if ward_pattern:
        posts_df["ward_location"] = extract_locations(post_text, [ward_pattern])
    else:
        posts_df["ward_location"] = ""

    if district_pattern:
        posts_df["district_location"] = extract_locations(post_text, [district_pattern])
    else:
        posts_df["district_location"] = ""

    # Map aliases name by name. A whole-cell replace would miss aliases inside
    # a comma-joined list such as "bangalore, mysore". Names that collapse to
    # the same canonical district are merged.
    posts_df["district_location"] = posts_df["district_location"].apply(
        lambda joined: ", ".join(dict.fromkeys(
            district_mapping.get(name, name)
            for name in str(joined).split(", ")
            if name
        ))
    )

    posts_df["severity"] = post_text.apply(classify_crime_severity)
    posts_df["drugs_mentioned"] = post_text.apply(extract_drug_mentions)
    posts_df["threat_score"] = posts_df.apply(calculate_threat_score, axis=1)

    # Polarity is a float; its sign decides the coarse sentiment label.
    posts_df["sentiment_score"] = post_text.apply(lambda x: TextBlob(str(x)).sentiment.polarity)
    posts_df["sentiment"] = posts_df["sentiment_score"].apply(
        lambda x: "Positive" if x > 0 else ("Negative" if x < 0 else "Neutral")
    )
|
| 246 |
+
|
| 247 |
+
# Process comments
|
| 248 |
+
if not comments_df.empty:
|
| 249 |
+
comments_df = process_datetime(comments_df)
|
| 250 |
+
|
| 251 |
+
# ------------------------
|
| 252 |
+
# Dashboard Header
|
| 253 |
+
# ------------------------
|
| 254 |
+
st.title("π¨ Reddit based Drug Crime Intelligence Dashboard")
|
| 255 |
+
st.markdown("**Real-time intelligence analysis of drug-related criminal activities from Reddit social media monitoring**")
|
| 256 |
+
|
| 257 |
+
# ------------------------
|
| 258 |
+
# Sidebar Filters
|
| 259 |
+
# ------------------------
|
| 260 |
+
st.sidebar.title("π§ Intelligence Controls")
|
| 261 |
+
|
| 262 |
+
if st.sidebar.button("π Refresh Data"):
|
| 263 |
+
st.cache_data.clear()
|
| 264 |
+
st.rerun()
|
| 265 |
+
|
| 266 |
+
# Severity filter
|
| 267 |
+
if not posts_df.empty and "severity" in posts_df.columns:
|
| 268 |
+
severity_filter = st.sidebar.multiselect(
|
| 269 |
+
"β οΈ Crime Severity Level",
|
| 270 |
+
options=['Critical', 'High', 'Medium', 'Low'],
|
| 271 |
+
default=['Critical', 'High']
|
| 272 |
+
)
|
| 273 |
+
if severity_filter:
|
| 274 |
+
posts_df = posts_df[posts_df["severity"].isin(severity_filter)]
|
| 275 |
+
|
| 276 |
+
# Date range filter
|
| 277 |
+
if not posts_df.empty and "datetime" in posts_df.columns:
    # Timestamps that failed to parse are NaT. If none parsed, there is no
    # range to offer and the picker would be handed NaT bounds, so the filter
    # is skipped in that case.
    valid_datetimes = posts_df["datetime"].dropna()

    if not valid_datetimes.empty:
        min_date = valid_datetimes.min().date()
        max_date = valid_datetimes.max().date()

        date_range = st.sidebar.date_input(
            "📅 Select Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date
        )

        # Only filter once both ends of the range have been chosen.
        if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
            posts_df = posts_df[
                (posts_df["date"] >= date_range[0]) &
                (posts_df["date"] <= date_range[1])
            ]
|
| 293 |
+
|
| 294 |
+
# Subreddit filter
|
| 295 |
+
if not posts_df.empty and "subreddit" in posts_df.columns:
|
| 296 |
+
subreddits = st.sidebar.multiselect(
|
| 297 |
+
"π± Filter by Subreddits",
|
| 298 |
+
options=posts_df["subreddit"].unique(),
|
| 299 |
+
default=posts_df["subreddit"].value_counts().head(5).index.tolist()
|
| 300 |
+
)
|
| 301 |
+
if subreddits:
|
| 302 |
+
posts_df = posts_df[posts_df["subreddit"].isin(subreddits)]
|
| 303 |
+
|
| 304 |
+
# Keyword search
|
| 305 |
+
search_keyword = st.sidebar.text_input("🔍 Search Keywords in Content")
if search_keyword and not posts_df.empty:
    # The input is matched as literal text, case-insensitively. Treating it
    # as a regex would make inputs like "(" or "[" raise instead of search.
    keyword_hits = pd.Series(False, index=posts_df.index)
    for searched_column in ("text", "title"):
        # Either column may be absent; search whichever ones exist.
        if searched_column in posts_df.columns:
            keyword_hits |= (
                posts_df[searched_column]
                .fillna("")
                .astype(str)
                .str.contains(search_keyword, case=False, na=False, regex=False)
            )
    posts_df = posts_df[keyword_hits]
|
| 311 |
+
|
| 312 |
+
# ------------------------
|
| 313 |
+
# Main Dashboard Content
|
| 314 |
+
# ------------------------
|
| 315 |
+
|
| 316 |
+
if posts_df.empty and comments_df.empty:
|
| 317 |
+
st.error("π« No intelligence data available. Please ensure data collection is operational.")
|
| 318 |
+
st.stop()
|
| 319 |
+
|
| 320 |
+
# --- Crime Intelligence Metrics
|
| 321 |
+
st.subheader("π Crime Intelligence Overview")
|
| 322 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 323 |
+
|
| 324 |
+
with col1:
|
| 325 |
+
critical_posts = len(posts_df[posts_df["severity"] == "Critical"]) if "severity" in posts_df.columns else 0
|
| 326 |
+
st.metric(
|
| 327 |
+
label="Critical Threats",
|
| 328 |
+
value=critical_posts,
|
| 329 |
+
delta=f"{(critical_posts/len(posts_df)*100):.1f}%" if len(posts_df) > 0 else "0%"
|
| 330 |
+
)
|
| 331 |
+
|
| 332 |
+
with col2:
|
| 333 |
+
avg_threat = posts_df["threat_score"].mean() if "threat_score" in posts_df.columns else 0
|
| 334 |
+
st.metric(
|
| 335 |
+
label="Avg Threat Score",
|
| 336 |
+
value=f"{avg_threat:.1f}",
|
| 337 |
+
delta="High" if avg_threat > 50 else "Moderate"
|
| 338 |
+
)
|
| 339 |
+
|
| 340 |
+
with col3:
|
| 341 |
+
if "ward_location" in posts_df.columns:
|
| 342 |
+
ward_exploded_temp = posts_df[posts_df["ward_location"] != ""].copy()
|
| 343 |
+
ward_exploded_temp["ward_location"] = ward_exploded_temp["ward_location"].str.split(", ")
|
| 344 |
+
ward_exploded_temp = ward_exploded_temp.explode("ward_location")
|
| 345 |
+
unique_locations = ward_exploded_temp["ward_location"].nunique()
|
| 346 |
+
st.metric(
|
| 347 |
+
label="Active Locations",
|
| 348 |
+
value=unique_locations
|
| 349 |
+
)
|
| 350 |
+
|
| 351 |
+
with col4:
    # Count distinct substances only. "Unspecified" is the placeholder for
    # posts that name no substance, so it is excluded here.
    if "drugs_mentioned" in posts_df.columns:
        mentioned = posts_df["drugs_mentioned"].str.split(", ").explode()
        drug_types = mentioned[mentioned != "Unspecified"].nunique()
    else:
        drug_types = 0
    st.metric(
        label="Drug Types Identified",
        value=drug_types
    )
|
| 357 |
+
|
| 358 |
+
st.markdown("---")
|
| 359 |
+
|
| 360 |
+
# --- Crime Severity Distribution
|
| 361 |
+
if "severity" in posts_df.columns:
|
| 362 |
+
st.subheader("β οΈ Crime Severity Analysis")
|
| 363 |
+
|
| 364 |
+
col1, col2 = st.columns(2)
|
| 365 |
+
|
| 366 |
+
with col1:
|
| 367 |
+
severity_counts = posts_df["severity"].value_counts()
|
| 368 |
+
fig_severity = px.pie(
|
| 369 |
+
values=severity_counts.values,
|
| 370 |
+
names=severity_counts.index,
|
| 371 |
+
title="Crime Severity Distribution",
|
| 372 |
+
color=severity_counts.index,
|
| 373 |
+
color_discrete_map={
|
| 374 |
+
'Critical': '#FF0000',
|
| 375 |
+
'High': '#FF6B00',
|
| 376 |
+
'Medium': '#FFD700',
|
| 377 |
+
'Low': '#90EE90'
|
| 378 |
+
}
|
| 379 |
+
)
|
| 380 |
+
st.plotly_chart(fig_severity, use_container_width=True)
|
| 381 |
+
|
| 382 |
+
with col2:
|
| 383 |
+
fig_threat = px.histogram(
|
| 384 |
+
posts_df,
|
| 385 |
+
x="threat_score",
|
| 386 |
+
nbins=20,
|
| 387 |
+
title="Threat Score Distribution",
|
| 388 |
+
labels={"threat_score": "Threat Score", "count": "Number of Posts"}
|
| 389 |
+
)
|
| 390 |
+
fig_threat.add_vline(x=50, line_dash="dash", line_color="red", annotation_text="High Threat Threshold")
|
| 391 |
+
st.plotly_chart(fig_threat, use_container_width=True)
|
| 392 |
+
|
| 393 |
+
st.markdown("---")
|
| 394 |
+
|
| 395 |
+
# --- Drug Type Analysis
|
| 396 |
+
if "drugs_mentioned" in posts_df.columns:
|
| 397 |
+
st.subheader("π Substance Intelligence")
|
| 398 |
+
|
| 399 |
+
all_drugs = posts_df["drugs_mentioned"].str.split(", ").explode()
|
| 400 |
+
drug_counts = all_drugs[all_drugs != "Unspecified"].value_counts().head(10)
|
| 401 |
+
|
| 402 |
+
if not drug_counts.empty:
|
| 403 |
+
fig_drugs = px.bar(
|
| 404 |
+
x=drug_counts.values,
|
| 405 |
+
y=drug_counts.index,
|
| 406 |
+
orientation='h',
|
| 407 |
+
title="Top 10 Substances Mentioned",
|
| 408 |
+
labels={"x": "Mentions", "y": "Substance"},
|
| 409 |
+
color=drug_counts.values,
|
| 410 |
+
color_continuous_scale="Reds"
|
| 411 |
+
)
|
| 412 |
+
st.plotly_chart(fig_drugs, use_container_width=True)
|
| 413 |
+
|
| 414 |
+
st.markdown("---")
|
| 415 |
+
|
| 416 |
+
# --- Timeline Analysis
|
| 417 |
+
if "date" in posts_df.columns:
|
| 418 |
+
st.subheader("π Crime Activity Timeline")
|
| 419 |
+
|
| 420 |
+
col1, col2 = st.columns(2)
|
| 421 |
+
|
| 422 |
+
with col1:
|
| 423 |
+
daily_data = posts_df.groupby(["date", "severity"]).size().reset_index(name="count")
|
| 424 |
+
fig_daily = px.line(
|
| 425 |
+
daily_data,
|
| 426 |
+
x="date",
|
| 427 |
+
y="count",
|
| 428 |
+
color="severity",
|
| 429 |
+
title="Daily Crime Activity by Severity",
|
| 430 |
+
labels={"count": "Number of Incidents", "date": "Date"},
|
| 431 |
+
color_discrete_map={
|
| 432 |
+
'Critical': '#FF0000',
|
| 433 |
+
'High': '#FF6B00',
|
| 434 |
+
'Medium': '#FFD700',
|
| 435 |
+
'Low': '#90EE90'
|
| 436 |
+
}
|
| 437 |
+
)
|
| 438 |
+
st.plotly_chart(fig_daily, use_container_width=True)
|
| 439 |
+
|
| 440 |
+
with col2:
|
| 441 |
+
if "hour" in posts_df.columns and "day_of_week" in posts_df.columns:
|
| 442 |
+
hourly_activity = posts_df.groupby(["day_of_week", "hour"]).size().reset_index(name="count")
|
| 443 |
+
fig_hourly = px.density_heatmap(
|
| 444 |
+
hourly_activity,
|
| 445 |
+
x="hour",
|
| 446 |
+
y="day_of_week",
|
| 447 |
+
z="count",
|
| 448 |
+
title="Activity Heatmap - High-Risk Hours",
|
| 449 |
+
labels={"hour": "Hour of Day", "day_of_week": "Day", "count": "Incidents"},
|
| 450 |
+
color_continuous_scale="Reds"
|
| 451 |
+
)
|
| 452 |
+
st.plotly_chart(fig_hourly, use_container_width=True)
|
| 453 |
+
|
| 454 |
+
st.markdown("---")
|
| 455 |
+
|
| 456 |
+
# --- Geographic Intelligence - COMBINED MAP
|
| 457 |
+
st.subheader("πΊοΈ Geographic Crime Intelligence")
|
| 458 |
+
|
| 459 |
+
# Process both ward and district data
|
| 460 |
+
ward_data_available = not wards_df.empty and "ward_location" in posts_df.columns
|
| 461 |
+
district_data_available = not districts_df.empty and "district_location" in posts_df.columns
|
| 462 |
+
|
| 463 |
+
if ward_data_available or district_data_available:
|
| 464 |
+
st.markdown("**Crime hotspot analysis across Karnataka (Wards & Districts)**")
|
| 465 |
+
|
| 466 |
+
# Prepare ward data
|
| 467 |
+
merged_wards = pd.DataFrame()
|
| 468 |
+
if ward_data_available:
|
| 469 |
+
ward_posts = posts_df[posts_df["ward_location"] != ""].copy()
|
| 470 |
+
ward_exploded = ward_posts.copy()
|
| 471 |
+
ward_exploded["ward_location"] = ward_posts["ward_location"].str.split(", ")
|
| 472 |
+
ward_exploded = ward_exploded.explode("ward_location")
|
| 473 |
+
ward_exploded["ward_location"] = ward_exploded["ward_location"].str.strip().str.lower()
|
| 474 |
+
|
| 475 |
+
loc_counts = ward_exploded.groupby("ward_location").size().reset_index(name="count")
|
| 476 |
+
merged_wards = pd.merge(loc_counts, wards_df, left_on="ward_location", right_on="ward_name", how="inner")
|
| 477 |
+
merged_wards["location_type"] = "Ward"
|
| 478 |
+
merged_wards["location_name"] = merged_wards["ward_name"]
|
| 479 |
+
|
| 480 |
+
# Prepare district data
|
| 481 |
+
merged_districts = pd.DataFrame()
|
| 482 |
+
if district_data_available:
|
| 483 |
+
district_posts = posts_df[posts_df["district_location"] != ""].copy()
|
| 484 |
+
district_exploded = district_posts.copy()
|
| 485 |
+
district_exploded["district_location"] = district_posts["district_location"].str.split(", ")
|
| 486 |
+
district_exploded = district_exploded.explode("district_location")
|
| 487 |
+
district_exploded["district_location"] = district_exploded["district_location"].str.strip().str.lower()
|
| 488 |
+
|
| 489 |
+
district_counts = district_exploded.groupby("district_location").size().reset_index(name="count")
|
| 490 |
+
merged_districts = pd.merge(district_counts, districts_df, left_on="district_location", right_on="district_name", how="inner")
|
| 491 |
+
merged_districts["location_type"] = "District"
|
| 492 |
+
merged_districts["location_name"] = merged_districts["district_name"]
|
| 493 |
+
|
| 494 |
+
# Combine both datasets
|
| 495 |
+
all_locations = pd.concat([merged_wards, merged_districts], ignore_index=True)
|
| 496 |
+
|
| 497 |
+
if not all_locations.empty:
|
| 498 |
+
# Determine center of map
|
| 499 |
+
center_lat = all_locations["lat"].mean()
|
| 500 |
+
center_lon = all_locations["lon"].mean()
|
| 501 |
+
|
| 502 |
+
# Create unified map
|
| 503 |
+
m_unified = folium.Map(
|
| 504 |
+
location=[center_lat, center_lon],
|
| 505 |
+
zoom_start=9 if ward_data_available else 7,
|
| 506 |
+
tiles="OpenStreetMap"
|
| 507 |
+
)
|
| 508 |
+
|
| 509 |
+
# Add heatmap layer
|
| 510 |
+
heat_data = [[row["lat"], row["lon"], row["count"]] for _, row in all_locations.iterrows()]
|
| 511 |
+
HeatMap(heat_data, radius=20, blur=15, max_zoom=13, gradient={
|
| 512 |
+
0.0: 'blue', 0.5: 'yellow', 0.75: 'orange', 1.0: 'red'
|
| 513 |
+
}).add_to(m_unified)
|
| 514 |
+
|
| 515 |
+
# Determine hotspot threshold
|
| 516 |
+
threshold = all_locations["count"].quantile(0.70)
|
| 517 |
+
all_locations["is_hotspot"] = all_locations["count"] >= threshold
|
| 518 |
+
|
| 519 |
+
# Add markers for each location
|
| 520 |
+
for _, row in all_locations.iterrows():
|
| 521 |
+
location_name = row["location_name"].title()
|
| 522 |
+
location_type = row["location_type"]
|
| 523 |
+
incident_count = row["count"]
|
| 524 |
+
|
| 525 |
+
# Get location-specific crime data
|
| 526 |
+
if location_type == "Ward":
|
| 527 |
+
loc_data = posts_df[posts_df["ward_location"].str.contains(row["location_name"], case=False, na=False)]
|
| 528 |
+
else:
|
| 529 |
+
loc_data = posts_df[posts_df["district_location"].str.contains(row["location_name"], case=False, na=False)]
|
| 530 |
+
|
| 531 |
+
# Severity breakdown
|
| 532 |
+
severity_breakdown = loc_data["severity"].value_counts().to_dict()
|
| 533 |
+
severity_html = "<br>".join([f" β’ {sev}: {count}" for sev, count in severity_breakdown.items()])
|
| 534 |
+
|
| 535 |
+
# Critical incidents count
|
| 536 |
+
critical_count = severity_breakdown.get("Critical", 0)
|
| 537 |
+
|
| 538 |
+
# Top drugs in this location
|
| 539 |
+
loc_drugs = loc_data["drugs_mentioned"].str.split(", ").explode()
|
| 540 |
+
top_drugs = loc_drugs[loc_drugs != "Unspecified"].value_counts().head(3)
|
| 541 |
+
drugs_html = "<br>".join([f" β’ {drug}: {count}" for drug, count in top_drugs.items()])
|
| 542 |
+
|
| 543 |
+
# Average threat score
|
| 544 |
+
avg_threat = loc_data["threat_score"].mean()
|
| 545 |
+
|
| 546 |
+
# Recent high-threat incidents
|
| 547 |
+
recent = loc_data.nlargest(3, "threat_score")[["title", "severity", "threat_score"]]
|
| 548 |
+
incidents_html = "<br>".join([
|
| 549 |
+
f" β’ <b>[{r['severity']}]</b> {r['title'][:50]}... <i>(Score: {r['threat_score']:.0f})</i>"
|
| 550 |
+
for _, r in recent.iterrows()
|
| 551 |
+
])
|
| 552 |
+
|
| 553 |
+
# Marker color based on severity
|
| 554 |
+
marker_color = 'darkred' if row["is_hotspot"] else ('red' if incident_count >= 5 else ('orange' if incident_count >= 3 else 'blue'))
|
| 555 |
+
|
| 556 |
+
# Icon based on type
|
| 557 |
+
icon_symbol = 'home' if location_type == "Ward" else 'map'
|
| 558 |
+
|
| 559 |
+
# Create detailed popup
|
| 560 |
+
popup_html = f"""
|
| 561 |
+
<div style='width: 350px; font-family: Arial, sans-serif;'>
|
| 562 |
+
<h3 style='color: {marker_color}; margin-bottom: 8px; border-bottom: 2px solid {marker_color}; padding-bottom: 5px;'>
|
| 563 |
+
{location_type}: {location_name}
|
| 564 |
+
</h3>
|
| 565 |
+
<div style='margin: 10px 0;'>
|
| 566 |
+
<b>π Total Incidents:</b> <span style='font-size: 18px; color: {marker_color};'>{incident_count}</span><br>
|
| 567 |
+
<b>π¨ Critical Threats:</b> <span style='font-size: 18px; color: darkred;'>{critical_count}</span><br>
|
| 568 |
+
<b>π Avg Threat Score:</b> <span style='font-size: 16px;'>{avg_threat:.1f}/100</span>
|
| 569 |
+
</div>
|
| 570 |
+
<hr style='border: 1px solid #ddd;'>
|
| 571 |
+
<div style='margin: 10px 0;'>
|
| 572 |
+
<b>β οΈ Severity Breakdown:</b><br>
|
| 573 |
+
{severity_html if severity_html else ' No data'}
|
| 574 |
+
</div>
|
| 575 |
+
<hr style='border: 1px solid #ddd;'>
|
| 576 |
+
<div style='margin: 10px 0;'>
|
| 577 |
+
<b>π Top Substances Detected:</b><br>
|
| 578 |
+
{drugs_html if not top_drugs.empty else ' None identified'}
|
| 579 |
+
</div>
|
| 580 |
+
<hr style='border: 1px solid #ddd;'>
|
| 581 |
+
<div style='margin: 10px 0;'>
|
| 582 |
+
<b>π― Recent High-Threat Incidents:</b><br>
|
| 583 |
+
{incidents_html if not recent.empty else ' None'}
|
| 584 |
+
</div>
|
| 585 |
+
<div style='margin-top: 10px; padding: 5px; background-color: #f0f0f0; border-radius: 5px; text-align: center; font-size: 11px;'>
|
| 586 |
+
<i>Click marker for details β’ Hover for quick info</i>
|
| 587 |
+
</div>
|
| 588 |
+
</div>
|
| 589 |
+
"""
|
| 590 |
+
|
| 591 |
+
# Tooltip (hover text)
|
| 592 |
+
tooltip_text = f"""
|
| 593 |
+
<b>{location_type}: {location_name}</b><br>
|
| 594 |
+
Total Incidents: {incident_count}<br>
|
| 595 |
+
Critical: {critical_count} | Avg Threat: {avg_threat:.1f}
|
| 596 |
+
"""
|
| 597 |
+
|
| 598 |
+
# Add marker
|
| 599 |
+
folium.CircleMarker(
|
| 600 |
+
location=[row["lat"], row["lon"]],
|
| 601 |
+
radius=min(incident_count * 2.5 if location_type == "Ward" else incident_count * 3.5, 25),
|
| 602 |
+
color=marker_color,
|
| 603 |
+
fill=True,
|
| 604 |
+
fill_color=marker_color,
|
| 605 |
+
fill_opacity=0.7,
|
| 606 |
+
weight=2,
|
| 607 |
+
popup=folium.Popup(popup_html, max_width=400),
|
| 608 |
+
tooltip=folium.Tooltip(tooltip_text, sticky=True)
|
| 609 |
+
).add_to(m_unified)
|
| 610 |
+
|
| 611 |
+
# Render the assembled folium map across the full page width.
st_folium(m_unified, width="100%", height=700)

# Hotspot analysis table
st.subheader("π₯ Top Crime Hotspots")

# A one-column layout is still requested so the table sits in a column container.
(hotspot_col,) = st.columns(1)

with hotspot_col:
    st.markdown("**High-Activity Wards**")
    if merged_wards.empty:
        st.info("No ward data available")
    else:
        # Ten wards with the highest incident counts, busiest first.
        ward_display = merged_wards.sort_values("count", ascending=False).head(10)
        ward_table = ward_display[["ward_name", "count"]].rename(
            columns={"ward_name": "Ward Name", "count": "Incidents"}
        )
        st.dataframe(
            ward_table.reset_index(drop=True),
            use_container_width=True,
            height=300,
        )

st.markdown("---")
|
| 635 |
+
|
| 636 |
+
# --- High-Priority Intelligence Reports
st.subheader("π¨ High-Priority Intelligence Reports")

if posts_df.empty:
    st.info("No intelligence data available")
else:
    # A post is "priority" when either its severity label or its numeric
    # threat score crosses the threshold.
    is_severe = posts_df["severity"].isin(['Critical', 'High'])
    is_high_score = posts_df["threat_score"] >= 50
    priority_posts = posts_df[is_severe | is_high_score].sort_values("threat_score", ascending=False)

    if priority_posts.empty:
        st.info("No high-priority incidents in selected date range")
    else:
        # Rows are already sorted by score, so the first duplicate kept is the highest-scoring one.
        priority_posts = priority_posts.drop_duplicates(subset=['id'], keep='first')

        # Source column -> heading shown in the table; columns absent from the data are skipped.
        column_headings = {
            "datetime": "Timestamp",
            "title": "Intelligence Report",
            "severity": "Severity",
            "threat_score": "Threat Score",
            "drugs_mentioned": "Substances",
            "ward_location": "Location",
            "subreddit": "Source",
        }
        available_cols = [name for name in column_headings if name in priority_posts.columns]
        priority_view = priority_posts[available_cols]

        # The on-screen table is capped at 50 rows; the CSV below carries every priority row.
        st.dataframe(
            priority_view.head(50).rename(columns=column_headings),
            use_container_width=True,
            height=400,
        )

        st.download_button(
            label="π₯ Download Priority Reports (CSV)",
            data=priority_view.to_csv(index=False).encode("utf-8"),
            file_name=f"priority_intelligence_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv",
        )

st.markdown("---")
|
| 677 |
+
|
| 678 |
+
# --- Advanced Analytics Section
st.subheader("π¬ Advanced Crime Analytics")

# One severity -> colour palette feeds both charts so their legends agree.
severity_palette = {
    'Critical': '#FF0000',
    'High': '#FF6B00',
    'Medium': '#FFD700',
    'Low': '#90EE90',
}

time_col, sentiment_col = st.columns(2)

with time_col:
    if {"hour", "severity"}.issubset(posts_df.columns):
        st.markdown("**Crime Patterns by Time of Day**")
        # Number of posts for every (hour, severity) pair.
        time_severity = (
            posts_df.groupby(["hour", "severity"])
            .size()
            .reset_index(name="count")
        )
        fig_time = px.bar(
            time_severity,
            x="hour",
            y="count",
            color="severity",
            title="Crime Activity by Hour and Severity",
            labels={"hour": "Hour of Day", "count": "Incidents"},
            color_discrete_map=dict(severity_palette),
        )
        st.plotly_chart(fig_time, use_container_width=True)

with sentiment_col:
    if {"sentiment_score", "severity"}.issubset(posts_df.columns):
        st.markdown("**Sentiment vs Crime Severity**")
        # One box per severity level showing the spread of sentiment scores.
        fig_sentiment_severity = px.box(
            posts_df,
            x="severity",
            y="sentiment_score",
            color="severity",
            title="Sentiment Distribution by Crime Severity",
            labels={"sentiment_score": "Sentiment Score", "severity": "Crime Severity"},
            color_discrete_map=dict(severity_palette),
        )
        st.plotly_chart(fig_sentiment_severity, use_container_width=True)

st.markdown("---")
|
| 723 |
+
|
| 724 |
+
# --- Network Analysis
# Bar chart of the most frequent (source community, substance) pairs.
if "subreddit" in posts_df.columns and "drugs_mentioned" in posts_df.columns:
    st.subheader("πΈοΈ Source-Substance Network Analysis")

    # "drugs_mentioned" holds a ", "-separated list (it is split the same way
    # for the map popups and the executive summary). Split and explode it so
    # that every substance is counted on its own instead of treating a
    # combination such as "a, b" as one distinct substance.
    drug_mentions = posts_df[["subreddit", "drugs_mentioned"]].dropna().copy()
    drug_mentions["drugs_mentioned"] = drug_mentions["drugs_mentioned"].str.split(", ")
    drug_mentions = drug_mentions.explode("drugs_mentioned")

    # Placeholder labels carry no information about a substance.
    is_placeholder = drug_mentions["drugs_mentioned"].isin(["", "Unspecified"])
    drug_mentions = drug_mentions[~is_placeholder]

    source_drug = drug_mentions.groupby(
        ["subreddit", "drugs_mentioned"]
    ).size().reset_index(name="mentions")

    if not source_drug.empty:
        # Keep the chart readable: only the 15 strongest pairs are drawn.
        top_relationships = source_drug.nlargest(15, "mentions")

        fig_network = px.bar(
            top_relationships,
            x="mentions",
            y="subreddit",
            color="drugs_mentioned",
            orientation='h',
            title="Top Source-Substance Relationships",
            labels={"mentions": "Number of Mentions", "subreddit": "Source Community"},
            height=500,
        )
        st.plotly_chart(fig_network, use_container_width=True)

st.markdown("---")
|
| 748 |
+
|
| 749 |
+
# --- Emerging Threats Detection
# Compares the most recent 7 days of data against the 7 days before them.
st.subheader("β‘ Emerging Threats Detection")

# The date arithmetic below needs at least one real date: on an empty frame
# max() yields NaN and "NaN - timedelta" raises.
has_threat_data = (
    not posts_df.empty
    and "date" in posts_df.columns
    and "threat_score" in posts_df.columns
    and posts_df["date"].notna().any()
)

if has_threat_data:
    # "today" is the newest date present in the data, not the wall-clock date.
    today = posts_df["date"].max()
    last_week = today - timedelta(days=7)
    prev_week = last_week - timedelta(days=7)

    is_recent = posts_df["date"] >= last_week
    is_before_recent = posts_df["date"] < last_week
    is_previous_week = (posts_df["date"] >= prev_week) & is_before_recent

    recent_threats = posts_df[is_recent]["threat_score"].mean()
    previous_threats = posts_df[is_previous_week]["threat_score"].mean()

    # A NaN or zero baseline (no posts in the previous week) reports no change.
    threat_change = ((recent_threats - previous_threats) / previous_threats * 100) if previous_threats > 0 else 0

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric(
            "Threat Level Trend",
            f"{recent_threats:.1f}",
            f"{threat_change:+.1f}%",
            delta_color="inverse",
        )

    with col2:
        if "ward_location" in posts_df.columns:
            # ward_location is a ", "-separated list; posts without a location
            # contribute "" or NaN, which must not be counted as a place.
            recent_locs = set(
                posts_df.loc[is_recent, "ward_location"].dropna().str.split(", ").explode()
            )
            prev_locs = set(
                posts_df.loc[is_before_recent, "ward_location"].dropna().str.split(", ").explode()
            )
            new_locations = len(recent_locs - prev_locs - {""})
            st.metric("New Active Locations", new_locations)
        else:
            st.metric("New Active Locations", "N/A")

    with col3:
        # A spike is a recent daily post count more than 1.5x the overall daily average.
        daily_avg = posts_df.groupby("date").size().mean()
        recent_avg = posts_df[is_recent].groupby("date").size().mean()
        spike = recent_avg > daily_avg * 1.5
        st.metric("Activity Status", "⚠️ SPIKE" if spike else "✅ Normal")

st.markdown("---")
|
| 785 |
+
|
| 786 |
+
# --- Intelligence Summary Report
# Left column: headline findings. Right column: overall risk rating derived
# from the share of posts labelled "Critical".
st.subheader("π Executive Intelligence Summary")

summary_col1, summary_col2 = st.columns(2)

with summary_col1:
    st.markdown("**Key Findings:**")

    if not posts_df.empty:
        if "ward_location" in posts_df.columns and "threat_score" in posts_df.columns:
            ward_posts_with_location = posts_df[posts_df["ward_location"] != ""].copy()
            if not ward_posts_with_location.empty:
                # One row per (post, ward) so a post naming several wards counts for each.
                ward_exploded_threat = ward_posts_with_location.copy()
                ward_exploded_threat["ward_location"] = ward_posts_with_location["ward_location"].str.split(", ")
                ward_exploded_threat = ward_exploded_threat.explode("ward_location").reset_index(drop=True)

                ward_threat = ward_exploded_threat.groupby("ward_location")["threat_score"].mean().sort_values(ascending=False)

                if not ward_threat.empty:
                    st.markdown(f"π― **Highest Threat Zone:** {ward_threat.index[0].title()} (Score: {ward_threat.iloc[0]:.1f})")

        if "drugs_mentioned" in posts_df.columns:
            drug_counts = posts_df["drugs_mentioned"].str.split(", ").explode().value_counts()
            # Drop placeholder labels BEFORE picking the leader; otherwise the
            # finding vanishes whenever "Unspecified" is the most common value.
            top_drug = drug_counts.drop(labels=["Unspecified", ""], errors="ignore")
            if not top_drug.empty:
                st.markdown(f"π **Primary Substance:** {top_drug.index[0]} ({top_drug.iloc[0]} mentions)")

        if "hour" in posts_df.columns:
            # mode() is empty when every hour is missing.
            hour_modes = posts_df["hour"].mode()
            if not hour_modes.empty:
                # int() avoids "14.0:00" when the column is float; % 24 wraps 23 -> 0.
                peak_hour = int(hour_modes.iloc[0])
                st.markdown(f"π **Peak Activity Time:** {peak_hour}:00 - {(peak_hour + 1) % 24}:00")

        if "subreddit" in posts_df.columns:
            source_counts = posts_df["subreddit"].value_counts()
            if not source_counts.empty:
                top_source = source_counts.index[0]
                st.markdown(f"π± **Primary Intelligence Source:** r/{top_source}")

with summary_col2:
    st.markdown("**Risk Assessment:**")

    if not posts_df.empty and "severity" in posts_df.columns:
        critical_pct = (len(posts_df[posts_df["severity"] == "Critical"]) / len(posts_df) * 100)

        # Thresholds are percentages of all posts that are labelled Critical.
        if critical_pct > 30:
            risk_level = "π΄ CRITICAL"
            risk_desc = "Immediate action required"
        elif critical_pct > 15:
            risk_level = "π  HIGH"
            risk_desc = "Enhanced monitoring recommended"
        elif critical_pct > 5:
            risk_level = "π‘ MODERATE"
            risk_desc = "Standard surveillance protocols"
        else:
            risk_level = "π’ LOW"
            risk_desc = "Routine monitoring sufficient"

        st.markdown(f"**Overall Risk Level:** {risk_level}")
        st.markdown(f"*{risk_desc}*")
        st.markdown(f"- Critical incidents: {critical_pct:.1f}%")
        st.markdown(f"- Total monitored incidents: {len(posts_df)}")
        # The date column is optional elsewhere in this script, so guard it here too.
        if "date" in posts_df.columns:
            st.markdown(f"- Date range: {posts_df['date'].min()} to {posts_df['date'].max()}")

st.markdown("---")
|
| 846 |
+
|
| 847 |
+
# --- Export Options
st.subheader("π€ Export Intelligence Reports")

# Date stamp shared by every export file name.
export_stamp = datetime.now().strftime('%Y%m%d')

export_col1, export_col2, export_col3 = st.columns(3)

# Complete dataset.
with export_col1:
    if not posts_df.empty:
        full_export = posts_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="π Full Dataset",
            data=full_export,
            file_name=f"intelligence_full_{export_stamp}.csv",
            mime="text/csv",
        )

# Only the rows labelled Critical; the button is hidden when there are none.
with export_col2:
    if "severity" in posts_df.columns:
        critical_data = posts_df.loc[posts_df["severity"] == "Critical"]
        if not critical_data.empty:
            critical_export = critical_data.to_csv(index=False).encode("utf-8")
            st.download_button(
                label="π¨ Critical Incidents",
                data=critical_export,
                file_name=f"critical_incidents_{export_stamp}.csv",
                mime="text/csv",
            )

# Per-ward aggregation; merged_wards only exists when ward data was loaded.
with export_col3:
    if 'merged_wards' in locals() and not merged_wards.empty:
        location_export = merged_wards.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="πΊοΈ Location Analysis",
            data=location_export,
            file_name=f"location_analysis_{export_stamp}.csv",
            mime="text/csv",
        )

st.markdown("---")
|
| 885 |
+
|
| 886 |
+
# --- System Status Footer
# One column per data source, showing whether data_status marks it as loaded.
st.markdown("**π Intelligence System Status:**")

# (label shown to the user, key in data_status), in display order.
status_sources = [
    ("π Posts:", "posts"),
    ("π¬ Comments:", "comments"),
    ("ποΈ Wards:", "wards"),
    ("π Districts:", "districts"),
]
status_cols = st.columns(4)
for status_col, (status_label, status_key) in zip(status_cols, status_sources):
    with status_col:
        st.write(status_label, "✅ Online" if data_status[status_key] else "❌ Offline")

# The "last updated" line is best effort: a missing or unreadable posts file
# simply omits it. Only the errors the stat/convert calls can raise are caught,
# so interpreter- and framework-level control exceptions are no longer swallowed.
try:
    file_mod_time = datetime.fromtimestamp(os.path.getmtime(POSTS_FILE))
except (OSError, OverflowError, TypeError, ValueError):
    file_mod_time = None

if file_mod_time is not None:
    st.markdown(f"*Intelligence data last updated: {file_mod_time.strftime('%Y-%m-%d %H:%M:%S')}*")

st.markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|