Spaces:

Akilashamnaka12
/

NewsAI_web_app

Sleeping

App Files Files Community

NewsAI_web_app / src /streamlit_app.py

Akilashamnaka12

Update src/streamlit_app.py

2116747 verified 6 days ago

raw

history blame contribute delete

30.5 kB

	import html
	import os
	import re
	from collections import Counter

	import matplotlib.pyplot as plt
	import nltk
	import pandas as pd
	import streamlit as st
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from wordcloud import WordCloud

	# ─── HF token (set as a Secret in Space settings for private/gated models) ────
	# Resolves to None when the variable is not set.
	HF_TOKEN = os.environ.get("HF_TOKEN")

	# ─── Page Config ──────────────────────────────────────────────────────────────
	_page_settings = dict(
	    page_title="NewsLens · Sri Lanka",
	    page_icon="🔎",
	    layout="wide",
	    initial_sidebar_state="collapsed",
	)
	st.set_page_config(**_page_settings)

	# ─── NLTK – write to /tmp so HF Spaces (read-only FS) can cache data ──────────
	NLTK_DATA_DIR = "/tmp/nltk_data"
	os.makedirs(NLTK_DATA_DIR, exist_ok=True)
	# Put our directory first on NLTK's search path, but only once.
	_nltk_dir_registered = NLTK_DATA_DIR in nltk.data.path
	if not _nltk_dir_registered:
	    nltk.data.path.insert(0, NLTK_DATA_DIR)

	@st.cache_resource
	def download_nltk():
	    """Download the NLTK resources used by the text preprocessor.

	    Each resource is fetched into NLTK_DATA_DIR. A failure on one resource
	    is ignored so the remaining ones are still attempted.
	    """
	    wanted = ("stopwords", "punkt", "punkt_tab")
	    for resource in wanted:
	        try:
	            nltk.download(resource, download_dir=NLTK_DATA_DIR, quiet=True)
	        except Exception:
	            # Best-effort: move on to the next resource.
	            continue

	download_nltk()

	# ─── CSS ──────────────────────────────────────────────────────────────────────
	# Global stylesheet injected as raw HTML. The universal reset selector must
	# start with `*`; a selector list that begins with a comma is invalid CSS and
	# makes the browser discard the whole rule.
	st.markdown("""
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Sans:ital,wght@0,300;0,400;0,500;1,300&display=swap');

	*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

	html, body, [data-testid="stAppViewContainer"] {
	background: #07090f !important;
	color: #e8eaf0 !important;
	font-family: 'DM Sans', sans-serif !important;
	}
	[data-testid="stAppViewContainer"] { padding: 0 !important; }
	[data-testid="stHeader"] { background: transparent !important; }
	section.main > div { padding-top: 0 !important; }
	.block-container { padding: 0 2rem 4rem 2rem !important; max-width: 1280px !important; }

	/* Hero */
	.hero {
	background: linear-gradient(135deg, #0b1120 0%, #0d1f3c 55%, #062a3a 100%);
	border-bottom: 1px solid #1a2a44;
	padding: 3.5rem 3rem 2.8rem;
	position: relative; overflow: hidden;
	}
	.hero::before {
	content:''; position:absolute; inset:0;
	background: radial-gradient(ellipse 70% 60% at 80% 30%, rgba(0,200,180,.09) 0%, transparent 70%);
	pointer-events: none;
	}
	.hero-eyebrow { font-size:.75rem; font-weight:500; letter-spacing:.18em; color:#00c8b4; text-transform:uppercase; margin-bottom:.9rem; }
	.hero-title { font-family:'Syne',sans-serif; font-size:clamp(2.2rem,5vw,3.6rem); font-weight:800; line-height:1.08; color:#fff; margin-bottom:1rem; }
	.hero-title span { color:#00c8b4; }
	.hero-sub { font-size:1.05rem; font-weight:300; line-height:1.65; color:#94a3b8; max-width:560px; }

	/* Tabs */
	[data-testid="stTabs"] > div:first-child { background:#0b111f; border-bottom:1px solid #1a2a44; padding:0 2rem; gap:0 !important; }
	[data-testid="stTabs"] button { font-family:'Syne',sans-serif !important; font-size:.88rem !important; font-weight:600 !important; color:#64748b !important; padding:1rem 1.5rem !important; border-radius:0 !important; border-bottom:2px solid transparent !important; transition:color .2s,border-color .2s !important; }
	[data-testid="stTabs"] button:hover { color:#cbd5e1 !important; }
	[data-testid="stTabs"] button[aria-selected="true"] { color:#00c8b4 !important; border-bottom-color:#00c8b4 !important; background:transparent !important; }

	/* Cards */
	.card { background:#0f172a; border:1px solid #1e2d45; border-radius:14px; padding:1.8rem 1.8rem 1.6rem; margin-bottom:1.4rem; transition:border-color .2s,box-shadow .2s; }
	.card:hover { border-color:#00c8b4; box-shadow:0 0 28px rgba(0,200,180,.08); }
	.card-title { font-family:'Syne',sans-serif; font-size:1rem; font-weight:700; color:#e2e8f0; margin-bottom:.35rem; }
	.card-sub { font-size:.82rem; color:#64748b; font-weight:300; margin-bottom:1.1rem; }

	/* Labels / chips / badges */
	.section-label { font-family:'Syne',sans-serif; font-size:.72rem; font-weight:700; letter-spacing:.14em; text-transform:uppercase; color:#00c8b4; margin-bottom:.6rem; }
	.stat-row { display:flex; gap:1rem; flex-wrap:wrap; margin:1rem 0; }
	.stat-chip { background:#1e2d45; border-radius:8px; padding:.55rem 1.1rem; font-family:'Syne',sans-serif; font-size:.85rem; font-weight:600; color:#e2e8f0; }
	.stat-chip span { color:#00c8b4; font-size:1.15rem; display:block; }
	.badge { display:inline-block; padding:.25rem .7rem; border-radius:999px; font-size:.72rem; font-weight:600; letter-spacing:.05em; text-transform:uppercase; }
	.badge-teal { background:rgba(0,200,180,.15); color:#00c8b4; border:1px solid rgba(0,200,180,.3); }
	.badge-blue { background:rgba(59,130,246,.15); color:#60a5fa; border:1px solid rgba(59,130,246,.3); }
	.badge-amber { background:rgba(245,158,11,.12); color:#fbbf24; border:1px solid rgba(245,158,11,.3); }
	.badge-rose { background:rgba(244,63,94,.12); color:#fb7185; border:1px solid rgba(244,63,94,.3); }
	.badge-violet { background:rgba(139,92,246,.12); color:#a78bfa; border:1px solid rgba(139,92,246,.3); }

	/* Answer box */
	.answer-box { background:linear-gradient(135deg,#0b2034,#091c2e); border:1px solid #00c8b4; border-radius:12px; padding:1.4rem 1.6rem; margin-top:1.2rem; }
	.answer-label { font-family:'Syne',sans-serif; font-size:.68rem; font-weight:700; letter-spacing:.14em; text-transform:uppercase; color:#00c8b4; margin-bottom:.5rem; }
	.answer-text { font-size:1.05rem; color:#e2e8f0; line-height:1.7; }
	.score-bar-wrap { margin-top:.8rem; }
	.score-bar-label { font-size:.75rem; color:#64748b; margin-bottom:.25rem; }
	.score-bar-outer { background:#1e2d45; border-radius:999px; height:6px; }
	.score-bar-inner { background:linear-gradient(90deg,#00c8b4,#0ea5e9); border-radius:999px; height:6px; }

	/* Inputs */
	[data-testid="stFileUploader"] { background:#0f172a !important; border:1.5px dashed #1e3a5f !important; border-radius:12px !important; padding:1.5rem !important; }
	[data-testid="stFileUploader"]:hover { border-color:#00c8b4 !important; }
	textarea { background:#0f172a !important; border:1px solid #1e2d45 !important; border-radius:10px !important; color:#e2e8f0 !important; font-family:'DM Sans',sans-serif !important; font-size:.95rem !important; }
	textarea:focus { border-color:#00c8b4 !important; box-shadow:0 0 0 2px rgba(0,200,180,.18) !important; }

	/* Buttons */
	.stButton > button { background:linear-gradient(135deg,#00c8b4,#0ea5e9) !important; color:#07090f !important; border:none !important; border-radius:8px !important; font-family:'Syne',sans-serif !important; font-weight:700 !important; font-size:.88rem !important; letter-spacing:.04em !important; padding:.6rem 1.6rem !important; cursor:pointer !important; transition:opacity .2s,box-shadow .2s !important; }
	.stButton > button:hover { opacity:.88 !important; box-shadow:0 4px 20px rgba(0,200,180,.35) !important; }
	[data-testid="stDownloadButton"] button { background:transparent !important; border:1.5px solid #00c8b4 !important; color:#00c8b4 !important; font-family:'Syne',sans-serif !important; font-weight:700 !important; font-size:.85rem !important; border-radius:8px !important; padding:.55rem 1.4rem !important; transition:background .2s !important; }
	[data-testid="stDownloadButton"] button:hover { background:rgba(0,200,180,.12) !important; }

	/* Misc */
	hr { border-color:#1e2d45 !important; margin:1.8rem 0 !important; }
	[data-testid="stSelectbox"] > div > div { background:#0f172a !important; border-color:#1e2d45 !important; color:#e2e8f0 !important; border-radius:8px !important; }
	::-webkit-scrollbar { width:6px; }
	::-webkit-scrollbar-track { background:#0b111f; }
	::-webkit-scrollbar-thumb { background:#1e2d45; border-radius:3px; }
	::-webkit-scrollbar-thumb:hover { background:#00c8b4; }
	[data-testid="stTabsContent"] { padding:2rem 0 !important; }
	</style>
	""", unsafe_allow_html=True)

	# ─── Constants ────────────────────────────────────────────────────────────────
	# The five target classes, in display order.
	CATEGORIES = ["Business", "Opinion", "Political_gossip", "Sports", "World_news"]

	# CSS badge class per category.
	CAT_BADGE = dict(zip(
	    CATEGORIES,
	    ["badge-teal", "badge-blue", "badge-amber", "badge-rose", "badge-violet"],
	))
	# Chart colour per category.
	CAT_COLOR = dict(zip(
	    CATEGORIES,
	    ["#00c8b4", "#60a5fa", "#fbbf24", "#fb7185", "#a78bfa"],
	))

	# Map whatever the model returns → one of the 5 assignment class names
	LABEL_MAP = {
	    "business": "Business",
	    "opinion": "Opinion",
	    "political_gossip": "Political_gossip",
	    "political gossip": "Political_gossip",
	    "sports": "Sports",
	    "world_news": "World_news",
	    "world news": "World_news",
	    "world": "World_news",
	    "label_0": "Business",
	    "label_1": "Opinion",
	    "label_2": "Political_gossip",
	    "label_3": "Sports",
	    "label_4": "World_news",
	    "business and finance": "Business",
	    "opinions and editorials": "Opinion",
	    "politics": "Political_gossip",
	}


	def normalise_label(raw: str) -> str:
	    """Translate a raw model label into one of CATEGORIES.

	    A label that is already a category name is returned as-is. Otherwise the
	    label is stripped, lower-cased and looked up in LABEL_MAP; a label with
	    no mapping is returned unchanged.
	    """
	    if raw not in CATEGORIES:
	        lookup_key = raw.strip().lower()
	        return LABEL_MAP.get(lookup_key, raw)
	    return raw

	# ─── Text preprocessor ────────────────────────────────────────────────────────
	def preprocess_text(text: str) -> str:
	    """Normalise raw article text for classification and word statistics.

	    Steps: lower-case, strip URLs, replace every character that is not
	    a-z or whitespace with a space, collapse whitespace, then drop English
	    stopwords and tokens of two characters or fewer.

	    Args:
	        text: Raw text. Any non-string value yields an empty string.

	    Returns:
	        The cleaned text. If stopword removal or tokenisation fails, the
	        regex-cleaned text is returned without stopword filtering.
	    """
	    if not isinstance(text, str):
	        return ""
	    text = text.lower()
	    # Plain `|` is alternation; an escaped `\|` would only match a literal
	    # pipe character and leave every URL in place.
	    text = re.sub(r"http\S+|www\.\S+", " ", text)
	    text = re.sub(r"[^a-z\s]", " ", text)
	    text = re.sub(r"\s+", " ", text).strip()
	    try:
	        sw = set(stopwords.words("english"))
	        tokens = word_tokenize(text)
	    except Exception:
	        # Deliberate best-effort: fall back to the regex-cleaned text
	        # instead of failing the whole request.
	        return text
	    return " ".join(t for t in tokens if t not in sw and len(t) > 2)

	# ─── Model loaders ────────────────────────────────────────────────────────────
	@st.cache_resource(show_spinner=False)
	def load_classifier():
	    """Build the text-classification pipeline for the news classifier.

	    MODEL_ID names the fine-tuned model on the HF Hub; change it to point
	    at your own model. HF_TOKEN, when set, is forwarded as the access token
	    (needed for private models or Spaces).

	    Returns:
	        (pipeline, None) on success, or (None, error message) on failure.
	    """
	    MODEL_ID = "Akilashamnaka12/news_classifier_model"  # ← swap after Task 4

	    try:
	        from transformers import pipeline as hf_pipeline

	        options = dict(
	            task="text-classification",
	            model=MODEL_ID,
	            truncation=True,
	            max_length=512,
	        )
	        if HF_TOKEN:
	            options["token"] = HF_TOKEN
	        classifier = hf_pipeline(**options)
	    except Exception as exc:
	        return None, str(exc)
	    return classifier, None


	# Decorated once only: stacking the cache decorator twice would wrap an
	# already-cached function in a second cache for no benefit.
	@st.cache_resource(show_spinner=False)
	def load_qa():
	    """Load the extractive question-answering tokenizer and model.

	    Returns:
	        ((tokenizer, model), None) on success, or (None, error message)
	        when an import or a download fails.
	    """
	    QA_MODEL = "deepset/roberta-base-squad2"
	    try:
	        from transformers import AutoTokenizer, AutoModelForQuestionAnswering
	        import torch  # noqa: F401 -- unused here; kept so a missing PyTorch is reported at load time

	        tok = AutoTokenizer.from_pretrained(QA_MODEL)
	        model = AutoModelForQuestionAnswering.from_pretrained(QA_MODEL)
	        return (tok, model), None
	    except Exception as e:
	        return None, str(e)

	# ══════════════════════════════════════════════════════════════════════════════
	# HERO
	# ══════════════════════════════════════════════════════════════════════════════
	# Banner markup, styled by the .hero* CSS classes.
	hero_html = """
	<div class="hero">
	<div class="hero-eyebrow"><h5>🔎  Text Analytics · DA3111 - Group 6</h5></div>
	<div class="hero-title">News Lens</div>
	<div class="hero-sub">
	Classify News articles, interrogate content with Q&A,
	and surface editorial insights — all in one unified workspace.
	</div>
	</div>
	"""
	st.markdown(hero_html, unsafe_allow_html=True)

	# One tab per feature; the names tab1/tab2/tab3 are used by the sections below.
	tab_titles = [
	    " 📂 Text Classification ",
	    " 💬 Q & A Pipeline ",
	    " 📊 Insights ",
	]
	tab1, tab2, tab3 = st.tabs(tab_titles)

	# ══════════════════════════════════════════════════════════════════════════════
	# TAB 1 – TEXT CLASSIFICATION
	# ══════════════════════════════════════════════════════════════════════════════
	# Left column: upload a CSV, preview it and run the classifier.
	# Right column: show the predictions stored in st.session_state.
	with tab1:
	    left, right = st.columns([1.1, 1], gap="large")

	    with left:
	        st.markdown('<div class="section-label">Upload</div>', unsafe_allow_html=True)
	        st.markdown("""
	<div class="card">
	<div class="card-title">Upload your CSV file</div>
	<div class="card-sub"><code style="background: #00c8b4; color:#000000">Must contain a content column with news excerpts.</code></div>
	""", unsafe_allow_html=True)
	        # A real (hidden) label instead of "": Streamlit discourages empty labels.
	        uploaded = st.file_uploader("Upload CSV", type=["csv"], label_visibility="collapsed")
	        st.markdown("</div>", unsafe_allow_html=True)

	        # df_raw stays None when nothing was uploaded or parsing failed, so the
	        # rest of the page (other tabs, footer) still renders after an error.
	        df_raw = None
	        if uploaded:
	            try:
	                uploaded.seek(0)  # reset buffer – important on HF Spaces
	                df_raw = pd.read_csv(uploaded)
	            except Exception as e:
	                st.error(f"Could not parse CSV: {e}")

	        if df_raw is not None:
	            if "content" not in df_raw.columns:
	                st.error("❌ The uploaded file must have a `content` column.")
	            else:
	                st.markdown(f"""
	<div class="stat-row">
	<div class="stat-chip"><span>{len(df_raw)}</span>Records</div>
	<div class="stat-chip"><span>{df_raw.shape[1]}</span>Columns</div>
	</div>""", unsafe_allow_html=True)

	                st.markdown('<div class="section-label" style="margin-top:1rem">Preview</div>',
	                            unsafe_allow_html=True)
	                st.dataframe(df_raw.head(5), use_container_width=True, hide_index=True)

	                run_btn = st.button("⚡ Run Classification", use_container_width=True)

	                if run_btn:
	                    with st.spinner("Loading classifier… (first run ~30 s on HF Spaces)"):
	                        clf, err = load_classifier()
	                    if err:
	                        st.error(f"Model load error: {err}")
	                    else:
	                        df_out = df_raw.copy()
	                        # Coerce every cell to str: a numeric or mixed-type
	                        # column would otherwise fail on slicing below.
	                        texts = df_out["content"].fillna("").astype(str).tolist()
	                        total = len(texts)
	                        pred_labels = []
	                        prog = st.progress(0, text="Classifying…")

	                        for i, txt in enumerate(texts, start=1):
	                            # Fall back to the raw text when cleaning leaves nothing.
	                            clean = preprocess_text(txt) or txt
	                            try:
	                                raw = clf(clean[:512])[0]["label"]
	                                label = normalise_label(raw)
	                            except Exception:
	                                # One bad row must not abort the whole batch.
	                                label = "Unknown"
	                            pred_labels.append(label)
	                            prog.progress(i / total, text=f"Classifying {i}/{total}…")

	                        prog.empty()
	                        df_out["class"] = pred_labels
	                        st.session_state["df_classified"] = df_out
	                        st.session_state["classification_done"] = True
	                        # Rerun so the right column and other tabs pick up the results.
	                        st.rerun()

	    with right:
	        st.markdown('<div class="section-label">Results</div>', unsafe_allow_html=True)

	        if st.session_state.get("classification_done"):
	            df_out = st.session_state["df_classified"]
	            counts = df_out["class"].value_counts()

	            # One chip per predicted class with its row count.
	            chips = "".join(
	                f'<div class="stat-chip"><span>{cnt}</span>'
	                f'<span class="badge {CAT_BADGE.get(cat, "badge-teal")}">{cat.replace("_"," ")}</span></div>'
	                for cat, cnt in counts.items()
	            )
	            chip_html = '<div class="stat-row">' + chips + "</div>"
	            st.markdown(chip_html, unsafe_allow_html=True)

	            cols = [c for c in ["content", "class"] if c in df_out.columns]
	            st.markdown('<div class="card" style="margin-top:.8rem">', unsafe_allow_html=True)
	            st.markdown('<div class="card-title">Classified Records</div>', unsafe_allow_html=True)
	            st.dataframe(df_out[cols].head(20), use_container_width=True, hide_index=True,
	                         column_config={"content": st.column_config.TextColumn("Content", width="large")})
	            st.markdown("</div>", unsafe_allow_html=True)

	            st.download_button(
	                "⬇ Download output.csv",
	                data=df_out.to_csv(index=False).encode("utf-8"),
	                file_name="output.csv", mime="text/csv",
	                use_container_width=True,
	            )
	        else:
	            st.markdown("""
	<div class="card" style="text-align:center;padding:3.5rem 2rem;">
	<div style="font-size:3rem;margin-bottom:1rem">📂</div>
	<div style="font-family:'Syne',sans-serif;font-size:1rem;font-weight:700;color:#334155;">
	Upload a CSV to see results</div>
	<div style="font-size:.82rem;color:#475569;margin-top:.4rem;">
	Predictions appear here after classification runs.</div>
	</div>""", unsafe_allow_html=True)

	# ══════════════════════════════════════════════════════════════════════════════
	# TAB 2 – Q&A PIPELINE
	# ══════════════════════════════════════════════════════════════════════════════
	# Left column: the context passage. Right column: the question and the answer.
	with tab2:
	    l2, r2 = st.columns([1, 1], gap="large")

	    with l2:
	        st.markdown('<div class="section-label">Context</div>', unsafe_allow_html=True)
	        st.markdown('<div class="card">', unsafe_allow_html=True)
	        st.markdown('<div class="card-title">Paste a news excerpt</div>', unsafe_allow_html=True)
	        st.markdown('<div class="card-sub">The Q&A model will read this as its context.</div>',
	                    unsafe_allow_html=True)

	        # Pre-fill with the first classified article when one is available.
	        default_ctx = ""
	        if st.session_state.get("classification_done"):
	            df_c = st.session_state["df_classified"]
	            if len(df_c):
	                default_ctx = str(df_c["content"].iloc[0])

	        # Real (hidden) labels instead of "": Streamlit discourages empty labels.
	        context_text = st.text_area("Context", value=default_ctx, height=260,
	                                    placeholder="Paste any news article content here…",
	                                    label_visibility="collapsed", key="qa_context")
	        st.markdown("</div>", unsafe_allow_html=True)

	    with r2:
	        st.markdown('<div class="section-label">Question</div>', unsafe_allow_html=True)
	        st.markdown('<div class="card">', unsafe_allow_html=True)
	        st.markdown('<div class="card-title">Ask anything about the article</div>', unsafe_allow_html=True)
	        st.markdown('<div class="card-sub">The model extracts an answer from the context on the left.</div>',
	                    unsafe_allow_html=True)

	        question_text = st.text_area("Question", height=120,
	                                     placeholder="e.g. Who is mentioned in this article?",
	                                     label_visibility="collapsed", key="qa_question")
	        ask_btn = st.button("🔍 Get Answer", use_container_width=True)
	        st.markdown("</div>", unsafe_allow_html=True)

	        if ask_btn:
	            if not context_text.strip():
	                st.warning("Please paste a news excerpt in the Context panel on the left.")
	            elif not question_text.strip():
	                st.warning("Please type a question.")
	            else:
	                with st.spinner("Loading Q&A model (first run ~30 s)"):
	                    qa, err = load_qa()
	                if err:
	                    st.error(f"Q&A model failed to load: {err}")
	                else:
	                    with st.spinner("Finding the answer..."):
	                        try:
	                            import torch
	                            tok, model = qa
	                            q = question_text.strip()
	                            ctx = context_text.strip()[:3000]

	                            inputs = tok(q, ctx, return_tensors="pt",
	                                         truncation=True, max_length=512)
	                            with torch.no_grad():
	                                outputs = model(**inputs)

	                            # Most likely start and end token, chosen independently.
	                            start = int(outputs.start_logits.argmax())
	                            end = int(outputs.end_logits.argmax())
	                            # An end before the start gives an empty slice, and
	                            # special tokens are dropped while decoding, so a
	                            # "no answer" prediction yields an empty string
	                            # rather than markup-like text such as "<s>".
	                            span_ids = inputs["input_ids"][0][start:end + 1]
	                            answer = tok.decode(span_ids, skip_special_tokens=True).strip()

	                            start_prob = outputs.start_logits.softmax(dim=-1).max().item()
	                            end_prob = outputs.end_logits.softmax(dim=-1).max().item()
	                            score_pct = int(((start_prob + end_prob) / 2) * 100)
	                        except Exception as e:
	                            st.error(f"Inference error: {e}")
	                        else:
	                            if not answer:
	                                st.info("The model could not find an answer in the provided context.")
	                            else:
	                                # The answer is copied from user-pasted text and is
	                                # rendered as raw HTML, so it must be escaped.
	                                st.markdown(f"""
	<div class="answer-box">
	<div class="answer-label">Answer</div>
	<div class="answer-text">{html.escape(answer)}</div>
	<div class="score-bar-wrap">
	<div class="score-bar-label">Confidence : {score_pct}%</div>
	<div class="score-bar-outer">
	<div class="score-bar-inner" style="width:{score_pct}%"></div>
	</div>
	</div>
	</div>""", unsafe_allow_html=True)

	    if st.session_state.get("classification_done"):
	        st.markdown("---")
	        st.markdown('<div class="section-label">Suggested Questions</div>', unsafe_allow_html=True)
	        suggestions = ["Who is this article about?", "What event is described?",
	                       "Where did this take place?", "What was the outcome?"]
	        for col, suggestion in zip(st.columns(4), suggestions):
	            col.markdown(f"""
	<div class="card" style="padding:1rem 1.2rem;text-align:center;">
	<div style="font-size:.85rem;color:#94a3b8;">{suggestion}</div>
	</div>""", unsafe_allow_html=True)

	# ══════════════════════════════════════════════════════════════════════════════
	# TAB 3 – INSIGHTS
	# ══════════════════════════════════════════════════════════════════════════════
	# KPI cards, distribution charts, a word cloud and top unigrams for the
	# classified data held in st.session_state.
	with tab3:
	    if not st.session_state.get("classification_done"):
	        st.markdown("""
	<div class="card" style="text-align:center;padding:4rem 2rem;">
	<div style="font-size:3.5rem;margin-bottom:1rem">📊</div>
	<div style="font-family:'Syne',sans-serif;font-size:1.1rem;font-weight:700;color:#334155;">
	Insights unlock after classification</div>
	<div style="font-size:.88rem;color:#475569;margin-top:.5rem;">
	Go to <strong style="color:#00c8b4">Text Classification</strong>,
	upload a CSV, and run the model first.</div>
	</div>""", unsafe_allow_html=True)
	    else:
	        df_ins = st.session_state["df_classified"]
	        counts = df_ins["class"].value_counts()
	        total = len(df_ins)

	        # KPI row: one card per known category (absent categories show 0).
	        kpi_cols = st.columns(5)
	        for col, cat in zip(kpi_cols, CATEGORIES):
	            cnt = int(counts.get(cat, 0))
	            pct = round(cnt / total * 100, 1) if total else 0
	            badge = CAT_BADGE.get(cat, "badge-teal")
	            col.markdown(f"""
	<div class="card" style="text-align:center;padding:1.4rem 1rem;">
	<div class="badge {badge}" style="margin-bottom:.7rem">{cat.replace('_',' ')}</div>
	<div style="font-family:'Syne',sans-serif;font-size:1.9rem;font-weight:800;color:#e2e8f0">{cnt}</div>
	<div style="font-size:.78rem;color:#64748b;margin-top:.2rem">{pct}% of total</div>
	</div>""", unsafe_allow_html=True)

	        st.markdown("---")
	        ch1, ch2 = st.columns(2, gap="large")

	        # Display names and colours shared by both charts, in counts order.
	        display_names = [c.replace("_", " ") for c in counts.index]
	        chart_colors = [CAT_COLOR.get(c, "#00c8b4") for c in counts.index]

	        with ch1:
	            st.markdown('<div class="section-label">Category Distribution</div>', unsafe_allow_html=True)
	            fig, ax = plt.subplots(figsize=(5, 4.2), facecolor="#0f172a")
	            # width=0.55 turns the pie into a donut.
	            wedges, _, autotexts = ax.pie(
	                counts.values, labels=None, autopct="%1.1f%%", colors=chart_colors,
	                startangle=120, wedgeprops=dict(width=0.55, edgecolor="#07090f", linewidth=2),
	                pctdistance=0.78)
	            for at in autotexts:
	                at.set_color("#e2e8f0")
	                at.set_fontsize(8.5)
	                at.set_fontweight("bold")
	            ax.legend(wedges, display_names, loc="lower center", bbox_to_anchor=(0.5, -0.12),
	                      ncol=3, frameon=False, labelcolor="#94a3b8", fontsize=8)
	            ax.set_facecolor("#0f172a")
	            fig.patch.set_facecolor("#0f172a")
	            st.pyplot(fig, use_container_width=True)
	            plt.close(fig)  # release the figure once it has been rendered

	        with ch2:
	            st.markdown('<div class="section-label">Article Counts by Category</div>', unsafe_allow_html=True)
	            fig2, ax2 = plt.subplots(figsize=(5, 4.2), facecolor="#0f172a")
	            bars = ax2.barh(display_names, counts.values,
	                            color=chart_colors,
	                            height=0.55, edgecolor="none")
	            ax2.set_facecolor("#0f172a")
	            for sp in ["top", "right"]:
	                ax2.spines[sp].set_visible(False)
	            for sp in ["left", "bottom"]:
	                ax2.spines[sp].set_color("#1e2d45")
	            ax2.tick_params(colors="#64748b", labelsize=8.5)
	            # Print each count just past the end of its bar.
	            for bar in bars:
	                ax2.text(bar.get_width() + 0.4, bar.get_y() + bar.get_height() / 2,
	                         str(int(bar.get_width())), va="center", ha="left",
	                         color="#e2e8f0", fontsize=8.5, fontweight="bold")
	            fig2.patch.set_facecolor("#0f172a")
	            st.pyplot(fig2, use_container_width=True)
	            plt.close(fig2)

	        st.markdown("---")
	        st.markdown('<div class="section-label">Word Cloud by Category</div>', unsafe_allow_html=True)
	        # A real (hidden) label instead of "": Streamlit discourages empty labels.
	        selected_cat = st.selectbox("Category", options=CATEGORIES,
	                                    format_func=lambda c: c.replace("_", " "),
	                                    label_visibility="collapsed")

	        cat_texts = df_ins[df_ins["class"] == selected_cat]["content"].fillna("").tolist()
	        # Only the first 200 articles of the category are used.
	        combined = " ".join(preprocess_text(t) for t in cat_texts[:200])

	        if combined.strip():
	            wc = WordCloud(width=900, height=340, background_color="#0f172a",
	                           colormap="cool", max_words=120, collocations=False).generate(combined)
	            fig3, ax3 = plt.subplots(figsize=(9, 3.5), facecolor="#0f172a")
	            ax3.imshow(wc, interpolation="bilinear")
	            ax3.axis("off")
	            fig3.patch.set_facecolor("#0f172a")
	            st.pyplot(fig3, use_container_width=True)
	            plt.close(fig3)
	        else:
	            st.info(f"No content found for: {selected_cat.replace('_',' ')}")

	        st.markdown("---")
	        st.markdown(f'<div class="section-label">Top Unigrams · {selected_cat.replace("_"," ")}</div>',
	                    unsafe_allow_html=True)
	        top_words = Counter(combined.split()).most_common(15)
	        if top_words:
	            words, freqs = zip(*top_words)
	            fig4, ax4 = plt.subplots(figsize=(9, 3), facecolor="#0f172a")
	            ax4.bar(words, freqs, color=CAT_COLOR.get(selected_cat, "#00c8b4"), edgecolor="none", width=0.6)
	            ax4.set_facecolor("#0f172a")
	            for sp in ["top", "right"]:
	                ax4.spines[sp].set_visible(False)
	            for sp in ["left", "bottom"]:
	                ax4.spines[sp].set_color("#1e2d45")
	            ax4.tick_params(axis="x", colors="#64748b", labelsize=8, rotation=30)
	            ax4.tick_params(axis="y", colors="#64748b", labelsize=8)
	            fig4.patch.set_facecolor("#0f172a")
	            st.pyplot(fig4, use_container_width=True)
	            plt.close(fig4)

	# ─── Footer ───────────────────────────────────────────────────────────────────
	# Static credit line rendered below all tabs.
	footer_html = """
	<div style="text-align:center;padding:2.5rem 0 1rem;color:#2a3a55;
	font-size:.78rem;border-top:1px solid #1a2a44;margin-top:3rem;">
	Built for <strong style="color:#00c8b4">IN23-S5-DA3111 · Text Analytics Group Project</strong>
	·  Powered by Hugging Face & Streamlit
	</div>
	"""
	st.markdown(footer_html, unsafe_allow_html=True)