import streamlit as st
import pandas as pd
import numpy as np
import string, time, re, random
from collections import Counter
# ─────────────────────────────────────────────────────────────────────────────
# PAGE CONFIG
# ─────────────────────────────────────────────────────────────────────────────
# Streamlit page chrome: custom tab title/icon, wide layout, sidebar collapsed.
st.set_page_config(
page_title="NewsLens AI — Daily Mirror Intelligence",
page_icon="◉",
layout="wide",
initial_sidebar_state="collapsed",
)
# ─────────────────────────────────────────────────────────────────────────────
# NLTK
# ─────────────────────────────────────────────────────────────────────────────
import nltk


@st.cache_resource(show_spinner=False)
def _nltk():
    """Download the NLTK data packages the app needs, once per session.

    Cached with ``st.cache_resource`` so reruns don't re-download.
    punkt/punkt_tab: tokenizer models, stopwords: stop-word lists,
    wordnet: lemmatizer data.
    """
    # Fix: the pasted source lost its indentation, which made this a
    # SyntaxError — body restored.
    for p in ["punkt", "punkt_tab", "stopwords", "wordnet"]:
        nltk.download(p, quiet=True)


_nltk()
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# ─────────────────────────────────────────────────────────────────────────────
# MODELS
# ─────────────────────────────────────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_clf():
    """Load the fine-tuned news-classification pipeline (cached across reruns).

    Lazy-imports transformers so the app starts fast when the model is
    never used. Truncates inputs to the model's 512-token limit.
    """
    # Fix: indentation was stripped in the paste (SyntaxError) — restored.
    from transformers import pipeline
    return pipeline(
        "text-classification",
        model="Akilashamnaka12/news_classifier_model",
        truncation=True,
        max_length=512,
    )
@st.cache_resource(show_spinner=False)
def load_qa():
    """Load the extractive question-answering pipeline (cached across reruns).

    Uses deepset/roberta-base-squad2 (RoBERTa fine-tuned on SQuAD 2.0).
    """
    # Fix: indentation was stripped in the paste (SyntaxError) — restored.
    from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
    n = "deepset/roberta-base-squad2"
    return pipeline(
        "question-answering",
        model=AutoModelForQuestionAnswering.from_pretrained(n),
        tokenizer=AutoTokenizer.from_pretrained(n),
    )
# ─────────────────────────────────────────────────────────────────────────────
# CONSTANTS
# ─────────────────────────────────────────────────────────────────────────────
# Maps the classifier's raw output labels (LABEL_0..LABEL_4) to category names.
LABEL_MAP = {"LABEL_0":"Business","LABEL_1":"Opinion",
"LABEL_2":"Political_gossip","LABEL_3":"Sports","LABEL_4":"World_news"}
# Per-category display metadata: icon, accent colour, background colour, blurb.
CATS = {
"Business": {"icon":"💼","color":"#0071e3","bg":"#f0f7ff","desc":"Finance & Economy"},
"Opinion": {"icon":"💬","color":"#34c759","bg":"#f0fdf4","desc":"Views & Editorials"},
"Political_gossip": {"icon":"🏛️", "color":"#ff3b30","bg":"#fff1f2","desc":"Politics & Governance"},
"Sports": {"icon":"⚽","color":"#ff9f0a","bg":"#fff7ed","desc":"Matches & Athletics"},
"World_news": {"icon":"🌍","color":"#5e5ce6","bg":"#f5f3ff","desc":"International Affairs"},
}
# Shared NLP resources used by preprocess() (built once at import time).
_sw = set(stopwords.words("english"))
_lem = WordNetLemmatizer()
def preprocess(t):
    """Clean a raw article string for classification / word-cloud building.

    Lowercases, strips punctuation, drops stop words and non-alphabetic
    tokens, and lemmatizes the rest. Returns a space-joined token string,
    or "" for empty / non-string input.
    """
    # Fix: indentation was stripped in the paste (SyntaxError) — restored.
    if not isinstance(t, str) or not t.strip():
        return ""
    # Remove all punctuation in a single C-level pass.
    t = t.lower().translate(str.maketrans("", "", string.punctuation))
    tokens = [_lem.lemmatize(w) for w in word_tokenize(t)
              if w not in _sw and w.isalpha()]
    return " ".join(tokens)
def resolve(r): return LABEL_MAP.get(r, r)
def word_cloud_html(text, n=65):
    """Build a lightweight HTML word cloud for *text* from its top *n* words.

    Pure-Python fallback used when the ``wordcloud`` package is not
    installed. Words of 4+ letters are counted (minus a small stop list);
    each word is rendered as an inline span scaled/weighted by frequency.
    NOTE(review): the original inline markup was lost in extraction — the
    spans below are a faithful-in-spirit reconstruction; confirm styling
    against the design source.
    """
    words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
    stops = {"this","that","with","have","will","from","they","been","were",
             "their","there","also","which","when","into","than","then","about",
             "more","over","some","such","just","very","even","only","most","said"}
    freq = Counter(w for w in words if w not in stops)
    top = freq.most_common(n)
    if not top:
        return ('<div style="text-align:center;color:#86868b;padding:40px;">'
                'Not enough text.</div>')
    mx = top[0][1]
    pal = ["#0071e3","#34c759","#ff3b30","#ff9f0a","#5e5ce6","#00c7be","#ff6b9d"]
    out = ""
    for word, cnt in top:
        sz = 0.76 + (cnt/mx)*1.85              # font size (rem), frequency-scaled
        col = random.choice(pal)               # random accent from the palette
        op = 0.45 + (cnt/mx)*0.55              # opacity, frequency-scaled
        fw = 300 + int((cnt/mx)*500)           # font weight, frequency-scaled
        rot = random.choice([-3,-1,0,0,0,1,3]) # slight rotation for texture
        out += (f'<span style="display:inline-block;margin:4px 8px;'
                f'font-size:{sz:.2f}rem;color:{col};opacity:{op:.2f};'
                f'font-weight:{fw};transform:rotate({rot}deg);">'
                f'{word}</span>')
    return (f'<div style="text-align:center;line-height:1.9;padding:18px;">'
            f'{out}</div>')
# ─────────────────────────────────────────────────────────────────────────────
# ═══════════════════════ MASTER CSS ═══════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# NOTE(review): the inline <style> CSS originally passed to this call was lost
# in extraction — the app renders unstyled until it is restored.
st.markdown("""
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# STATE
# ─────────────────────────────────────────────────────────────────────────────
# Default landing page for a fresh session.
if "page" not in st.session_state:
    st.session_state["page"] = "classify"

pg = st.session_state["page"]
# NOTE(review): the nav bar's HTML (originally an f-string interpolating `pg`
# for active-state styling, presumably) was lost in extraction; the call is
# kept so the layout slot survives until the markup is restored.
st.markdown(f"""
""", unsafe_allow_html=True)
# Nav button row (functional, visually hidden by CSS).
# Fix: indentation was stripped in the paste (SyntaxError) — restored.
c1, c2, c3, _ = st.columns([1, 1, 1, 6])
with c1:
    if st.button("Classify", key="nb1", use_container_width=True):
        st.session_state["page"] = "classify"
        st.rerun()
with c2:
    if st.button("Q & A", key="nb2", use_container_width=True):
        st.session_state["page"] = "qa"
        st.rerun()
with c3:
    if st.button("Insights", key="nb3", use_container_width=True):
        st.session_state["page"] = "insights"
        st.rerun()
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ HERO ═════════════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# Hero banner — NOTE(review): the surrounding HTML markup was stripped during
# extraction; only the text content survives below.
st.markdown("""
Daily Mirror · AI Intelligence · Assignment 01
News that
understands itself.
Classify articles, extract answers, and surface visual
insights from Daily Mirror news — powered by fine-tuned
Hugging Face Transformers.
""", unsafe_allow_html=True)
# Feature bar (four capability tiles; markup likewise stripped).
st.markdown("""
🧠
DistilBERT Classifier
Fine-tuned on 5 news categories
💬
RoBERTa Q&A
Extractive answers with highlights
📊
Visual Insights
Charts, word clouds, distributions
⚙️
NLP Preprocessing
7-step NLTK pipeline built in
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ PAGE: CLASSIFY ═══════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
if pg == "classify":
    # NOTE(review): the decorative HTML originally embedded in this page's
    # st.markdown() calls was stripped during extraction. The calls below keep
    # the recovered text content only; restore the full markup from the
    # design source. Indentation (also stripped) has been reconstructed.

    # ── Section header ──────────────────────────────────────────────────────
    st.markdown("""
Component 01 · Text Classification
Every article, perfectly categorised.
Upload your CSV and a fine-tuned DistilBERT model
instantly sorts each article into one of five categories —
Business, Opinion, Political Gossip, Sports, or World News.
""", unsafe_allow_html=True)
    st.markdown('', unsafe_allow_html=True)

    # Image banner
    st.markdown("""
Upload · Preprocess · Classify · Download
News Classification at Scale
7-step preprocessing pipeline · Batch inference · CSV output
""", unsafe_allow_html=True)

    col_L, col_R = st.columns([3, 2], gap="large")

    # ── LEFT COLUMN: upload → pipeline → results ────────────────────────────
    with col_L:
        st.markdown('Step 01 — Upload', unsafe_allow_html=True)
        st.markdown('Select your CSV file', unsafe_allow_html=True)
        st.markdown(
            "Requires a <code>content</code> column. Compatible with the "
            "evaluation.csv provided with this assignment.",
            unsafe_allow_html=True)
        uploaded = st.file_uploader("", type=["csv"], key="cls_upload",
                                    label_visibility="collapsed")
        if uploaded:
            df = pd.read_csv(uploaded)
            st.success(f"✓ {len(df):,} records loaded · {len(df.columns)} columns")
            # The pipeline requires a 'content' column; abort early otherwise.
            if "content" not in df.columns:
                st.error(f"Column `content` not found. "
                         f"Found: **{', '.join(df.columns.tolist())}**")
                st.stop()
            with st.expander("Preview — first 5 rows"):
                st.dataframe(df.head(), use_container_width=True)

            if st.button("Run Classification Pipeline", key="run_cls"):
                with st.status("⚙️ Preprocessing text (7 steps)…",
                               expanded=False) as s:
                    cleaned = df["content"].fillna("").apply(preprocess).tolist()
                    s.update(label="✅ Preprocessing complete", state="complete")
                with st.spinner("Loading model — first run takes ~30s…"):
                    clf = load_clf()
                prog = st.progress(0, text="Classifying articles…")
                preds, confs = [], []
                # Batch inference, 16 articles at a time.
                for i in range(0, len(cleaned), 16):
                    # An all-whitespace string would break the pipeline — send " ".
                    batch = [t if t.strip() else " " for t in cleaned[i:i+16]]
                    results = clf(batch, truncation=True, max_length=512)
                    for r in results:
                        preds.append(resolve(r["label"]))
                        confs.append(round(r["score"], 4))
                    pct = min(int((i+16)/len(cleaned)*100), 100)
                    prog.progress(pct, text=f"Classifying… {pct}%")
                    time.sleep(0.01)
                prog.empty()
                out = df.copy()
                out["class"] = preds
                out["confidence"] = confs
                st.session_state["out_df"] = out
                st.success("✅ Classification complete — results ready below.")

            # ── Results: stat tiles, tabbed tables, summary, download ───────
            if "out_df" in st.session_state:
                out = st.session_state["out_df"]
                counts = out["class"].value_counts()
                # Stat tiles, one per category.
                for label, meta in CATS.items():
                    n = counts.get(label, 0)
                    st.markdown(f"""
{meta['icon']} {n} — {label.replace('_',' ')}
""", unsafe_allow_html=True)
                st.markdown('Results — Classified Articles',
                            unsafe_allow_html=True)
                all_t, *cat_ts = st.tabs(
                    ["All Articles"] +
                    [f"{CATS[l]['icon']} {l.replace('_',' ')}" for l in CATS]
                )
                with all_t:
                    st.dataframe(out[["content","class","confidence"]],
                                 use_container_width=True, height=320)
                for i, label in enumerate(CATS):
                    with cat_ts[i]:
                        sub = out[out["class"]==label][["content","confidence"]]
                        if sub.empty:
                            st.info(f"No articles classified as **{label.replace('_',' ')}**.")
                        else:
                            st.dataframe(sub, use_container_width=True, height=280)
                avg_c = out["confidence"].mean() if "confidence" in out.columns else 0
                hi = (out["confidence"]>=0.9).sum() if "confidence" in out.columns else 0
                st.markdown(
                    f"Average confidence {avg_c:.1%} · High confidence ≥ 90% {hi}",
                    unsafe_allow_html=True)
                st.download_button(
                    "⬇ Download output.csv",
                    data=out.to_csv(index=False).encode("utf-8"),
                    file_name="output.csv", mime="text/csv",
                )
        else:
            # Empty state shown before any upload.
            st.markdown("""
◉
No file selected yet
Upload your evaluation.csv above to begin
""", unsafe_allow_html=True)

    # ── RIGHT COLUMN: category reference + live distribution ────────────────
    with col_R:
        st.markdown('Reference — Five News Categories', unsafe_allow_html=True)
        for label, meta in CATS.items():
            st.markdown(f"""
{meta['icon']} {label.replace('_',' ')} — {meta['desc']}
""", unsafe_allow_html=True)
        if "out_df" in st.session_state:
            st.markdown('Distribution Chart', unsafe_allow_html=True)
            st.bar_chart(
                st.session_state["out_df"]["class"].value_counts(),
                use_container_width=True, height=190,
            )
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ PAGE: Q&A ════════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
elif pg == "qa":
st.markdown("""
Component 02 · Question-Answering
Ask anything.
Get precise answers.
Paste any news article and ask a natural language question.
The AI reads the passage and extracts an exact, source-referenced answer
— powered by deepset/roberta-base-squad2 (SQuAD 2.0).
""", unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
# Image banner
st.markdown("""
Extractive QA · RoBERTa · SQuAD 2.0
Intelligence That Reads Closely
Ask in plain language · Get source-highlighted answers
""", unsafe_allow_html=True)
col_qa, col_side = st.columns([3, 2], gap="large")
with col_qa:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
'
'Input', unsafe_allow_html=True)
st.markdown('
'
'Paste article & ask
', unsafe_allow_html=True)
src = st.radio("Text Source",
["Paste article text", "Pick from classified results"],
horizontal=True, key="qa_src")
context = ""
if src == "Paste article text":
context = st.text_area(
"News Article",
height=210,
placeholder="Paste any Daily Mirror news article here…",
key="qa_ctx",
)
else:
if "out_df" not in st.session_state:
st.info("ℹ️ Run the **Classify** pipeline first to use this option.")
else:
out_df = st.session_state["out_df"]
sel_cat = st.selectbox(
"Filter Category",
["All"] + [l.replace("_"," ") for l in CATS],
key="qa_cat",
)
pool = (out_df if sel_cat == "All"
else out_df[out_df["class"].isin(
[sel_cat, sel_cat.replace(" ","_")])])
if not pool.empty:
idx = st.selectbox(
"Select Article",
pool.index.tolist(),
format_func=lambda i:
f"#{i} — {str(pool.loc[i,'content'])[:72]}…",
key="qa_idx",
)
row = pool.loc[idx]
context = str(row["content"])
lbl = row.get("class","")
meta = CATS.get(lbl, {"icon":"◉","color":"#1d1d1f","bg":"#f5f5f7"})
conf_v = row.get("confidence", None)
st.markdown(f"""
{meta['icon']} {lbl.replace('_',' ')}
{f" · {conf_v:.1%}" if conf_v else ""}
{context}
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
question = st.text_input(
"Your Question",
placeholder="e.g. Who announced the new policy?",
key="qa_q",
)
st.markdown("
", unsafe_allow_html=True)
if st.button("Extract Answer", key="run_qa"):
if not context.strip():
st.warning("⚠️ Please provide article text.")
elif not question.strip():
st.warning("⚠️ Please enter a question.")
else:
with st.spinner("Reading the passage…"):
qa_pipe = load_qa()
result = qa_pipe(question=question, context=context)
ans = result["answer"]
score = result["score"]
s, e = result["start"], result["end"]
highlighted = (
context[:s]
+ f'
'
f'{context[s:e]}'
+ context[e:]
)
st.markdown(f"""
Answer
{ans}
Confidence {score:.1%}
· deepset/roberta-base-squad2
""", unsafe_allow_html=True)
with st.expander("View highlighted source context"):
st.markdown(
f'
{highlighted}
',
unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
with col_side:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
'
'Tips', unsafe_allow_html=True)
st.markdown('
'
'Better questions,
better answers
', unsafe_allow_html=True)
for i, (t, d) in enumerate([
("Who · What · When · Where",
"Factual questions extract the sharpest answers"),
("Provide full context",
"Longer passages give the model more evidence to work from"),
("Stay specific",
"Narrow, focused questions outperform vague ones every time"),
("Full sentence questions",
"Questions ending with '?' consistently perform best"),
("Avoid yes / no",
"Open-ended questions return richer, more informative answers"),
]):
st.markdown(f"""
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Model', unsafe_allow_html=True)
for k, v in [
("Architecture", "RoBERTa Base"),
("Training Data", "SQuAD 2.0"),
("Task Type", "Extractive Q&A"),
("Provider", "deepset · Hugging Face"),
]:
st.markdown(f"""
{k}
{v}
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ PAGE: INSIGHTS ═══════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
elif pg == "insights":
# Dark hero section
st.markdown("""
Component 03 · Visual Insights
Clarity from
every angle.
Distribution breakdowns, word clouds, confidence analysis,
and article spotlights — everything you need to understand
your classified corpus at a glance.
""", unsafe_allow_html=True)
if "out_df" not in st.session_state:
st.markdown("""
◈
No classified data yet
Run the Classify pipeline first,
then return here for visual insights.
""", unsafe_allow_html=True)
st.stop()
out_df = st.session_state["out_df"]
total = len(out_df)
counts = out_df["class"].value_counts()
# ── Section A: Distribution ──────────────────────────────────────────
st.markdown("""
01 · Distribution
How your corpus breaks down.
""", unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
col_da, col_db = st.columns([2, 3], gap="large")
with col_da:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Breakdown', unsafe_allow_html=True)
for label, meta in CATS.items():
n = counts.get(label, 0)
pct = n / total if total > 0 else 0
st.markdown(f"""
{meta['icon']}
{label.replace('_',' ')}
{n} · {pct:.0%}
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
with col_db:
try:
import plotly.express as px
cdf = counts.reset_index()
cdf.columns = ["Category","Count"]
cdf["Label"] = cdf["Category"].str.replace("_"," ")
cmap = {k: CATS[k]["color"] for k in CATS}
fig = px.bar(cdf, x="Label", y="Count", color="Category",
color_discrete_map=cmap, text="Count",
labels={"Label":"","Count":""})
fig.update_layout(
plot_bgcolor="white",paper_bgcolor="white",
font=dict(family="-apple-system,BlinkMacSystemFont,'SF Pro Text',sans-serif",
size=12,color="#1d1d1f"),
showlegend=False,margin=dict(l=0,r=0,t=10,b=0),
xaxis=dict(showgrid=False,color="#86868b",
tickfont=dict(size=11,color="#6e6e73")),
yaxis=dict(gridcolor="#f5f5f7",color="#86868b"),
)
fig.update_traces(textposition="outside",
textfont=dict(size=12,color="#1d1d1f"),
marker_line_width=0,
marker_corner_radius=6)
st.plotly_chart(fig, use_container_width=True)
except ImportError:
st.bar_chart(counts, use_container_width=True, height=270)
st.markdown("
", unsafe_allow_html=True)
# ── Section B: Word Cloud ────────────────────────────────────────────
st.markdown("""
02 · Word Cloud
The language of the news.
""", unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
col_wl, col_wr = st.columns([2, 3], gap="large")
with col_wl:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
Configure', unsafe_allow_html=True)
st.markdown('
'
'Build word cloud
', unsafe_allow_html=True)
wc_sel = st.selectbox("Category Filter",
["All"]+[l.replace("_"," ") for l in CATS],
key="wc_cat")
wc_n = st.slider("Number of Words", 20, 120, 70, key="wc_n")
st.markdown("
", unsafe_allow_html=True)
if st.button("Generate Word Cloud", key="run_wc"):
lbl = wc_sel.replace(" ","_") if wc_sel != "All" else "All"
corpus = (" ".join(out_df["content"].fillna("").tolist()) if lbl == "All"
else " ".join(
out_df[out_df["class"].isin([lbl,wc_sel])]["content"]
.fillna("").tolist()))
try:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
accent = CATS.get(lbl,{}).get("color","#0071e3")
processed = preprocess(corpus)
def _cf(*a,**k):
r,g,b = mcolors.to_rgb(accent)
f = random.uniform(.45,1.)
return f"rgb({int(r*f*255)},{int(g*f*255)},{int(b*f*255)})"
wc = WordCloud(width=900,height=360,
background_color="white",
color_func=_cf,max_words=wc_n,
prefer_horizontal=.82).generate(processed)
fig_wc,ax = plt.subplots(figsize=(12,4))
ax.imshow(wc,interpolation="bilinear"); ax.axis("off")
fig_wc.patch.set_facecolor("white"); plt.tight_layout(pad=0)
st.session_state["wc_fig"] = fig_wc
st.session_state["wc_html"] = None
except ImportError:
st.session_state["wc_html"] = word_cloud_html(preprocess(corpus), wc_n)
st.session_state["wc_fig"] = None
st.markdown("
", unsafe_allow_html=True)
with col_wr:
st.markdown('
', unsafe_allow_html=True)
st.markdown('
'
'Word Frequency Canvas', unsafe_allow_html=True)
if st.session_state.get("wc_fig"):
import matplotlib.pyplot as plt
st.pyplot(st.session_state["wc_fig"])
elif st.session_state.get("wc_html"):
st.markdown(st.session_state["wc_html"], unsafe_allow_html=True)
else:
st.markdown("""
◎
Configure and generate your word cloud
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
# ── Section C: Confidence ────────────────────────────────────────────
if "confidence" in out_df.columns:
st.markdown("""
03 · Confidence Analysis
How certain is the model?
""", unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
c1,c2,c3 = st.columns(3, gap="large")
for col,(val,lbl,color) in zip([c1,c2,c3],[
(f"{out_df['confidence'].mean():.1%}","Average Confidence","#0071e3"),
(str((out_df["confidence"]>=.9).sum()),"High Confidence ≥ 90%","#34c759"),
(str((out_df["confidence"]<.7).sum()), "Low Confidence < 70%", "#ff3b30"),
]):
with col:
st.markdown(f"""
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
try:
import plotly.express as px
cmap = {k: CATS[k]["color"] for k in CATS}
fig2 = px.histogram(out_df,x="confidence",color="class",
nbins=25,color_discrete_map=cmap,
labels={"confidence":"Confidence Score","class":""})
fig2.update_layout(
plot_bgcolor="white",paper_bgcolor="white",
font=dict(family="-apple-system,BlinkMacSystemFont,'SF Pro Text',sans-serif",
size=11,color="#1d1d1f"),
margin=dict(l=0,r=0,t=10,b=0),bargap=.06,
xaxis=dict(showgrid=False,color="#86868b"),
yaxis=dict(gridcolor="#f5f5f7",color="#86868b"),
legend=dict(bgcolor="white",bordercolor="#e2e2e7",borderwidth=1,
font=dict(size=11)),
)
st.plotly_chart(fig2, use_container_width=True)
except ImportError:
st.dataframe(out_df.groupby("class")["confidence"].describe().round(3),
use_container_width=True)
st.markdown("
", unsafe_allow_html=True)
# ── Section D: Article Length ────────────────────────────────────────
st.markdown("""
04 · Article Length
Word count by category.
""", unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
out_df["word_count"] = out_df["content"].fillna("").apply(lambda x: len(x.split()))
try:
import plotly.express as px
cmap = {k: CATS[k]["color"] for k in CATS}
fig3 = px.box(out_df,x="class",y="word_count",color="class",
color_discrete_map=cmap,points="outliers",
labels={"class":"","word_count":"Word Count"})
fig3.update_layout(
plot_bgcolor="white",paper_bgcolor="white",
font=dict(family="-apple-system,BlinkMacSystemFont,'SF Pro Text',sans-serif",
size=11,color="#1d1d1f"),
showlegend=False,margin=dict(l=0,r=0,t=10,b=0),
xaxis=dict(showgrid=False,color="#86868b",
tickfont=dict(size=11,color="#6e6e73")),
yaxis=dict(gridcolor="#f5f5f7",color="#86868b"),
)
st.plotly_chart(fig3, use_container_width=True)
except ImportError:
st.dataframe(out_df.groupby("class")["word_count"].describe().round(1),
use_container_width=True)
st.markdown("
", unsafe_allow_html=True)
# ── Section E: Spotlight ─────────────────────────────────────────────
st.markdown("""
05 · Article Spotlight
Discover a random article.
""", unsafe_allow_html=True)
st.markdown('', unsafe_allow_html=True)
if st.button("Shuffle Article", key="spot"):
row = out_df.sample(1).iloc[0]
label = row.get("class","")
meta = CATS.get(label, {"icon":"◉","color":"#1d1d1f","bg":"#f5f5f7"})
conf_v = row.get("confidence", None)
text = str(row["content"])
wc_c = len(text.split())
st.markdown(f"""
{meta['icon']} {label.replace('_',' ')}
{f'{conf_v:.1%} confidence' if conf_v else ""}
{wc_c} words
{text[:640]}{"…" if len(text)>640 else ""}
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# ══════════════ FOOTER ═══════════════════════════════════════════════════════
# ─────────────────────────────────────────────────────────────────────────────
# NOTE(review): the footer's HTML was lost in extraction — restore from the
# design source; the call is kept as the layout slot.
st.markdown("""
""", unsafe_allow_html=True)