Akilashamnaka12 committed on
Commit
dafbab2
·
verified ·
1 Parent(s): a6db15a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +532 -37
src/streamlit_app.py CHANGED
@@ -1,40 +1,535 @@
1
- import altair as alt
2
- import numpy as np
 
3
  import pandas as pd
4
  import streamlit as st
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import nltk
4
  import pandas as pd
5
  import streamlit as st
6
+ import matplotlib.pyplot as plt
7
+ from collections import Counter
8
+ from wordcloud import WordCloud
9
+ from nltk.corpus import stopwords
10
+ from nltk.tokenize import word_tokenize
11
 
12
+ # ─── HF token (set as a Secret in Space settings for private/gated models) ────
13
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
14
+
15
+ # ─── Page Config ──────────────────────────────────────────────────────────────
16
+ st.set_page_config(
17
+ page_title="NewsLens Β· Sri Lanka",
18
+ page_icon="πŸ”Ž",
19
+ layout="wide",
20
+ initial_sidebar_state="collapsed",
21
+ )
22
+
23
+ # ─── NLTK – write to /tmp so HF Spaces (read-only FS) can cache data ──────────
24
+ NLTK_DATA_DIR = "/tmp/nltk_data"
25
+ os.makedirs(NLTK_DATA_DIR, exist_ok=True)
26
+ if NLTK_DATA_DIR not in nltk.data.path:
27
+ nltk.data.path.insert(0, NLTK_DATA_DIR)
28
+
29
@st.cache_resource
def download_nltk():
    """Download the NLTK packages the app needs into ``NLTK_DATA_DIR``.

    Each package is attempted independently; an exception raised while
    fetching one package is ignored so the remaining ones are still tried.
    """
    wanted = ("stopwords", "punkt", "punkt_tab")
    for package_name in wanted:
        try:
            nltk.download(package_name, download_dir=NLTK_DATA_DIR, quiet=True)
        except Exception:
            # Best effort: skip this package and carry on with the next one.
            continue
36
+
37
+ download_nltk()
38
+
39
+ # ─── CSS ──────────────────────────────────────────────────────────────────────
40
+ st.markdown("""
41
+ <style>
42
+ @import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Sans:ital,wght@0,300;0,400;0,500;1,300&display=swap');
43
+
44
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
45
+
46
+ html, body, [data-testid="stAppViewContainer"] {
47
+ background: #07090f !important;
48
+ color: #e8eaf0 !important;
49
+ font-family: 'DM Sans', sans-serif !important;
50
+ }
51
+ [data-testid="stAppViewContainer"] { padding: 0 !important; }
52
+ [data-testid="stHeader"] { background: transparent !important; }
53
+ section.main > div { padding-top: 0 !important; }
54
+ .block-container { padding: 0 2rem 4rem 2rem !important; max-width: 1280px !important; }
55
+
56
+ /* Hero */
57
+ .hero {
58
+ background: linear-gradient(135deg, #0b1120 0%, #0d1f3c 55%, #062a3a 100%);
59
+ border-bottom: 1px solid #1a2a44;
60
+ padding: 3.5rem 3rem 2.8rem;
61
+ position: relative; overflow: hidden;
62
+ }
63
+ .hero::before {
64
+ content:''; position:absolute; inset:0;
65
+ background: radial-gradient(ellipse 70% 60% at 80% 30%, rgba(0,200,180,.09) 0%, transparent 70%);
66
+ pointer-events: none;
67
+ }
68
+ .hero-eyebrow { font-size:.75rem; font-weight:500; letter-spacing:.18em; color:#00c8b4; text-transform:uppercase; margin-bottom:.9rem; }
69
+ .hero-title { font-family:'Syne',sans-serif; font-size:clamp(2.2rem,5vw,3.6rem); font-weight:800; line-height:1.08; color:#fff; margin-bottom:1rem; }
70
+ .hero-title span { color:#00c8b4; }
71
+ .hero-sub { font-size:1.05rem; font-weight:300; line-height:1.65; color:#94a3b8; max-width:560px; }
72
+
73
+ /* Tabs */
74
+ [data-testid="stTabs"] > div:first-child { background:#0b111f; border-bottom:1px solid #1a2a44; padding:0 2rem; gap:0 !important; }
75
+ [data-testid="stTabs"] button { font-family:'Syne',sans-serif !important; font-size:.88rem !important; font-weight:600 !important; color:#64748b !important; padding:1rem 1.5rem !important; border-radius:0 !important; border-bottom:2px solid transparent !important; transition:color .2s,border-color .2s !important; }
76
+ [data-testid="stTabs"] button:hover { color:#cbd5e1 !important; }
77
+ [data-testid="stTabs"] button[aria-selected="true"] { color:#00c8b4 !important; border-bottom-color:#00c8b4 !important; background:transparent !important; }
78
+
79
+ /* Cards */
80
+ .card { background:#0f172a; border:1px solid #1e2d45; border-radius:14px; padding:1.8rem 1.8rem 1.6rem; margin-bottom:1.4rem; transition:border-color .2s,box-shadow .2s; }
81
+ .card:hover { border-color:#00c8b4; box-shadow:0 0 28px rgba(0,200,180,.08); }
82
+ .card-title { font-family:'Syne',sans-serif; font-size:1rem; font-weight:700; color:#e2e8f0; margin-bottom:.35rem; }
83
+ .card-sub { font-size:.82rem; color:#64748b; font-weight:300; margin-bottom:1.1rem; }
84
+
85
+ /* Labels / chips / badges */
86
+ .section-label { font-family:'Syne',sans-serif; font-size:.72rem; font-weight:700; letter-spacing:.14em; text-transform:uppercase; color:#00c8b4; margin-bottom:.6rem; }
87
+ .stat-row { display:flex; gap:1rem; flex-wrap:wrap; margin:1rem 0; }
88
+ .stat-chip { background:#1e2d45; border-radius:8px; padding:.55rem 1.1rem; font-family:'Syne',sans-serif; font-size:.85rem; font-weight:600; color:#e2e8f0; }
89
+ .stat-chip span { color:#00c8b4; font-size:1.15rem; display:block; }
90
+ .badge { display:inline-block; padding:.25rem .7rem; border-radius:999px; font-size:.72rem; font-weight:600; letter-spacing:.05em; text-transform:uppercase; }
91
+ .badge-teal { background:rgba(0,200,180,.15); color:#00c8b4; border:1px solid rgba(0,200,180,.3); }
92
+ .badge-blue { background:rgba(59,130,246,.15); color:#60a5fa; border:1px solid rgba(59,130,246,.3); }
93
+ .badge-amber { background:rgba(245,158,11,.12); color:#fbbf24; border:1px solid rgba(245,158,11,.3); }
94
+ .badge-rose { background:rgba(244,63,94,.12); color:#fb7185; border:1px solid rgba(244,63,94,.3); }
95
+ .badge-violet { background:rgba(139,92,246,.12); color:#a78bfa; border:1px solid rgba(139,92,246,.3); }
96
+
97
+ /* Answer box */
98
+ .answer-box { background:linear-gradient(135deg,#0b2034,#091c2e); border:1px solid #00c8b4; border-radius:12px; padding:1.4rem 1.6rem; margin-top:1.2rem; }
99
+ .answer-label { font-family:'Syne',sans-serif; font-size:.68rem; font-weight:700; letter-spacing:.14em; text-transform:uppercase; color:#00c8b4; margin-bottom:.5rem; }
100
+ .answer-text { font-size:1.05rem; color:#e2e8f0; line-height:1.7; }
101
+ .score-bar-wrap { margin-top:.8rem; }
102
+ .score-bar-label { font-size:.75rem; color:#64748b; margin-bottom:.25rem; }
103
+ .score-bar-outer { background:#1e2d45; border-radius:999px; height:6px; }
104
+ .score-bar-inner { background:linear-gradient(90deg,#00c8b4,#0ea5e9); border-radius:999px; height:6px; }
105
+
106
+ /* Inputs */
107
+ [data-testid="stFileUploader"] { background:#0f172a !important; border:1.5px dashed #1e3a5f !important; border-radius:12px !important; padding:1.5rem !important; }
108
+ [data-testid="stFileUploader"]:hover { border-color:#00c8b4 !important; }
109
+ textarea { background:#0f172a !important; border:1px solid #1e2d45 !important; border-radius:10px !important; color:#e2e8f0 !important; font-family:'DM Sans',sans-serif !important; font-size:.95rem !important; }
110
+ textarea:focus { border-color:#00c8b4 !important; box-shadow:0 0 0 2px rgba(0,200,180,.18) !important; }
111
+
112
+ /* Buttons */
113
+ .stButton > button { background:linear-gradient(135deg,#00c8b4,#0ea5e9) !important; color:#07090f !important; border:none !important; border-radius:8px !important; font-family:'Syne',sans-serif !important; font-weight:700 !important; font-size:.88rem !important; letter-spacing:.04em !important; padding:.6rem 1.6rem !important; cursor:pointer !important; transition:opacity .2s,box-shadow .2s !important; }
114
+ .stButton > button:hover { opacity:.88 !important; box-shadow:0 4px 20px rgba(0,200,180,.35) !important; }
115
+ [data-testid="stDownloadButton"] button { background:transparent !important; border:1.5px solid #00c8b4 !important; color:#00c8b4 !important; font-family:'Syne',sans-serif !important; font-weight:700 !important; font-size:.85rem !important; border-radius:8px !important; padding:.55rem 1.4rem !important; transition:background .2s !important; }
116
+ [data-testid="stDownloadButton"] button:hover { background:rgba(0,200,180,.12) !important; }
117
+
118
+ /* Misc */
119
+ hr { border-color:#1e2d45 !important; margin:1.8rem 0 !important; }
120
+ [data-testid="stSelectbox"] > div > div { background:#0f172a !important; border-color:#1e2d45 !important; color:#e2e8f0 !important; border-radius:8px !important; }
121
+ ::-webkit-scrollbar { width:6px; }
122
+ ::-webkit-scrollbar-track { background:#0b111f; }
123
+ ::-webkit-scrollbar-thumb { background:#1e2d45; border-radius:3px; }
124
+ ::-webkit-scrollbar-thumb:hover { background:#00c8b4; }
125
+ [data-testid="stTabsContent"] { padding:2rem 0 !important; }
126
+ </style>
127
+ """, unsafe_allow_html=True)
128
+
129
+ # ─── Constants ────────────────────────────────────────────────────────────────
130
# The five class names the rest of the app reports on.
CATEGORIES = [
    "Business",
    "Opinion",
    "Political_gossip",
    "Sports",
    "World_news",
]

# CSS badge class used when rendering each category.
CAT_BADGE = dict(zip(
    CATEGORIES,
    ["badge-teal", "badge-blue", "badge-amber", "badge-rose", "badge-violet"],
))

# Chart colour used for each category.
CAT_COLOR = dict(zip(
    CATEGORIES,
    ["#00c8b4", "#60a5fa", "#fbbf24", "#fb7185", "#a78bfa"],
))

# Map whatever the model returns → one of the 5 assignment class names.
# Keys are compared against the stripped, lower-cased model label.
LABEL_MAP = {
    # direct names, with underscore or space separators
    "business": "Business",
    "opinion": "Opinion",
    "political_gossip": "Political_gossip",
    "political gossip": "Political_gossip",
    "sports": "Sports",
    "world_news": "World_news",
    "world news": "World_news",
    "world": "World_news",
    # generic index-style labels
    "label_0": "Business",
    "label_1": "Opinion",
    "label_2": "Political_gossip",
    "label_3": "Sports",
    "label_4": "World_news",
    # longer descriptive labels
    "business and finance": "Business",
    "opinions and editorials": "Opinion",
    "politics": "Political_gossip",
}


def normalise_label(raw: str) -> str:
    """Translate a raw model label into one of ``CATEGORIES`` where possible.

    A label that already is a category name is returned untouched. Otherwise
    the label is stripped and lower-cased and looked up in ``LABEL_MAP``; if
    no entry exists the original ``raw`` value is returned unchanged.
    """
    if raw not in CATEGORIES:
        lookup_key = raw.strip().lower()
        return LABEL_MAP[lookup_key] if lookup_key in LABEL_MAP else raw
    return raw
156
+
157
+ # ─── Text preprocessor ────────────────────────────────────────────────────────
158
# Lazily built English stopword set, shared by every preprocess_text() call so
# the NLTK corpus is not re-read and re-hashed once per document.
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return the English stopword set, building it from NLTK on first use."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words("english"))
    return _ENGLISH_STOPWORDS


def preprocess_text(text: str) -> str:
    """Normalise a news excerpt into a lower-case, space-separated token string.

    Steps: lower-case, drop URLs (``http…`` / ``www.…``), replace every
    character that is not ``a-z`` or whitespace with a space, collapse runs
    of whitespace, then remove English stopwords and tokens of two
    characters or fewer.

    Args:
        text: The raw text. Any non-``str`` value yields ``""``.

    Returns:
        The cleaned text. If stopword lookup or tokenisation fails, the
        regex-cleaned text is returned without stopword filtering.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    text = re.sub(r"[^a-z\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    try:
        sw = _english_stopwords()
        tokens = word_tokenize(text)
    except Exception:
        # Deliberate best effort: if the stopword/tokenizer step cannot run,
        # fall back to the regex-cleaned text rather than failing the caller.
        return text
    return " ".join(t for t in tokens if t not in sw and len(t) > 2)
172
+
173
+ # ─── Model loaders ────────────────────────────────────────────────────────────
174
@st.cache_resource(show_spinner=False)
def _build_classifier():
    """Build the text-classification pipeline, raising on any failure.

    Only this builder is cached. Because it raises instead of returning an
    error value, a failed attempt leaves nothing in the cache and the next
    call tries again.
    """
    # Replace MODEL_ID with your fine-tuned model pushed to HF Hub in Task 4,
    # e.g. "your-username/daily-mirror-news-classifier".
    # If your Space or model is private, add HF_TOKEN as a Secret in Space settings.
    MODEL_ID = "valurank/distilroberta-news-category"   # ← swap after Task 4

    from transformers import pipeline as hf_pipeline
    kwargs = {"task": "text-classification", "model": MODEL_ID,
              "truncation": True, "max_length": 512}
    if HF_TOKEN:
        kwargs["token"] = HF_TOKEN
    return hf_pipeline(**kwargs)


def load_classifier():
    """Return ``(classifier, error)`` for the news classification model.

    Returns:
        ``(pipeline, None)`` when the model loads, otherwise
        ``(None, message)`` where ``message`` is the exception text.

    The error tuple is produced outside the cached builder, so a transient
    failure is reported to the caller but never memoised.
    """
    try:
        return _build_classifier(), None
    except Exception as e:
        return None, str(e)
192
+
193
+
194
@st.cache_resource(show_spinner=False)
def _build_qa():
    """Build the question-answering pipeline, raising on any failure.

    The tokenizer and model are loaded explicitly and handed to
    ``pipeline()`` together with an explicit task string. Per the original
    author's note this works around an "Unknown task question-answering"
    error seen in some transformers versions / environments.
    NOTE(review): that rationale is not verifiable from this file.

    Only this builder is cached; because it raises instead of returning an
    error value, a failed attempt is not memoised.
    """
    QA_MODEL = "deepset/roberta-base-squad2"
    from transformers import (
        AutoTokenizer,
        AutoModelForQuestionAnswering,
        pipeline as hf_pipeline,
    )
    tok = AutoTokenizer.from_pretrained(QA_MODEL)
    model = AutoModelForQuestionAnswering.from_pretrained(QA_MODEL)
    return hf_pipeline(
        task="question-answering",   # explicit task string
        model=model,
        tokenizer=tok,
    )


def load_qa():
    """Return ``(qa_pipeline, error)`` for the extractive Q&A model.

    Returns:
        ``(pipeline, None)`` when the model loads, otherwise
        ``(None, message)`` where ``message`` is the exception text.

    The error tuple is produced outside the cached builder, so a transient
    failure is reported to the caller but never memoised.
    """
    try:
        return _build_qa(), None
    except Exception as e:
        return None, str(e)
219
+
220
+ # ══════════════════════════════════════════════════════════════════════════════
221
+ # HERO
222
+ # ══════════════════════════════════════════════════════════════════════════════
223
+ st.markdown("""
224
+ <div class="hero">
225
+ <div class="hero-eyebrow">πŸ”Ž &nbsp;Text Analytics Β· DA3111</div>
226
+ <div class="hero-title">News<span>Lens</span></div>
227
+ <div class="hero-sub">
228
+ Classify Sri Lankan news articles, interrogate content with Q&amp;A,
229
+ and surface editorial insights β€” all in one unified workspace.
230
+ </div>
231
+ </div>
232
+ """, unsafe_allow_html=True)
233
+
234
+ tab1, tab2, tab3 = st.tabs([
235
+ " πŸ“‚ Text Classification ",
236
+ " πŸ’¬ Q & A Pipeline ",
237
+ " πŸ“Š Insights ",
238
+ ])
239
+
240
+ # ══════════════════════════════════════════════════════════════════════════════
241
+ # TAB 1 – TEXT CLASSIFICATION
242
+ # ══════════════════════════════════════════════════════════════════════════════
243
+ with tab1:
244
+ left, right = st.columns([1.1, 1], gap="large")
245
+
246
+ with left:
247
+ st.markdown('<div class="section-label">Upload</div>', unsafe_allow_html=True)
248
+ st.markdown("""
249
+ <div class="card">
250
+ <div class="card-title">Upload your CSV file</div>
251
+ <div class="card-sub">Must contain a <code>content</code> column with news excerpts.</div>
252
+ """, unsafe_allow_html=True)
253
+ uploaded = st.file_uploader("", type=["csv"], label_visibility="collapsed")
254
+ st.markdown("</div>", unsafe_allow_html=True)
255
+
256
+ if uploaded:
257
+ try:
258
+ uploaded.seek(0) # reset buffer – important on HF Spaces
259
+ df_raw = pd.read_csv(uploaded)
260
+ except Exception as e:
261
+ st.error(f"Could not parse CSV: {e}")
262
+ st.stop()
263
+
264
+ if "content" not in df_raw.columns:
265
+ st.error("❌ The uploaded file must have a `content` column.")
266
+ else:
267
+ st.markdown(f"""
268
+ <div class="stat-row">
269
+ <div class="stat-chip"><span>{len(df_raw)}</span>Records</div>
270
+ <div class="stat-chip"><span>{df_raw.shape[1]}</span>Columns</div>
271
+ </div>""", unsafe_allow_html=True)
272
+
273
+ st.markdown('<div class="section-label" style="margin-top:1rem">Preview</div>',
274
+ unsafe_allow_html=True)
275
+ st.dataframe(df_raw.head(5), use_container_width=True, hide_index=True)
276
+
277
+ run_btn = st.button("⚑ Run Classification", use_container_width=True)
278
+
279
# Classify every row of the uploaded CSV when the button is pressed, then
# stash the result in session state and rerun so the other panels pick it up.
if run_btn:
    with st.spinner("Loading classifier… (first run ~30 s on HF Spaces)"):
        clf, err = load_classifier()
    if err:
        st.error(f"Model load error: {err}")
    else:
        df_out = df_raw.copy()
        pred_labels = []
        prog = st.progress(0, text="Classifying…")
        # Coerce every cell to str: a numeric `content` value would make
        # preprocess_text() return "" and then fail on the txt[:512] slice.
        # Only the working list is coerced; the output column is left as-is.
        texts = df_out["content"].fillna("").astype(str).tolist()

        for i, txt in enumerate(texts, start=1):
            # Fall back to the raw text when cleaning strips everything.
            clean = preprocess_text(txt) or txt[:512]
            try:
                raw = clf(clean[:512])[0]["label"]
                label = normalise_label(raw)
            except Exception:
                # One bad row must not abort the whole batch.
                label = "Unknown"
            pred_labels.append(label)
            prog.progress(i / len(texts),
                          text=f"Classifying {i}/{len(texts)}…")

        prog.empty()
        df_out["class"] = pred_labels
        st.session_state["df_classified"] = df_out
        st.session_state["classification_done"] = True
        st.rerun()
306
+
307
+ with right:
308
+ st.markdown('<div class="section-label">Results</div>', unsafe_allow_html=True)
309
+
310
+ if st.session_state.get("classification_done"):
311
+ df_out = st.session_state["df_classified"]
312
+ counts = df_out["class"].value_counts()
313
+
314
+ chip_html = '<div class="stat-row">'
315
+ for cat, cnt in counts.items():
316
+ badge = CAT_BADGE.get(cat, "badge-teal")
317
+ chip_html += (f'<div class="stat-chip"><span>{cnt}</span>'
318
+ f'<span class="badge {badge}">{cat.replace("_"," ")}</span></div>')
319
+ chip_html += "</div>"
320
+ st.markdown(chip_html, unsafe_allow_html=True)
321
+
322
+ cols = [c for c in ["content", "class"] if c in df_out.columns]
323
+ st.markdown('<div class="card" style="margin-top:.8rem">', unsafe_allow_html=True)
324
+ st.markdown('<div class="card-title">Classified Records</div>', unsafe_allow_html=True)
325
+ st.dataframe(df_out[cols].head(20), use_container_width=True, hide_index=True,
326
+ column_config={"content": st.column_config.TextColumn("Content", width="large")})
327
+ st.markdown("</div>", unsafe_allow_html=True)
328
+
329
+ st.download_button(
330
+ "⬇ Download output.csv",
331
+ data=df_out.to_csv(index=False).encode("utf-8"),
332
+ file_name="output.csv", mime="text/csv",
333
+ use_container_width=True,
334
+ )
335
+ else:
336
+ st.markdown("""
337
+ <div class="card" style="text-align:center;padding:3.5rem 2rem;">
338
+ <div style="font-size:3rem;margin-bottom:1rem">πŸ“‚</div>
339
+ <div style="font-family:'Syne',sans-serif;font-size:1rem;font-weight:700;color:#334155;">
340
+ Upload a CSV to see results</div>
341
+ <div style="font-size:.82rem;color:#475569;margin-top:.4rem;">
342
+ Predictions appear here after classification runs.</div>
343
+ </div>""", unsafe_allow_html=True)
344
+
345
+ # ══════════════════════════════════════════════════════════════════════════════
346
+ # TAB 2 – Q&A PIPELINE
347
+ # ═════════════════════════════════════════════════════════════════��════════════
348
+ with tab2:
349
+ l2, r2 = st.columns([1, 1], gap="large")
350
+
351
+ with l2:
352
+ st.markdown('<div class="section-label">Context</div>', unsafe_allow_html=True)
353
+ st.markdown('<div class="card">', unsafe_allow_html=True)
354
+ st.markdown('<div class="card-title">Paste a news excerpt</div>', unsafe_allow_html=True)
355
+ st.markdown('<div class="card-sub">The Q&A model will read this as its context.</div>',
356
+ unsafe_allow_html=True)
357
+
358
+ default_ctx = ""
359
+ if st.session_state.get("classification_done"):
360
+ df_c = st.session_state["df_classified"]
361
+ if len(df_c):
362
+ default_ctx = str(df_c["content"].iloc[0])
363
+
364
+ context_text = st.text_area("", value=default_ctx, height=260,
365
+ placeholder="Paste any news article content here…",
366
+ label_visibility="collapsed", key="qa_context")
367
+ st.markdown("</div>", unsafe_allow_html=True)
368
+
369
+ with r2:
370
+ st.markdown('<div class="section-label">Question</div>', unsafe_allow_html=True)
371
+ st.markdown('<div class="card">', unsafe_allow_html=True)
372
+ st.markdown('<div class="card-title">Ask anything about the article</div>', unsafe_allow_html=True)
373
+ st.markdown('<div class="card-sub">The model extracts an answer from the context on the left.</div>',
374
+ unsafe_allow_html=True)
375
+
376
+ question_text = st.text_area("", height=120,
377
+ placeholder="e.g. Who is mentioned in this article?",
378
+ label_visibility="collapsed", key="qa_question")
379
+ ask_btn = st.button("πŸ” Get Answer", use_container_width=True)
380
+ st.markdown("</div>", unsafe_allow_html=True)
381
+
382
# Run extractive Q&A over the pasted context when the button is pressed.
if ask_btn:
    # Local import, matching the file's existing function-scope import style.
    import html

    if not context_text.strip():
        st.warning("Please paste a news excerpt in the Context panel on the left.")
    elif not question_text.strip():
        st.warning("Please type a question.")
    else:
        with st.spinner("Loading Q&A model… (first run ~30 s)"):
            qa, err = load_qa()
        if err:
            st.error(f"Q&A model failed to load: {err}")
        else:
            with st.spinner("Finding the answer…"):
                try:
                    result = qa(question=question_text.strip(),
                                context=context_text.strip()[:3000])
                    score_pct = int(result["score"] * 100)
                    # The answer is a span of user-supplied text and is rendered
                    # with unsafe_allow_html=True, so it must be escaped to keep
                    # markup in the context from being injected into the page.
                    answer = html.escape(result["answer"])
                    st.markdown(f"""
<div class="answer-box">
<div class="answer-label">Answer</div>
<div class="answer-text">{answer}</div>
<div class="score-bar-wrap">
<div class="score-bar-label">Confidence · {score_pct}%</div>
<div class="score-bar-outer">
<div class="score-bar-inner" style="width:{score_pct}%"></div>
</div>
</div>
</div>""", unsafe_allow_html=True)
                except Exception as e:
                    st.error(f"Inference error: {e}")
412
+
413
+ if st.session_state.get("classification_done"):
414
+ st.markdown("---")
415
+ st.markdown('<div class="section-label">Suggested Questions</div>', unsafe_allow_html=True)
416
+ c1, c2, c3, c4 = st.columns(4)
417
+ for col, q in zip([c1, c2, c3, c4],
418
+ ["Who is this article about?", "What event is described?",
419
+ "Where did this take place?", "What was the outcome?"]):
420
+ col.markdown(f"""
421
+ <div class="card" style="padding:1rem 1.2rem;text-align:center;">
422
+ <div style="font-size:.85rem;color:#94a3b8;">{q}</div>
423
+ </div>""", unsafe_allow_html=True)
424
+
425
+ # ══════════════════════════════════════════════════════════════════════════════
426
+ # TAB 3 – INSIGHTS
427
+ # ══════════════════════════════════════════════════════════════════════════════
428
+ with tab3:
429
+ if not st.session_state.get("classification_done"):
430
+ st.markdown("""
431
+ <div class="card" style="text-align:center;padding:4rem 2rem;">
432
+ <div style="font-size:3.5rem;margin-bottom:1rem">πŸ“Š</div>
433
+ <div style="font-family:'Syne',sans-serif;font-size:1.1rem;font-weight:700;color:#334155;">
434
+ Insights unlock after classification</div>
435
+ <div style="font-size:.88rem;color:#475569;margin-top:.5rem;">
436
+ Go to <strong style="color:#00c8b4">Text Classification</strong>,
437
+ upload a CSV, and run the model first.</div>
438
+ </div>""", unsafe_allow_html=True)
439
+ else:
440
+ df_ins = st.session_state["df_classified"]
441
+ counts = df_ins["class"].value_counts()
442
+ total = len(df_ins)
443
+
444
+ # KPI row
445
+ kpi_cols = st.columns(5)
446
+ for col, cat in zip(kpi_cols, CATEGORIES):
447
+ cnt = int(counts.get(cat, 0))
448
+ pct = round(cnt / total * 100, 1) if total else 0
449
+ badge = CAT_BADGE.get(cat, "badge-teal")
450
+ col.markdown(f"""
451
+ <div class="card" style="text-align:center;padding:1.4rem 1rem;">
452
+ <div class="badge {badge}" style="margin-bottom:.7rem">{cat.replace('_',' ')}</div>
453
+ <div style="font-family:'Syne',sans-serif;font-size:1.9rem;font-weight:800;color:#e2e8f0">{cnt}</div>
454
+ <div style="font-size:.78rem;color:#64748b;margin-top:.2rem">{pct}% of total</div>
455
+ </div>""", unsafe_allow_html=True)
456
+
457
+ st.markdown("---")
458
+ ch1, ch2 = st.columns(2, gap="large")
459
+
460
+ with ch1:
461
+ st.markdown('<div class="section-label">Category Distribution</div>', unsafe_allow_html=True)
462
+ fig, ax = plt.subplots(figsize=(5, 4.2), facecolor="#0f172a")
463
+ labels = [c.replace("_", " ") for c in counts.index]
464
+ colors = [CAT_COLOR.get(c, "#00c8b4") for c in counts.index]
465
+ wedges, _, autotexts = ax.pie(
466
+ counts.values, labels=None, autopct="%1.1f%%", colors=colors,
467
+ startangle=120, wedgeprops=dict(width=0.55, edgecolor="#07090f", linewidth=2),
468
+ pctdistance=0.78)
469
+ for at in autotexts:
470
+ at.set_color("#e2e8f0"); at.set_fontsize(8.5); at.set_fontweight("bold")
471
+ ax.legend(wedges, labels, loc="lower center", bbox_to_anchor=(0.5, -0.12),
472
+ ncol=3, frameon=False, labelcolor="#94a3b8", fontsize=8)
473
+ ax.set_facecolor("#0f172a"); fig.patch.set_facecolor("#0f172a")
474
+ st.pyplot(fig, use_container_width=True); plt.close(fig)
475
+
476
+ with ch2:
477
+ st.markdown('<div class="section-label">Article Counts by Category</div>', unsafe_allow_html=True)
478
+ fig2, ax2 = plt.subplots(figsize=(5, 4.2), facecolor="#0f172a")
479
+ bars = ax2.barh([l.replace("_", " ") for l in counts.index], counts.values,
480
+ color=[CAT_COLOR.get(c, "#00c8b4") for c in counts.index],
481
+ height=0.55, edgecolor="none")
482
+ ax2.set_facecolor("#0f172a")
483
+ for sp in ["top", "right"]: ax2.spines[sp].set_visible(False)
484
+ for sp in ["left", "bottom"]: ax2.spines[sp].set_color("#1e2d45")
485
+ ax2.tick_params(colors="#64748b", labelsize=8.5)
486
+ for bar in bars:
487
+ ax2.text(bar.get_width() + 0.4, bar.get_y() + bar.get_height() / 2,
488
+ str(int(bar.get_width())), va="center", ha="left",
489
+ color="#e2e8f0", fontsize=8.5, fontweight="bold")
490
+ fig2.patch.set_facecolor("#0f172a")
491
+ st.pyplot(fig2, use_container_width=True); plt.close(fig2)
492
+
493
+ st.markdown("---")
494
+ st.markdown('<div class="section-label">Word Cloud by Category</div>', unsafe_allow_html=True)
495
+ selected_cat = st.selectbox("", options=CATEGORIES,
496
+ format_func=lambda c: c.replace("_", " "),
497
+ label_visibility="collapsed")
498
+
499
+ cat_texts = df_ins[df_ins["class"] == selected_cat]["content"].fillna("").tolist()
500
+ combined = " ".join(preprocess_text(t) for t in cat_texts[:200])
501
+
502
+ if combined.strip():
503
+ wc = WordCloud(width=900, height=340, background_color="#0f172a",
504
+ colormap="cool", max_words=120, collocations=False).generate(combined)
505
+ fig3, ax3 = plt.subplots(figsize=(9, 3.5), facecolor="#0f172a")
506
+ ax3.imshow(wc, interpolation="bilinear"); ax3.axis("off")
507
+ fig3.patch.set_facecolor("#0f172a")
508
+ st.pyplot(fig3, use_container_width=True); plt.close(fig3)
509
+ else:
510
+ st.info(f"No content found for: {selected_cat.replace('_',' ')}")
511
+
512
+ st.markdown("---")
513
+ st.markdown(f'<div class="section-label">Top Unigrams Β· {selected_cat.replace("_"," ")}</div>',
514
+ unsafe_allow_html=True)
515
+ top_words = Counter(combined.split()).most_common(15)
516
+ if top_words:
517
+ words, freqs = zip(*top_words)
518
+ fig4, ax4 = plt.subplots(figsize=(9, 3), facecolor="#0f172a")
519
+ ax4.bar(words, freqs, color=CAT_COLOR.get(selected_cat, "#00c8b4"), edgecolor="none", width=0.6)
520
+ ax4.set_facecolor("#0f172a")
521
+ for sp in ["top", "right"]: ax4.spines[sp].set_visible(False)
522
+ for sp in ["left", "bottom"]: ax4.spines[sp].set_color("#1e2d45")
523
+ ax4.tick_params(axis="x", colors="#64748b", labelsize=8, rotation=30)
524
+ ax4.tick_params(axis="y", colors="#64748b", labelsize=8)
525
+ fig4.patch.set_facecolor("#0f172a")
526
+ st.pyplot(fig4, use_container_width=True); plt.close(fig4)
527
+
528
+ # ─── Footer ───────────────────────────────────────────────────────────────────
529
+ st.markdown("""
530
+ <div style="text-align:center;padding:2.5rem 0 1rem;color:#2a3a55;
531
+ font-size:.78rem;border-top:1px solid #1a2a44;margin-top:3rem;">
532
+ Built for <strong style="color:#00c8b4">IN23-S5-DA3111 Β· Text Analytics Assignment 1</strong>
533
+ &nbsp;Β·&nbsp; Powered by Hugging Face &amp; Streamlit
534
+ </div>
535
+ """, unsafe_allow_html=True)