Daksh C Jain
Initial commit: EIS Topic Intelligence — UMAP+HDBSCAN+Mistral council, dark EIS theme, 23 clusters from Enterprise Information Systems corpus
c91d9b4
import html
import json
import os
from typing import List
# Load .env file automatically if present
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass # python-dotenv not installed; set env vars manually or pip install python-dotenv
import gradio as gr
import pandas as pd
from topic_pipeline import (
OUTPUT_DIR,
parse_notebooklm_tccm_text,
run_complete_pipeline,
write_tccm_dual_validation,
)
# Create the output directory at import time (a no-op when it already exists);
# the helpers below resolve every file name against OUTPUT_DIR.
os.makedirs(OUTPUT_DIR, exist_ok=True)
def _exists(name: str) -> bool:
    """Report whether ``name`` is present inside ``OUTPUT_DIR``."""
    candidate = os.path.join(OUTPUT_DIR, name)
    return os.path.exists(candidate)
def _load_json(name: str):
    """Parse and return the UTF-8 JSON document stored as ``name`` in ``OUTPUT_DIR``."""
    json_path = os.path.join(OUTPUT_DIR, name)
    with open(json_path, "r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed
def _download_files() -> List[str]:
    """List the paths of the known deliverables that currently exist on disk.

    The returned paths keep the order of the file names declared below.
    """
    deliverables = (
        "comparison.csv",
        "taxonomy_map.json",
        "topic_model_report.md",
        "narrative.txt",
        "cluster_optimization_log.csv",
        "llm_council_validation.csv",
        "tccm_validation.csv",
        "tccm_dual_validation.csv",
        "notebooklm_extraction.csv",
        "compliance_checklist.csv",
        "compliance_checklist.json",
        "run_metadata.json",
        "combined_labels.json",
    )
    available: List[str] = []
    for filename in deliverables:
        if _exists(filename):
            available.append(os.path.join(OUTPUT_DIR, filename))
    return available
def _phase_html() -> str:
    """Render the pipeline progress strip as a row of HTML chips.

    A phase counts as done when its marker file exists in the output
    directory; otherwise it is drawn as pending.
    """
    marker_by_phase = {
        "Corpus": "corpus_config.json",
        "Embeddings": "combined_emb.npy",
        "Optimization": "cluster_optimization_log.csv",
        "Clusters": "combined_labels.json",
        "Council": "llm_council_validation.csv",
        "TCCM": "tccm_validation.csv",
        "Compliance": "compliance_checklist.csv",
        "Report": "topic_model_report.md",
    }

    def chip(title: str, finished: bool) -> str:
        if finished:
            state, glyph = "done", "✓"
        else:
            state, glyph = "pending", "·"
        return f"<span class='eis-phase-chip {state}'>{glyph} {title}</span>"

    markup = "".join(
        chip(title, _exists(marker)) for title, marker in marker_by_phase.items()
    )
    return "<div style='display:flex;gap:8px;flex-wrap:wrap;padding:4px 0'>" + markup + "</div>"
def _cluster_table():
    """Build the rows of the Clusters dataframe from ``combined_labels.json``.

    Returns an empty list while the labels file has not been generated.
    Each row holds: cluster id, label, category, paper count, confidence,
    agreement score, up to 8 keywords, up to 3 top titles, and the reasoning.
    """
    if not _exists("combined_labels.json"):
        return []

    def as_row(entry):
        return [
            entry.get("cluster_id"),
            entry.get("label"),
            entry.get("category"),
            entry.get("paper_count"),
            entry.get("confidence"),
            entry.get("agreement_score"),
            "; ".join(entry.get("keywords", [])[:8]),
            " | ".join(entry.get("top_titles", [])[:3]),
            entry.get("reasoning", ""),
        ]

    return [as_row(entry) for entry in _load_json("combined_labels.json")]
def _council_table():
    """Return the first 120 rows of the council validation CSV, or ``[]`` when it is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "llm_council_validation.csv")
    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path)
        return frame.head(120)
    return []
def _council_pct(value) -> int:
    """Convert a 0-1 score into a whole-number percentage.

    The product is rounded rather than truncated, so a score such as 0.57
    (whose float product is 56.99999999999999) is reported as 57, not 56.
    Missing, NaN, infinite, or non-numeric values yield 0 instead of raising.
    """
    try:
        return round(float(value) * 100)
    except (TypeError, ValueError, OverflowError):
        return 0


def _council_text(value) -> str:
    """HTML-escape a CSV cell, rendering a missing (None/NaN) cell as an empty string."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return html.escape(str(value))


def _council_vote_html(row) -> str:
    """Render one validator's vote (member, proposed label, method) as a card."""
    member = _council_text(row.get("member", ""))
    label = _council_text(row.get("member_label", ""))
    method = _council_text(row.get("method", ""))
    return (
        "<div class='council-vote'>"
        "<div class='vote-dot'></div>"
        f"<div><strong>{member}</strong><span>{label}</span><small>{method}</small></div>"
        "</div>"
    )


def _council_cluster_html(cluster_id, final_label, group) -> str:
    """Render one cluster: its votes followed by the accepted label and its scores."""
    votes = "".join(_council_vote_html(row) for _, row in group.iterrows())
    # Scores are read from the first row of the group; ``Series.get`` returns
    # None when the column is absent, which ``_council_pct`` maps to 0.
    first_row = group.iloc[0]
    confidence = _council_pct(first_row.get("confidence"))
    agreement = _council_pct(first_row.get("agreement_score"))
    shown_label = html.escape(str(final_label))
    return (
        "<div class='council-cluster'>"
        "<div class='cluster-head'>"
        f"<span>Cluster {html.escape(str(cluster_id))}</span>"
        f"<strong>{shown_label}</strong>"
        "</div>"
        "<div class='council-flow'>"
        + votes +
        "<div class='final-label'>"
        "<small>Accepted label</small>"
        f"<strong>{shown_label}</strong>"
        f"<span>{confidence}% confidence | {agreement}% agreement</span>"
        "</div>"
        "</div>"
        "</div>"
    )


def _council_viz_html() -> str:
    """Render the LLM council validation board from ``llm_council_validation.csv``.

    Returns:
        An HTML string. When the CSV is absent, empty, or lacks the
        ``cluster_id`` / ``final_label`` columns, a ``council-empty``
        placeholder is returned instead of the board. Otherwise the board
        shows header metrics computed over the whole file and the first six
        (cluster_id, final_label) groups in file order.
    """
    path = os.path.join(OUTPUT_DIR, "llm_council_validation.csv")
    if not os.path.exists(path):
        return (
            "<div class='council-empty'>Run the pipeline to activate the LLM Council "
            "validation board.</div>"
        )
    empty_message = "<div class='council-empty'>Council validation file is empty.</div>"
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row for pandas to parse.
        return empty_message
    if df.empty:
        return empty_message
    if "cluster_id" not in df.columns or "final_label" not in df.columns:
        return (
            "<div class='council-empty'>Council validation file is missing the "
            "cluster_id / final_label columns.</div>"
        )

    def mean_pct(column: str) -> int:
        # Optional column: absent or non-numeric values contribute nothing.
        if column not in df.columns:
            return 0
        return _council_pct(pd.to_numeric(df[column], errors="coerce").mean())

    llm_member_present = (
        "member" in df.columns
        and df["member"].astype(str).str.contains("LLM|Mistral", case=False, regex=True).any()
    )
    llm_status = "Mistral LLM active" if llm_member_present else "Local semantic fallback active"

    max_clusters = 6
    rows = []
    grouped = df.groupby(["cluster_id", "final_label"], sort=False)
    for index, ((cluster_id, final_label), group) in enumerate(grouped):
        if index >= max_clusters:
            break  # stop early instead of materialising every group
        rows.append(_council_cluster_html(cluster_id, final_label, group))

    return (
        "<div class='council-board'>"
        "<div class='council-top'>"
        "<div><h3>LLM Council Validation Running In-App</h3>"
        "<p>Three independent validators inspect each cluster label, compare votes, "
        "and write the accepted label plus agreement score into the export file.</p></div>"
        "<div class='council-metrics'>"
        f"<div><strong>{len(df['cluster_id'].unique())}</strong><span>clusters checked</span></div>"
        f"<div><strong>{mean_pct('agreement_score')}%</strong><span>avg agreement</span></div>"
        f"<div><strong>{mean_pct('confidence')}%</strong><span>avg confidence</span></div>"
        f"<div><strong>{html.escape(llm_status)}</strong><span>council mode</span></div>"
        "</div></div>"
        "<div class='council-lane'>"
        "<div class='pulse-node'>1<br><span>Keyword Extractor</span></div>"
        "<div class='pulse-line'></div>"
        "<div class='pulse-node'>2<br><span>PAJAIS Mapper</span></div>"
        "<div class='pulse-line'></div>"
        "<div class='pulse-node'>3<br><span>LLM / Semantic Judge</span></div>"
        "<div class='pulse-line'></div>"
        "<div class='pulse-node final'>OK<br><span>Validated Label</span></div>"
        "</div>"
        + "".join(rows) +
        "</div>"
    )
def _optimizer_table():
    """Return the first 80 rows of the optimizer log, limited to the known columns.

    Only columns that actually occur in the CSV are kept, in the order listed
    below. Returns ``[]`` when the log file does not exist.
    """
    log_path = os.path.join(OUTPUT_DIR, "cluster_optimization_log.csv")
    if not os.path.exists(log_path):
        return []
    preferred = (
        "algorithm",
        "umap_n_neighbors",
        "umap_n_components",
        "hdbscan_min_cluster_size",
        "hdbscan_min_samples",
        "n_clusters",
        "noise_ratio",
        "min_size",
        "max_size",
        "too_small",
        "too_large",
        "silhouette_cosine",
        "score",
        "optimizer_recommendation",
    )
    log = pd.read_csv(log_path)
    present = []
    for column in preferred:
        if column in log.columns:
            present.append(column)
    trimmed = log[present]
    return trimmed.head(80)
def _tccm_table():
    """Return the first 100 rows of ``tccm_validation.csv``, or ``[]`` when it is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "tccm_validation.csv")
    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path)
        return frame.head(100)
    return []
def _tccm_dual_table():
    """Return the first 100 rows of ``tccm_dual_validation.csv``, or ``[]`` when it is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "tccm_dual_validation.csv")
    if os.path.exists(csv_path):
        frame = pd.read_csv(csv_path)
        return frame.head(100)
    return []
def _compliance_table():
    """Return the full compliance checklist as a dataframe, or ``[]`` when the CSV is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "compliance_checklist.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)
    return []
def _compliance_cell(row, column: str, default: str = "") -> str:
    """Return a checklist cell as text.

    ``pandas.read_csv`` turns blank cells into NaN, and ``str(nan)`` would
    show up in the UI as the literal text "nan"; such cells (and absent
    columns) are replaced by ``default`` instead.
    """
    value = row.get(column, default)
    if value is None or (isinstance(value, float) and value != value):
        return default
    return str(value)


def _compliance_html() -> str:
    """Render the compliance checklist CSV as a grid of status cards.

    Returns:
        An HTML string: a ``compliance-empty`` placeholder when
        ``compliance_checklist.csv`` does not exist, otherwise one card per
        CSV row with a colour-coded status badge, the requirement, its
        evidence and the associated file. A row with no status is shown as
        ``REVIEW``; statuses not in the colour map use a neutral grey.
    """
    path = os.path.join(OUTPUT_DIR, "compliance_checklist.csv")
    if not os.path.exists(path):
        return (
            "<div class='compliance-empty'>Run the pipeline to generate the professor-requirement "
            "compliance checklist.</div>"
        )
    df = pd.read_csv(path)
    color_map = {
        "PASS": "#0f766e",
        "FAIL": "#b91c1c",
        "CONFIG_REQUIRED": "#b45309",
        "ENV_FALLBACK": "#b45309",
        "INPUT_REQUIRED": "#b45309",
        "PARTIAL": "#7c3aed",
        "MANUAL_REQUIRED": "#475569",
        "REVIEW": "#7c3aed",
    }
    rows = []
    for _, row in df.iterrows():
        status = _compliance_cell(row, "Status", "REVIEW")
        # The colour always comes from the fixed map above, never from the CSV.
        color = color_map.get(status, "#475569")
        rows.append(
            "<div class='compliance-row'>"
            f"<span style='background:{color}'>{html.escape(status)}</span>"
            f"<strong>{html.escape(_compliance_cell(row, 'Requirement'))}</strong>"
            f"<p>{html.escape(_compliance_cell(row, 'Evidence'))}</p>"
            f"<small>{html.escape(_compliance_cell(row, 'File'))}</small>"
            "</div>"
        )
    return (
        "<div class='compliance-board'>"
        "<h3>Professor Requirement Compliance Checklist</h3>"
        "<p>This separates completed app evidence from items that still need API secrets, "
        "NotebookLM/full-text inputs, or mentor approval.</p>"
        "<div class='compliance-grid'>" + "".join(rows) + "</div></div>"
    )
def _tccm_dual_status_html() -> str:
    """Summarise ``tccm_dual_validation.csv`` as one count card per compliance status.

    A placeholder is returned when the CSV is absent or has no
    ``Final_TCCM_Compliance_Status`` column. Statuses containing "COMPLIANT"
    are coloured teal; all others amber.
    """
    csv_path = os.path.join(OUTPUT_DIR, "tccm_dual_validation.csv")
    if not os.path.exists(csv_path):
        return (
            "<div class='compliance-empty'>Upload NotebookLM and second-LLM extraction CSVs "
            "to generate TCCM dual validation.</div>"
        )
    frame = pd.read_csv(csv_path)
    status_col = "Final_TCCM_Compliance_Status"
    if status_col not in frame.columns:
        return "<div class='compliance-empty'>TCCM dual validation is pending source uploads.</div>"

    def card(status, count) -> str:
        if "COMPLIANT" in str(status):
            tint = "#0f766e"
        else:
            tint = "#b45309"
        return (
            f"<div class='tccm-card'><strong style='color:{tint}'>{count}</strong>"
            f"<span>{html.escape(str(status))}</span></div>"
        )

    tally = frame[status_col].value_counts().to_dict()
    card_markup = "".join(card(status, count) for status, count in tally.items())
    return (
        "<div class='tccm-status'><h3>TCCM Dual Validation Status</h3>"
        "<p>Required by email: NotebookLM extraction plus another LLM/extraction method. "
        "This screen reconciles those files with regex/semantic extraction.</p>"
        "<div>" + card_markup + "</div></div>"
    )
def _on_tccm_dual_validate(notebook_file, second_file):
    """Run TCCM dual validation on two uploads and return the refreshed panels.

    Each upload may be a path string or an object exposing ``.name``; anything
    else resolves to an empty path. Returns the status HTML, the dual
    validation table, and the list of downloadable files.
    """

    def resolve(upload):
        if isinstance(upload, str):
            return upload
        return getattr(upload, "name", "")

    notebook_path = resolve(notebook_file)
    second_path = resolve(second_file)
    write_tccm_dual_validation(notebook_path, second_path)
    return _tccm_dual_status_html(), _tccm_dual_table(), _download_files()
def _on_notebooklm_paste(notebook_text):
    """Import pasted NotebookLM table text and return the refreshed panels.

    Blank input returns a prompt to paste first. Otherwise the text is parsed
    to a CSV, dual validation is rewritten with no second-LLM file, and a
    banner reporting the parsed row count is placed above the status panel.
    Returns the status HTML, the dual validation table, and the download list.
    """
    pasted = str(notebook_text or "")
    if not pasted.strip():
        prompt = "<div class='compliance-empty'>Paste the NotebookLM table text first.</div>"
        return prompt, _tccm_dual_table(), _download_files()

    notebook_path = parse_notebooklm_tccm_text(notebook_text)
    write_tccm_dual_validation(notebook_path, "")
    if os.path.exists(notebook_path):
        count = len(pd.read_csv(notebook_path))
    else:
        count = 0
    banner = "".join(
        [
            "<div class='tccm-status'><h3>NotebookLM Paste Imported</h3>",
            f"<p>Parsed {count} NotebookLM rows into <code>outputs/notebooklm_extraction.csv</code>. ",
            "Merged with the independent regex/semantic extractor in ",
            "<code>outputs/tccm_dual_validation.csv</code>. Upload a second-LLM CSV as well ",
            "for full NotebookLM + second LLM compliance.</p></div>",
        ]
    )
    status = banner + _tccm_dual_status_html()
    return status, _tccm_dual_table(), _download_files()
def _chart_iframe(name: str) -> str:
    """Embed the chart file ``name`` from ``combined_charts`` in an iframe via ``srcdoc``.

    The file content has ``&`` and ``"`` entity-encoded so it can sit inside
    the double-quoted attribute. A placeholder box is returned when the chart
    file does not exist.
    """
    chart_path = os.path.join(OUTPUT_DIR, "combined_charts", name)
    if os.path.exists(chart_path):
        with open(chart_path, "r", encoding="utf-8") as chart_file:
            markup = chart_file.read()
        # Ampersands first, so the entities added for quotes are not re-encoded.
        markup = markup.replace("&", "&amp;")
        markup = markup.replace('"', "&quot;")
        opening = "<iframe srcdoc=\"" + markup + "\" width='100%' height='500' "
        return opening + "style='border:0;border-radius:8px;background:#0f172a'></iframe>"
    return (
        "<div style='height:320px;display:grid;place-items:center;"
        "background:#0f172a;color:#94a3b8;border-radius:8px'>"
        "Run the pipeline to generate this chart.</div>"
    )
def _cluster_card_html(cluster: dict) -> str:
    """Render one cluster summary as a card.

    Every value taken from the JSON is converted to text and HTML-escaped.
    Null or missing fields fall back to defaults instead of raising, and the
    confidence is rounded (not truncated) to a whole percentage, so 0.57 is
    shown as 57% rather than 56%.
    """
    label = html.escape(str(cluster.get("label") or "Cluster"))
    category = html.escape(str(cluster.get("category") or "Unmapped"))
    keywords = html.escape(
        ", ".join(str(word) for word in (cluster.get("keywords") or [])[:8])
    )
    evidence = html.escape(
        " | ".join(str(title) for title in (cluster.get("top_titles") or [])[:3])
    )
    papers = html.escape(str(cluster.get("paper_count") or 0))
    try:
        conf = round(float(cluster.get("confidence") or 0) * 100)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric, NaN or infinite confidence: show 0% rather than crash.
        conf = 0
    return (
        "<div class='eis-cluster-card'>"
        f"<div style='font-size:15px;font-weight:800;color:#f1f5f9;font-family:Outfit,sans-serif'>{label}</div>"
        f"<div style='font-size:11px;color:#60a5fa;margin-top:4px;font-weight:600;letter-spacing:0.3px'>{category}</div>"
        "<div style='margin-top:10px;font-size:12px;color:#475569'>"
        f"{papers} papers &nbsp;·&nbsp; confidence <span style='color:#fbbf24;font-weight:700'>{conf}%</span></div>"
        f"<div style='margin-top:8px;font-size:12px;color:#1d4ed8;font-weight:600'>{keywords}</div>"
        f"<div style='margin-top:10px;font-size:11px;color:#334155;line-height:1.5'>{evidence}</div>"
        "</div>"
    )


def _cards_html() -> str:
    """Render every cluster in ``combined_labels.json`` as a responsive card grid.

    Returns a dashed placeholder box when the labels file does not exist yet.
    """
    if not _exists("combined_labels.json"):
        return (
            "<div style='padding:32px;color:#475569;background:#0a1220;"
            "border:1px dashed #1e3a5c;border-radius:12px;font-family:Inter,sans-serif'>"
            "Clusters will appear here after a complete run.</div>"
        )
    cards = [_cluster_card_html(cluster) for cluster in _load_json("combined_labels.json")]
    return (
        "<div style='display:grid;grid-template-columns:repeat(auto-fit,minmax(280px,1fr));"
        "gap:14px;padding:4px 0'>" + "".join(cards) + "</div>"
    )
def _summary_markdown(result=None) -> str:
    """Build the Markdown summary shown under the upload controls.

    Args:
        result: Mapping returned by the pipeline run; the keys read here are
            ``parameters``, ``embedding``, ``taxonomy`` and ``config``. When
            it is ``None`` or empty, the summary is rebuilt from the files
            saved in the output directory.

    Returns:
        Usage instructions when there is neither a result nor a saved
        ``run_metadata.json``; otherwise a run summary.
    """
    meta = result or {}
    if not meta:
        # Covers both ``None`` and an empty mapping, so a missing metadata
        # file yields the instructions instead of a FileNotFoundError.
        if not _exists("run_metadata.json"):
            return (
                "Upload the Scopus CSV and click **Run Complete Pipeline**. "
                "The app will generate paper-level Title+Abstract+DOI embeddings, optimize "
                "UMAP/HDBSCAN clustering, label 15-25 clusters through an in-app council, "
                "map them to PAJAIS, and export TCCM validation files."
            )
        run_meta = _load_json("run_metadata.json")  # parsed once, read twice below
        meta = {
            "parameters": run_meta.get("selected_parameters", {}),
            "embedding": run_meta.get("embedding", {}),
            "clusters": _load_json("combined_labels.json") if _exists("combined_labels.json") else [],
            "taxonomy": _load_json("taxonomy_map.json") if _exists("taxonomy_map.json") else {},
            "config": _load_json("corpus_config.json") if _exists("corpus_config.json") else {},
        }
    # ``or {}`` keeps the lookups below safe when a section is present but null.
    params = meta.get("parameters") or {}
    emb = meta.get("embedding") or {}
    tax = (meta.get("taxonomy") or {}).get("coverage_stats") or {}
    cfg = meta.get("config") or {}
    return (
        f"**Run complete.** Analysed {cfg.get('rows', 'N/A')} papers from "
        f"{cfg.get('journal', 'the corpus')} ({cfg.get('year_min')} to {cfg.get('year_max')}).\n\n"
        f"Selected clustering: `{params.get('algorithm')}` with "
        f"`{params.get('n_clusters')}` clusters, min size `{params.get('min_size')}`, "
        f"max size `{params.get('max_size')}`, noise ratio `{params.get('noise_ratio')}`.\n\n"
        f"Embedding: `{emb.get('embedding_model')}`. PAJAIS mapped: "
        f"`{tax.get('mapped', 0)}`; novel: `{tax.get('novel', 0)}`. "
        "Download the optimizer log and council validation for the final submission appendix."
    )
def _run(file_obj):
    """Run the complete pipeline on the uploaded CSV and return all 14 panel values.

    With no upload, the first value is a prompt to upload and the remaining
    panels are rebuilt from whatever is already on disk. Otherwise the
    pipeline runs on the file path, and the last value is the pipeline's
    ``deliverables`` entry.
    """

    def shared_panels():
        # The twelve values between the summary and the downloads, in UI order.
        return (
            _phase_html(),
            _cluster_table(),
            _cards_html(),
            _optimizer_table(),
            _compliance_html(),
            _compliance_table(),
            _council_viz_html(),
            _council_table(),
            _tccm_table(),
            _chart_iframe("intertopic_map.html"),
            _chart_iframe("bar_chart.html"),
            _chart_iframe("treemap.html"),
        )

    if file_obj is None:
        return ("Upload a CSV first.", *shared_panels(), _download_files())

    if isinstance(file_obj, str):
        csv_path = file_obj
    else:
        csv_path = file_obj.name
    result = run_complete_pipeline(csv_path)
    return (_summary_markdown(result), *shared_panels(), result["deliverables"])
def _refresh():
    """Rebuild all 14 panel values from the files currently in the output directory."""
    panels = [
        _summary_markdown(),
        _phase_html(),
        _cluster_table(),
        _cards_html(),
        _optimizer_table(),
        _compliance_html(),
        _compliance_table(),
        _council_viz_html(),
        _council_table(),
        _tccm_table(),
    ]
    for chart_name in ("intertopic_map.html", "bar_chart.html", "treemap.html"):
        panels.append(_chart_iframe(chart_name))
    panels.append(_download_files())
    return tuple(panels)
# Custom stylesheet handed to gr.Blocks(css=CSS) below. It pulls the Inter and
# Outfit fonts via @import and styles the class names emitted by the HTML
# helpers in this file (eis-hero, eis-phase-chip, compliance-*, tccm-*,
# council-*, pulse-*, eis-cluster-card), plus a few Gradio overrides.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=Outfit:wght@700;800;900&display=swap');
* { box-sizing: border-box; }
body, .gradio-container {
background: #070b14 !important;
font-family: 'Inter', sans-serif !important;
}
.gradio-container { max-width: 1400px !important; }
/* ── hero header ── */
.eis-hero {
background: linear-gradient(135deg, #0d1b2e 0%, #0a2240 40%, #0c1a35 100%);
border: 1px solid #1a3a5c;
border-radius: 16px;
padding: 36px 40px;
margin-bottom: 8px;
position: relative;
overflow: hidden;
}
.eis-hero::before {
content: '';
position: absolute;
top: -60px; right: -60px;
width: 280px; height: 280px;
background: radial-gradient(circle, rgba(245,158,11,0.12) 0%, transparent 70%);
pointer-events: none;
}
.eis-hero::after {
content: '';
position: absolute;
bottom: -40px; left: 30%;
width: 200px; height: 200px;
background: radial-gradient(circle, rgba(59,130,246,0.1) 0%, transparent 70%);
pointer-events: none;
}
.eis-hero-badge {
display: inline-block;
background: rgba(245,158,11,0.15);
border: 1px solid rgba(245,158,11,0.4);
color: #fbbf24;
font-size: 11px;
font-weight: 700;
letter-spacing: 2px;
text-transform: uppercase;
padding: 4px 12px;
border-radius: 999px;
margin-bottom: 14px;
}
.eis-hero h1 {
font-family: 'Outfit', sans-serif;
font-size: 38px;
font-weight: 900;
margin: 0 0 10px;
background: linear-gradient(90deg, #f8fafc 0%, #93c5fd 60%, #fbbf24 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
line-height: 1.15;
}
.eis-hero p {
color: #94a3b8;
font-size: 15px;
margin: 0;
line-height: 1.6;
max-width: 700px;
}
.eis-hero-stats {
display: flex;
gap: 28px;
margin-top: 22px;
flex-wrap: wrap;
}
.eis-stat {
display: flex;
flex-direction: column;
gap: 2px;
}
.eis-stat strong {
font-family: 'Outfit', sans-serif;
font-size: 22px;
font-weight: 800;
color: #fbbf24;
}
.eis-stat span {
font-size: 11px;
color: #64748b;
text-transform: uppercase;
letter-spacing: 1px;
}
/* ── phase chips ── */
.eis-phase-chip {
display: inline-flex;
gap: 6px;
align-items: center;
padding: 6px 14px;
border-radius: 999px;
font-size: 12px;
font-weight: 700;
letter-spacing: 0.3px;
transition: all 0.2s;
}
.eis-phase-chip.done {
background: linear-gradient(135deg, #1d4ed8, #0891b2);
color: #e0f2fe;
box-shadow: 0 0 12px rgba(59,130,246,0.35);
}
.eis-phase-chip.pending {
background: #0f172a;
color: #475569;
border: 1px solid #1e293b;
}
/* ── upload + run area ── */
.eis-upload-area {
background: #0d1424;
border: 1px solid #1e3a5c;
border-radius: 12px;
padding: 20px;
}
/* ── compliance ── */
.compliance-empty {
padding: 28px;
border: 1px dashed #1e3a5c;
border-radius: 10px;
background: #0a1220;
color: #475569;
font-family: 'Inter', sans-serif;
}
.compliance-board, .tccm-status {
background: #0d1424;
border: 1px solid #1e3a5c;
border-radius: 12px;
padding: 20px;
}
.compliance-board h3, .tccm-status h3 {
margin: 0;
color: #f1f5f9;
font-family: 'Outfit', sans-serif;
font-size: 20px;
font-weight: 800;
}
.compliance-board p, .tccm-status p {
color: #64748b;
margin: 6px 0 16px;
line-height: 1.5;
font-size: 14px;
}
.compliance-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 10px;
}
.compliance-row {
border: 1px solid #1e293b;
border-radius: 10px;
padding: 14px;
background: #0a1220;
transition: border-color 0.2s;
}
.compliance-row:hover { border-color: #2563eb; }
.compliance-row span {
display: inline-block;
color: white;
font-size: 10px;
font-weight: 800;
padding: 3px 10px;
border-radius: 999px;
margin-bottom: 8px;
letter-spacing: 0.5px;
}
.compliance-row strong { display: block; color: #e2e8f0; font-size: 13px; }
.compliance-row p { font-size: 12px; margin: 6px 0; color: #64748b; }
.compliance-row small { color: #334155; font-size: 11px; }
/* ── tccm cards ── */
.tccm-status > div {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
gap: 10px;
}
.tccm-card {
border: 1px solid #1e293b;
border-radius: 10px;
padding: 14px;
background: #0a1220;
}
.tccm-card strong { display: block; font-size: 28px; color: #fbbf24; font-family: 'Outfit', sans-serif; }
.tccm-card span { color: #64748b; font-size: 12px; font-weight: 600; }
/* ── council board ── */
.council-empty {
padding: 32px;
border: 1px dashed #1e3a5c;
color: #475569;
border-radius: 12px;
background: #0a1220;
}
.council-board {
background: linear-gradient(160deg, #06101e 0%, #08162a 100%);
color: #e5edf7;
border-radius: 14px;
padding: 22px;
border: 1px solid #1a3354;
}
.council-top {
display: grid;
grid-template-columns: minmax(280px, 1.2fr) minmax(320px, 1fr);
gap: 16px;
align-items: start;
}
.council-top h3 {
margin: 0;
font-size: 20px;
font-family: 'Outfit', sans-serif;
font-weight: 800;
color: #f1f5f9;
}
.council-top p { margin: 6px 0 0; color: #64748b; line-height: 1.5; font-size: 14px; }
.council-metrics { display: grid; grid-template-columns: repeat(2, minmax(140px, 1fr)); gap: 8px; }
.council-metrics div {
background: #0c1e35;
border: 1px solid #1a3a5c;
border-radius: 10px;
padding: 12px;
}
.council-metrics strong { display: block; color: #fbbf24; font-size: 20px; font-family: 'Outfit', sans-serif; }
.council-metrics span { color: #64748b; font-size: 12px; }
.council-lane {
display: grid;
grid-template-columns: 1fr 60px 1fr 60px 1fr 60px 1fr;
gap: 8px;
align-items: center;
margin: 22px 0;
}
.pulse-node {
min-height: 64px;
display: grid;
place-items: center;
text-align: center;
border: 1px solid #1d4ed8;
background: linear-gradient(135deg, #0f2040, #0d1a35);
border-radius: 10px;
color: #93c5fd;
font-weight: 800;
animation: councilGlow 1.8s ease-in-out infinite;
}
.pulse-node span { display: block; font-size: 11px; font-weight: 600; color: #bfdbfe; margin-top: 3px; }
.pulse-node.final { border-color: #d97706; background: linear-gradient(135deg, #1c1000, #2a1800); color: #fbbf24; }
.pulse-line {
height: 3px;
border-radius: 999px;
background: linear-gradient(90deg, #1d4ed8, #f59e0b, #1d4ed8);
background-size: 220% 100%;
animation: councilFlow 1.1s linear infinite;
}
.council-cluster {
margin-top: 12px;
padding: 14px;
border: 1px solid #1a3354;
border-radius: 10px;
background: #080f1c;
}
.cluster-head { display: flex; justify-content: space-between; gap: 12px; color: #cbd5e1; margin-bottom: 10px; }
.cluster-head span { color: #60a5fa; font-weight: 800; }
.cluster-head strong { color: #f8fafc; }
.council-flow { display: grid; grid-template-columns: repeat(4, minmax(160px, 1fr)); gap: 8px; }
.council-vote, .final-label {
border-radius: 10px;
padding: 12px;
background: #0c1e35;
border: 1px solid #1a3a5c;
min-height: 82px;
}
.council-vote { display: flex; gap: 8px; align-items: flex-start; }
.vote-dot {
width: 10px; height: 10px;
margin-top: 4px;
border-radius: 50%;
background: #f59e0b;
box-shadow: 0 0 14px rgba(245,158,11,0.7);
animation: councilBlink 1.2s ease-in-out infinite;
flex: 0 0 auto;
}
.council-vote strong, .final-label strong { display: block; color: #e2e8f0; font-size: 13px; }
.council-vote span, .final-label span { display: block; color: #fbbf24; font-size: 12px; margin-top: 3px; }
.council-vote small, .final-label small { display: block; color: #475569; font-size: 11px; margin-top: 4px; line-height: 1.25; }
.final-label { border-color: #d97706; background: #130d00; }
/* ── cluster cards ── */
.eis-cluster-card {
background: linear-gradient(135deg, #0d1830 0%, #0a1220 100%);
border: 1px solid #1e3a5c;
border-left: 4px solid #f59e0b;
border-radius: 12px;
padding: 18px;
min-height: 180px;
transition: transform 0.2s, box-shadow 0.2s, border-color 0.2s;
}
.eis-cluster-card:hover {
transform: translateY(-2px);
box-shadow: 0 8px 32px rgba(245,158,11,0.12);
border-color: #2563eb;
}
/* ── Gradio overrides ── */
.gradio-container .tabs { background: transparent !important; }
.gradio-container .tab-nav button {
color: #64748b !important;
font-weight: 600 !important;
border-bottom: 2px solid transparent !important;
transition: all 0.2s !important;
}
.gradio-container .tab-nav button.selected {
color: #fbbf24 !important;
border-bottom-color: #f59e0b !important;
background: transparent !important;
}
.gradio-container label { color: #94a3b8 !important; font-size: 13px !important; }
.gradio-container .prose { color: #94a3b8 !important; }
/* ── animations ── */
@keyframes councilFlow { from { background-position: 0% 0; } to { background-position: 220% 0; } }
@keyframes councilGlow { 0%, 100% { box-shadow: 0 0 0 rgba(29,78,216,0.2); } 50% { box-shadow: 0 0 22px rgba(245,158,11,0.3); } }
@keyframes councilBlink { 0%, 100% { opacity: .35; transform: scale(.75); } 50% { opacity: 1; transform: scale(1.15); } }
@keyframes fadeIn { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: translateY(0); } }
@media (max-width: 900px) {
.council-top, .council-flow { grid-template-columns: 1fr; }
.council-lane { grid-template-columns: 1fr; }
.pulse-line { height: 18px; width: 3px; justify-self: center; }
.eis-hero h1 { font-size: 26px; }
.eis-hero-stats { gap: 16px; }
}
"""
# Build the Gradio app: a static hero header, the phase strip, the upload/run
# controls, the run summary, and one tab per output family. Every component is
# seeded from whatever files already exist in the output directory.
# NOTE(review): in the portion of the file visible here, _on_tccm_dual_validate,
# _on_notebooklm_paste, _tccm_dual_status_html and _tccm_dual_table are not
# bound to any component below — confirm whether a dual-validation tab is intended.
with gr.Blocks(title="EIS Topic Intelligence", css=CSS, theme=gr.themes.Base()) as demo:
# Hero banner; the four stat figures are hard-coded text, not computed values.
gr.HTML(
"<div class='eis-hero'>"
"<div class='eis-hero-badge'>EIS &nbsp;·&nbsp; SPJIMR Research Analytics</div>"
"<h1>EIS Topic Intelligence</h1>"
"<p>Paper-level SPECTER2 / TF-IDF embeddings &nbsp;·&nbsp; UMAP + HDBSCAN optimised clustering"
" &nbsp;·&nbsp; Live Mistral LLM council validation &nbsp;·&nbsp; PAJAIS taxonomy mapping &nbsp;·&nbsp; TCCM extraction</p>"
"<div class='eis-hero-stats'>"
"<div class='eis-stat'><strong>15–25</strong><span>Target clusters</span></div>"
"<div class='eis-stat'><strong>3</strong><span>Council validators</span></div>"
"<div class='eis-stat'><strong>25</strong><span>PAJAIS categories</span></div>"
"<div class='eis-stat'><strong>100</strong><span>TCCM papers</span></div>"
"</div>"
"</div>"
)
phase = gr.HTML(value=_phase_html())
# Upload control plus the two action buttons.
with gr.Row():
csv_file = gr.File(label="📂 Upload Scopus Journal CSV", file_types=[".csv"], scale=3)
with gr.Column(scale=1):
run_btn = gr.Button("▶ Run Complete Pipeline", variant="primary")
refresh_btn = gr.Button("↻ Refresh Outputs")
summary = gr.Markdown(value=_summary_markdown())
with gr.Tabs():
with gr.Tab("Clusters"):
# Headers follow the column order of the rows built by _cluster_table().
cluster_table = gr.Dataframe(
headers=[
"Cluster ID", "Label", "PAJAIS Category", "Papers", "Confidence",
"Agreement", "Keywords", "Top 3 Titles", "Reasoning",
],
value=_cluster_table(),
wrap=True,
interactive=False,
)
cluster_cards = gr.HTML(value=_cards_html())
with gr.Tab("Optimization"):
optimizer_table = gr.Dataframe(value=_optimizer_table(), wrap=True, interactive=False)
with gr.Tab("Compliance"):
compliance_panel = gr.HTML(value=_compliance_html())
compliance_table = gr.Dataframe(value=_compliance_table(), wrap=True, interactive=False)
with gr.Tab("Council Validation"):
council_viz = gr.HTML(value=_council_viz_html())
council_table = gr.Dataframe(value=_council_table(), wrap=True, interactive=False)
with gr.Tab("TCCM Validation"):
tccm_table = gr.Dataframe(value=_tccm_table(), wrap=True, interactive=False)
with gr.Tab("Charts"):
chart_map = gr.HTML(value=_chart_iframe("intertopic_map.html"))
chart_bar = gr.HTML(value=_chart_iframe("bar_chart.html"))
chart_tree = gr.HTML(value=_chart_iframe("treemap.html"))
with gr.Tab("Downloads"):
downloads = gr.File(value=_download_files(), label="Generated deliverables", file_count="multiple")
# Order matters: it must match, position by position, the 14-value tuples
# returned by _run() and _refresh().
outputs = [
summary,
phase,
cluster_table,
cluster_cards,
optimizer_table,
compliance_panel,
compliance_table,
council_viz,
council_table,
tccm_table,
chart_map,
chart_bar,
chart_tree,
downloads,
]
# Both buttons update the same set of components.
run_btn.click(fn=_run, inputs=[csv_file], outputs=outputs, show_api=False, api_name=False)
refresh_btn.click(fn=_refresh, inputs=None, outputs=outputs, show_api=False, api_name=False)
if __name__ == "__main__":
    # Launch only when executed as a script: all interfaces, port 7860,
    # with ssr_mode switched off.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)