Spaces:
Sleeping
Sleeping
Daksh C Jain
Initial commit: EIS Topic Intelligence — UMAP+HDBSCAN+Mistral council, dark EIS theme, 23 clusters from Enterprise Information Systems corpus
c91d9b4 | import html | |
| import json | |
| import os | |
| from typing import List | |
| # Load .env file automatically if present | |
| try: | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| except ImportError: | |
| pass # python-dotenv not installed; set env vars manually or pip install python-dotenv | |
| import gradio as gr | |
| import pandas as pd | |
| from topic_pipeline import ( | |
| OUTPUT_DIR, | |
| parse_notebooklm_tccm_text, | |
| run_complete_pipeline, | |
| write_tccm_dual_validation, | |
| ) | |
# Make sure the output directory exists; exist_ok avoids an error when it is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)
def _exists(name: str) -> bool:
    """Return True when ``name`` is present inside ``OUTPUT_DIR``."""
    candidate = os.path.join(OUTPUT_DIR, name)
    return os.path.exists(candidate)
def _load_json(name: str):
    """Parse and return the UTF-8 JSON document stored as ``name`` under ``OUTPUT_DIR``."""
    target = os.path.join(OUTPUT_DIR, name)
    with open(target, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
def _download_files() -> List[str]:
    """List the deliverable files that currently exist in ``OUTPUT_DIR``.

    The result keeps the order of the fixed deliverable list below; names
    whose file is not present are left out.
    """
    deliverables = (
        "comparison.csv",
        "taxonomy_map.json",
        "topic_model_report.md",
        "narrative.txt",
        "cluster_optimization_log.csv",
        "llm_council_validation.csv",
        "tccm_validation.csv",
        "tccm_dual_validation.csv",
        "notebooklm_extraction.csv",
        "compliance_checklist.csv",
        "compliance_checklist.json",
        "run_metadata.json",
        "combined_labels.json",
    )
    available = []
    for filename in deliverables:
        if _exists(filename):
            available.append(os.path.join(OUTPUT_DIR, filename))
    return available
def _phase_html() -> str:
    """Render the pipeline progress strip as a row of HTML chips.

    A phase counts as done when its marker file exists in the output
    directory; done chips get a check mark, pending chips a dot.
    """
    markers = (
        ("Corpus", "corpus_config.json"),
        ("Embeddings", "combined_emb.npy"),
        ("Optimization", "cluster_optimization_log.csv"),
        ("Clusters", "combined_labels.json"),
        ("Council", "llm_council_validation.csv"),
        ("TCCM", "tccm_validation.csv"),
        ("Compliance", "compliance_checklist.csv"),
        ("Report", "topic_model_report.md"),
    )

    def chip(title: str, marker_file: str) -> str:
        if _exists(marker_file):
            return f"<span class='eis-phase-chip done'>✓ {title}</span>"
        return f"<span class='eis-phase-chip pending'>· {title}</span>"

    strip = "".join(chip(title, marker_file) for title, marker_file in markers)
    return (
        "<div style='display:flex;gap:8px;flex-wrap:wrap;padding:4px 0'>"
        + strip
        + "</div>"
    )
def _cluster_table():
    """Build the rows for the Clusters dataframe.

    Returns an empty list until ``combined_labels.json`` exists. Otherwise
    every cluster record becomes one row: id, label, category, paper count,
    confidence, agreement score, up to eight keywords joined by "; ", up to
    three top titles joined by " | ", and the reasoning text.
    """
    if not _exists("combined_labels.json"):
        return []

    def as_row(cluster):
        keyword_text = "; ".join(cluster.get("keywords", [])[:8])
        title_text = " | ".join(cluster.get("top_titles", [])[:3])
        return [
            cluster.get("cluster_id"),
            cluster.get("label"),
            cluster.get("category"),
            cluster.get("paper_count"),
            cluster.get("confidence"),
            cluster.get("agreement_score"),
            keyword_text,
            title_text,
            cluster.get("reasoning", ""),
        ]

    return [as_row(cluster) for cluster in _load_json("combined_labels.json")]
def _council_table():
    """Return the first 120 rows of the council validation CSV, or ``[]`` when it is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "llm_council_validation.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path).head(120)
    return []
def _as_percent(value) -> int:
    """Turn a 0-1 ratio into a whole percentage; missing or non-numeric input yields 0."""
    import math  # local import keeps this block self-contained

    try:
        ratio = float(value)
    except (TypeError, ValueError):
        return 0
    # NaN (blank CSV cell) and infinities cannot be converted with int().
    if not math.isfinite(ratio):
        return 0
    return int(ratio * 100)


def _council_viz_html() -> str:
    """Render the council validation board from ``llm_council_validation.csv``.

    Returns:
        An HTML fragment. A placeholder ``council-empty`` box is returned when
        the CSV does not exist, has no rows, or lacks the ``cluster_id`` /
        ``final_label`` columns needed for grouping. Otherwise the board shows
        summary metrics, the static validator lane, and the votes for the
        first six (cluster_id, final_label) groups in file order.

    Optional columns (``member``, ``member_label``, ``method``,
    ``confidence``, ``agreement_score``) degrade to empty text or 0% instead
    of raising when they are missing or blank.
    """
    path = os.path.join(OUTPUT_DIR, "llm_council_validation.csv")
    if not os.path.exists(path):
        return (
            "<div class='council-empty'>Run the pipeline to activate the LLM Council "
            "validation board.</div>"
        )
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like an empty table.
        return "<div class='council-empty'>Council validation file is empty.</div>"
    if df.empty:
        return "<div class='council-empty'>Council validation file is empty.</div>"
    if "cluster_id" not in df.columns or "final_label" not in df.columns:
        return (
            "<div class='council-empty'>Council validation file is missing the "
            "cluster_id / final_label columns.</div>"
        )

    def column_mean_percent(name: str) -> int:
        # Coerce first so stray text in a numeric column cannot break mean().
        if name not in df.columns:
            return 0
        return _as_percent(pd.to_numeric(df[name], errors="coerce").mean())

    def first_percent(frame, name: str) -> int:
        # Every row of a group carries the same cluster-level score; read the first.
        if name not in frame.columns:
            return 0
        return _as_percent(frame[name].iloc[0])

    avg_agreement = column_mean_percent("agreement_score")
    avg_confidence = column_mean_percent("confidence")
    llm_member_present = bool(
        "member" in df.columns
        and df["member"].astype(str).str.contains("LLM|Mistral", case=False, regex=True).any()
    )
    llm_status = "Mistral LLM active" if llm_member_present else "Local semantic fallback active"

    # Only the first six clusters are drawn to keep the board compact.
    grouped = list(df.groupby(["cluster_id", "final_label"], sort=False))[:6]
    rows = []
    for (cluster_id, final_label), group in grouped:
        votes = []
        for _, row in group.iterrows():
            member = html.escape(str(row.get("member", "")))
            label = html.escape(str(row.get("member_label", "")))
            method = html.escape(str(row.get("method", "")))
            votes.append(
                "<div class='council-vote'>"
                "<div class='vote-dot'></div>"
                f"<div><strong>{member}</strong><span>{label}</span><small>{method}</small></div>"
                "</div>"
            )
        confidence = first_percent(group, "confidence")
        agreement = first_percent(group, "agreement_score")
        rows.append(
            "<div class='council-cluster'>"
            "<div class='cluster-head'>"
            f"<span>Cluster {html.escape(str(cluster_id))}</span>"
            f"<strong>{html.escape(str(final_label))}</strong>"
            "</div>"
            "<div class='council-flow'>"
            + "".join(votes) +
            "<div class='final-label'>"
            "<small>Accepted label</small>"
            f"<strong>{html.escape(str(final_label))}</strong>"
            f"<span>{confidence}% confidence | {agreement}% agreement</span>"
            "</div>"
            "</div>"
            "</div>"
        )
    return (
        "<div class='council-board'>"
        "<div class='council-top'>"
        "<div><h3>LLM Council Validation Running In-App</h3>"
        "<p>Three independent validators inspect each cluster label, compare votes, "
        "and write the accepted label plus agreement score into the export file.</p></div>"
        "<div class='council-metrics'>"
        f"<div><strong>{len(df['cluster_id'].unique())}</strong><span>clusters checked</span></div>"
        f"<div><strong>{avg_agreement}%</strong><span>avg agreement</span></div>"
        f"<div><strong>{avg_confidence}%</strong><span>avg confidence</span></div>"
        f"<div><strong>{html.escape(llm_status)}</strong><span>council mode</span></div>"
        "</div></div>"
        "<div class='council-lane'>"
        "<div class='pulse-node'>1<br><span>Keyword Extractor</span></div>"
        "<div class='pulse-line'></div>"
        "<div class='pulse-node'>2<br><span>PAJAIS Mapper</span></div>"
        "<div class='pulse-line'></div>"
        "<div class='pulse-node'>3<br><span>LLM / Semantic Judge</span></div>"
        "<div class='pulse-line'></div>"
        "<div class='pulse-node final'>OK<br><span>Validated Label</span></div>"
        "</div>"
        + "".join(rows) +
        "</div>"
    )
def _optimizer_table():
    """Return up to 80 rows of the optimizer log, restricted to the known columns.

    Gives ``[]`` when ``cluster_optimization_log.csv`` has not been written.
    Columns from the preferred list that are absent in the file are skipped.
    """
    csv_path = os.path.join(OUTPUT_DIR, "cluster_optimization_log.csv")
    if not os.path.exists(csv_path):
        return []
    log = pd.read_csv(csv_path)
    preferred = (
        "algorithm",
        "umap_n_neighbors",
        "umap_n_components",
        "hdbscan_min_cluster_size",
        "hdbscan_min_samples",
        "n_clusters",
        "noise_ratio",
        "min_size",
        "max_size",
        "too_small",
        "too_large",
        "silhouette_cosine",
        "score",
        "optimizer_recommendation",
    )
    present = [column for column in preferred if column in log.columns]
    return log[present].head(80)
def _tccm_table():
    """Return the first 100 rows of ``tccm_validation.csv``, or ``[]`` when it is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "tccm_validation.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path).head(100)
    return []
def _tccm_dual_table():
    """Return the first 100 rows of ``tccm_dual_validation.csv``, or ``[]`` when it is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "tccm_dual_validation.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path).head(100)
    return []
def _compliance_table():
    """Return the full compliance checklist as a DataFrame, or ``[]`` when the CSV is absent."""
    csv_path = os.path.join(OUTPUT_DIR, "compliance_checklist.csv")
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)
    return []
def _compliance_html() -> str:
    """Render ``compliance_checklist.csv`` as a grid of status cards.

    A placeholder box is returned while the checklist file does not exist.
    Each CSV row contributes one card with a coloured status badge, the
    requirement, its evidence and the related file; unknown statuses use the
    neutral grey badge.
    """
    csv_path = os.path.join(OUTPUT_DIR, "compliance_checklist.csv")
    if not os.path.exists(csv_path):
        return (
            "<div class='compliance-empty'>Run the pipeline to generate the professor-requirement "
            "compliance checklist.</div>"
        )

    badge_colors = {
        "PASS": "#0f766e",
        "FAIL": "#b91c1c",
        "CONFIG_REQUIRED": "#b45309",
        "ENV_FALLBACK": "#b45309",
        "INPUT_REQUIRED": "#b45309",
        "PARTIAL": "#7c3aed",
        "MANUAL_REQUIRED": "#475569",
        "REVIEW": "#7c3aed",
    }

    def card(record) -> str:
        status = str(record.get("Status", "REVIEW"))
        badge = badge_colors.get(status, "#475569")
        requirement = html.escape(str(record.get("Requirement", "")))
        evidence = html.escape(str(record.get("Evidence", "")))
        source_file = html.escape(str(record.get("File", "")))
        return (
            "<div class='compliance-row'>"
            f"<span style='background:{badge}'>{html.escape(status)}</span>"
            f"<strong>{requirement}</strong>"
            f"<p>{evidence}</p>"
            f"<small>{source_file}</small>"
            "</div>"
        )

    checklist = pd.read_csv(csv_path)
    cards = "".join(card(record) for _, record in checklist.iterrows())
    return (
        "<div class='compliance-board'>"
        "<h3>Professor Requirement Compliance Checklist</h3>"
        "<p>This separates completed app evidence from items that still need API secrets, "
        "NotebookLM/full-text inputs, or mentor approval.</p>"
        "<div class='compliance-grid'>" + cards + "</div></div>"
    )
def _tccm_dual_status_html() -> str:
    """Summarise ``tccm_dual_validation.csv`` as one count card per compliance status.

    Placeholder boxes are returned when the file is missing or when it has
    no ``Final_TCCM_Compliance_Status`` column. Counts for statuses whose
    text contains "COMPLIANT" are drawn in green, all others in amber.
    """
    csv_path = os.path.join(OUTPUT_DIR, "tccm_dual_validation.csv")
    if not os.path.exists(csv_path):
        return (
            "<div class='compliance-empty'>Upload NotebookLM and second-LLM extraction CSVs "
            "to generate TCCM dual validation.</div>"
        )

    table = pd.read_csv(csv_path)
    status_col = "Final_TCCM_Compliance_Status"
    if status_col not in table.columns:
        return "<div class='compliance-empty'>TCCM dual validation is pending source uploads.</div>"

    tally = table[status_col].value_counts().to_dict()
    pieces = []
    for status, count in tally.items():
        if "COMPLIANT" in str(status):
            tint = "#0f766e"
        else:
            tint = "#b45309"
        pieces.append(
            f"<div class='tccm-card'><strong style='color:{tint}'>{count}</strong>"
            f"<span>{html.escape(str(status))}</span></div>"
        )
    return (
        "<div class='tccm-status'><h3>TCCM Dual Validation Status</h3>"
        "<p>Required by email: NotebookLM extraction plus another LLM/extraction method. "
        "This screen reconciles those files with regex/semantic extraction.</p>"
        "<div>" + "".join(pieces) + "</div></div>"
    )
def _on_tccm_dual_validate(notebook_file, second_file):
    """Run TCCM dual validation on two uploads and return the refreshed widgets.

    Each argument may be a path string or an object exposing ``.name``;
    anything else resolves to an empty path. Returns the status HTML, the
    dual-validation table and the list of downloadable files.
    """

    def to_path(upload):
        if isinstance(upload, str):
            return upload
        return getattr(upload, "name", "")

    write_tccm_dual_validation(to_path(notebook_file), to_path(second_file))
    return _tccm_dual_status_html(), _tccm_dual_table(), _download_files()
def _on_notebooklm_paste(notebook_text):
    """Import pasted NotebookLM table text and refresh the dual-validation widgets.

    Blank input only yields a reminder message. Otherwise the text is parsed
    to a CSV, dual validation is rewritten without a second-LLM file, and a
    status banner reporting the parsed row count is returned together with
    the dual-validation table and the downloadable files.
    """
    pasted = str(notebook_text or "")
    if pasted.strip() == "":
        reminder = "<div class='compliance-empty'>Paste the NotebookLM table text first.</div>"
        return reminder, _tccm_dual_table(), _download_files()

    notebook_path = parse_notebooklm_tccm_text(notebook_text)
    write_tccm_dual_validation(notebook_path, "")

    if os.path.exists(notebook_path):
        count = len(pd.read_csv(notebook_path))
    else:
        count = 0

    banner = (
        f"<div class='tccm-status'><h3>NotebookLM Paste Imported</h3>"
        f"<p>Parsed {count} NotebookLM rows into <code>outputs/notebooklm_extraction.csv</code>. "
        "Merged with the independent regex/semantic extractor in "
        "<code>outputs/tccm_dual_validation.csv</code>. Upload a second-LLM CSV as well "
        "for full NotebookLM + second LLM compliance.</p></div>"
    )
    status = banner + _tccm_dual_status_html()
    return status, _tccm_dual_table(), _download_files()
def _chart_iframe(name: str) -> str:
    """Embed a generated chart page as an inline ``<iframe srcdoc=...>``.

    Args:
        name: File name of the chart inside ``OUTPUT_DIR/combined_charts``.

    Returns:
        A placeholder box when the chart file does not exist, otherwise an
        iframe whose ``srcdoc`` attribute carries the chart's HTML.
    """
    path = os.path.join(OUTPUT_DIR, "combined_charts", name)
    if not os.path.exists(path):
        return (
            "<div style='height:320px;display:grid;place-items:center;"
            "background:#0f172a;color:#94a3b8;border-radius:8px'>"
            "Run the pipeline to generate this chart.</div>"
        )
    with open(path, "r", encoding="utf-8") as f:
        document = f.read()
    # The page is placed inside a double-quoted attribute, so "&" and '"' must
    # be written as entities. "&" goes first so the "&" of "&quot;" is not
    # escaped a second time.
    srcdoc = document.replace("&", "&amp;").replace('"', "&quot;")
    return (
        f"<iframe srcdoc=\"{srcdoc}\" width='100%' height='500' "
        "style='border:0;border-radius:8px;background:#0f172a'></iframe>"
    )
def _cards_html() -> str:
    """Render one summary card per cluster from ``combined_labels.json``.

    Returns:
        A placeholder box until the labels file exists, otherwise a CSS grid
        of cards showing label, category, paper count, confidence, up to
        eight keywords and up to three top titles.

    Every value is converted to text and HTML-escaped before it is
    interpolated, so JSON ``null`` or numeric fields no longer raise and
    cannot inject markup. A missing or unparsable confidence shows as 0%.
    """
    if not _exists("combined_labels.json"):
        return (
            "<div style='padding:32px;color:#475569;background:#0a1220;"
            "border:1px dashed #1e3a5c;border-radius:12px;font-family:Inter,sans-serif'>"
            "Clusters will appear here after a complete run.</div>"
        )

    def safe_text(value, fallback="") -> str:
        # html.escape only accepts str; JSON may hand us None or numbers.
        return html.escape(str(fallback if value is None else value))

    def joined(values, separator: str, limit: int) -> str:
        return separator.join(str(item) for item in (values or [])[:limit])

    def percent(value) -> int:
        try:
            return int(float(value or 0) * 100)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric text, NaN and infinity all fall back to 0%.
            return 0

    cards = []
    for s in _load_json("combined_labels.json"):
        evidence = html.escape(joined(s.get("top_titles"), " | ", 3))
        label = safe_text(s.get("label"), "Cluster")
        category = safe_text(s.get("category"), "Unmapped")
        keywords = html.escape(joined(s.get("keywords"), ", ", 8))
        papers = safe_text(s.get("paper_count"), 0)
        conf = percent(s.get("confidence"))
        cards.append(
            "<div class='eis-cluster-card'>"
            f"<div style='font-size:15px;font-weight:800;color:#f1f5f9;font-family:Outfit,sans-serif'>{label}</div>"
            f"<div style='font-size:11px;color:#60a5fa;margin-top:4px;font-weight:600;letter-spacing:0.3px'>{category}</div>"
            f"<div style='margin-top:10px;font-size:12px;color:#475569'>"
            f"{papers} papers · confidence <span style='color:#fbbf24;font-weight:700'>{conf}%</span></div>"
            f"<div style='margin-top:8px;font-size:12px;color:#1d4ed8;font-weight:600'>{keywords}</div>"
            f"<div style='margin-top:10px;font-size:11px;color:#334155;line-height:1.5'>{evidence}</div>"
            "</div>"
        )
    return (
        "<div style='display:grid;grid-template-columns:repeat(auto-fit,minmax(280px,1fr));"
        "gap:14px;padding:4px 0'>" + "".join(cards) + "</div>"
    )
def _summary_markdown(result=None) -> str:
    """Build the Markdown summary shown under the upload row.

    Args:
        result: Mapping returned by a pipeline run. When it is ``None`` or
            empty, the summary is rebuilt from the files saved in the output
            directory instead.

    Returns:
        The onboarding instructions when there is neither a result nor a
        saved ``run_metadata.json``; otherwise a short run summary. Keys
        that are missing from the metadata render as ``None`` / ``N/A``.
    """
    meta = result or {}
    if not meta:
        # Guard the file read for every falsy result (not only None), so an
        # empty mapping cannot trigger FileNotFoundError.
        if not _exists("run_metadata.json"):
            return (
                "Upload the Scopus CSV and click **Run Complete Pipeline**. "
                "The app will generate paper-level Title+Abstract+DOI embeddings, optimize "
                "UMAP/HDBSCAN clustering, label 15-25 clusters through an in-app council, "
                "map them to PAJAIS, and export TCCM validation files."
            )
        run_meta = _load_json("run_metadata.json")  # parsed once, reused below
        meta = {
            "parameters": run_meta.get("selected_parameters", {}),
            "embedding": run_meta.get("embedding", {}),
            "clusters": _load_json("combined_labels.json") if _exists("combined_labels.json") else [],
            "taxonomy": _load_json("taxonomy_map.json") if _exists("taxonomy_map.json") else {},
            "config": _load_json("corpus_config.json") if _exists("corpus_config.json") else {},
        }
    # "or {}" also covers keys that are present but hold None.
    params = meta.get("parameters") or {}
    emb = meta.get("embedding") or {}
    tax = (meta.get("taxonomy") or {}).get("coverage_stats") or {}
    cfg = meta.get("config") or {}
    return (
        f"**Run complete.** Analysed {cfg.get('rows', 'N/A')} papers from "
        f"{cfg.get('journal', 'the corpus')} ({cfg.get('year_min')} to {cfg.get('year_max')}).\n\n"
        f"Selected clustering: `{params.get('algorithm')}` with "
        f"`{params.get('n_clusters')}` clusters, min size `{params.get('min_size')}`, "
        f"max size `{params.get('max_size')}`, noise ratio `{params.get('noise_ratio')}`.\n\n"
        f"Embedding: `{emb.get('embedding_model')}`. PAJAIS mapped: "
        f"`{tax.get('mapped', 0)}`; novel: `{tax.get('novel', 0)}`. "
        "Download the optimizer log and council validation for the final submission appendix."
    )
def _run(file_obj):
    """Run the complete pipeline on the uploaded CSV and return all 14 widget values.

    ``file_obj`` may be ``None`` (nothing uploaded), a path string, or an
    object with a ``.name`` path. Without an upload the current outputs are
    returned with a reminder as the summary. After a run the summary is built
    from the pipeline result and the download list comes from
    ``result["deliverables"]``.
    """

    def panels():
        # The twelve values between the summary and the download list.
        return (
            _phase_html(),
            _cluster_table(),
            _cards_html(),
            _optimizer_table(),
            _compliance_html(),
            _compliance_table(),
            _council_viz_html(),
            _council_table(),
            _tccm_table(),
            _chart_iframe("intertopic_map.html"),
            _chart_iframe("bar_chart.html"),
            _chart_iframe("treemap.html"),
        )

    if file_obj is None:
        return ("Upload a CSV first.", *panels(), _download_files())

    if isinstance(file_obj, str):
        filepath = file_obj
    else:
        filepath = file_obj.name
    result = run_complete_pipeline(filepath)
    return (_summary_markdown(result), *panels(), result["deliverables"])
def _refresh():
    """Re-read every output from disk and return the 14 widget values in display order."""
    leading = (
        _summary_markdown(),
        _phase_html(),
        _cluster_table(),
        _cards_html(),
        _optimizer_table(),
        _compliance_html(),
        _compliance_table(),
        _council_viz_html(),
        _council_table(),
        _tccm_table(),
    )
    chart_files = ("intertopic_map.html", "bar_chart.html", "treemap.html")
    charts = tuple(_chart_iframe(chart_file) for chart_file in chart_files)
    return leading + charts + (_download_files(),)
| CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=Outfit:wght@700;800;900&display=swap'); | |
| * { box-sizing: border-box; } | |
| body, .gradio-container { | |
| background: #070b14 !important; | |
| font-family: 'Inter', sans-serif !important; | |
| } | |
| .gradio-container { max-width: 1400px !important; } | |
| /* ── hero header ── */ | |
| .eis-hero { | |
| background: linear-gradient(135deg, #0d1b2e 0%, #0a2240 40%, #0c1a35 100%); | |
| border: 1px solid #1a3a5c; | |
| border-radius: 16px; | |
| padding: 36px 40px; | |
| margin-bottom: 8px; | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .eis-hero::before { | |
| content: ''; | |
| position: absolute; | |
| top: -60px; right: -60px; | |
| width: 280px; height: 280px; | |
| background: radial-gradient(circle, rgba(245,158,11,0.12) 0%, transparent 70%); | |
| pointer-events: none; | |
| } | |
| .eis-hero::after { | |
| content: ''; | |
| position: absolute; | |
| bottom: -40px; left: 30%; | |
| width: 200px; height: 200px; | |
| background: radial-gradient(circle, rgba(59,130,246,0.1) 0%, transparent 70%); | |
| pointer-events: none; | |
| } | |
| .eis-hero-badge { | |
| display: inline-block; | |
| background: rgba(245,158,11,0.15); | |
| border: 1px solid rgba(245,158,11,0.4); | |
| color: #fbbf24; | |
| font-size: 11px; | |
| font-weight: 700; | |
| letter-spacing: 2px; | |
| text-transform: uppercase; | |
| padding: 4px 12px; | |
| border-radius: 999px; | |
| margin-bottom: 14px; | |
| } | |
| .eis-hero h1 { | |
| font-family: 'Outfit', sans-serif; | |
| font-size: 38px; | |
| font-weight: 900; | |
| margin: 0 0 10px; | |
| background: linear-gradient(90deg, #f8fafc 0%, #93c5fd 60%, #fbbf24 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| line-height: 1.15; | |
| } | |
| .eis-hero p { | |
| color: #94a3b8; | |
| font-size: 15px; | |
| margin: 0; | |
| line-height: 1.6; | |
| max-width: 700px; | |
| } | |
| .eis-hero-stats { | |
| display: flex; | |
| gap: 28px; | |
| margin-top: 22px; | |
| flex-wrap: wrap; | |
| } | |
| .eis-stat { | |
| display: flex; | |
| flex-direction: column; | |
| gap: 2px; | |
| } | |
| .eis-stat strong { | |
| font-family: 'Outfit', sans-serif; | |
| font-size: 22px; | |
| font-weight: 800; | |
| color: #fbbf24; | |
| } | |
| .eis-stat span { | |
| font-size: 11px; | |
| color: #64748b; | |
| text-transform: uppercase; | |
| letter-spacing: 1px; | |
| } | |
| /* ── phase chips ── */ | |
| .eis-phase-chip { | |
| display: inline-flex; | |
| gap: 6px; | |
| align-items: center; | |
| padding: 6px 14px; | |
| border-radius: 999px; | |
| font-size: 12px; | |
| font-weight: 700; | |
| letter-spacing: 0.3px; | |
| transition: all 0.2s; | |
| } | |
| .eis-phase-chip.done { | |
| background: linear-gradient(135deg, #1d4ed8, #0891b2); | |
| color: #e0f2fe; | |
| box-shadow: 0 0 12px rgba(59,130,246,0.35); | |
| } | |
| .eis-phase-chip.pending { | |
| background: #0f172a; | |
| color: #475569; | |
| border: 1px solid #1e293b; | |
| } | |
| /* ── upload + run area ── */ | |
| .eis-upload-area { | |
| background: #0d1424; | |
| border: 1px solid #1e3a5c; | |
| border-radius: 12px; | |
| padding: 20px; | |
| } | |
| /* ── compliance ── */ | |
| .compliance-empty { | |
| padding: 28px; | |
| border: 1px dashed #1e3a5c; | |
| border-radius: 10px; | |
| background: #0a1220; | |
| color: #475569; | |
| font-family: 'Inter', sans-serif; | |
| } | |
| .compliance-board, .tccm-status { | |
| background: #0d1424; | |
| border: 1px solid #1e3a5c; | |
| border-radius: 12px; | |
| padding: 20px; | |
| } | |
| .compliance-board h3, .tccm-status h3 { | |
| margin: 0; | |
| color: #f1f5f9; | |
| font-family: 'Outfit', sans-serif; | |
| font-size: 20px; | |
| font-weight: 800; | |
| } | |
| .compliance-board p, .tccm-status p { | |
| color: #64748b; | |
| margin: 6px 0 16px; | |
| line-height: 1.5; | |
| font-size: 14px; | |
| } | |
| .compliance-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); | |
| gap: 10px; | |
| } | |
| .compliance-row { | |
| border: 1px solid #1e293b; | |
| border-radius: 10px; | |
| padding: 14px; | |
| background: #0a1220; | |
| transition: border-color 0.2s; | |
| } | |
| .compliance-row:hover { border-color: #2563eb; } | |
| .compliance-row span { | |
| display: inline-block; | |
| color: white; | |
| font-size: 10px; | |
| font-weight: 800; | |
| padding: 3px 10px; | |
| border-radius: 999px; | |
| margin-bottom: 8px; | |
| letter-spacing: 0.5px; | |
| } | |
| .compliance-row strong { display: block; color: #e2e8f0; font-size: 13px; } | |
| .compliance-row p { font-size: 12px; margin: 6px 0; color: #64748b; } | |
| .compliance-row small { color: #334155; font-size: 11px; } | |
| /* ── tccm cards ── */ | |
| .tccm-status > div { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); | |
| gap: 10px; | |
| } | |
| .tccm-card { | |
| border: 1px solid #1e293b; | |
| border-radius: 10px; | |
| padding: 14px; | |
| background: #0a1220; | |
| } | |
| .tccm-card strong { display: block; font-size: 28px; color: #fbbf24; font-family: 'Outfit', sans-serif; } | |
| .tccm-card span { color: #64748b; font-size: 12px; font-weight: 600; } | |
| /* ── council board ── */ | |
| .council-empty { | |
| padding: 32px; | |
| border: 1px dashed #1e3a5c; | |
| color: #475569; | |
| border-radius: 12px; | |
| background: #0a1220; | |
| } | |
| .council-board { | |
| background: linear-gradient(160deg, #06101e 0%, #08162a 100%); | |
| color: #e5edf7; | |
| border-radius: 14px; | |
| padding: 22px; | |
| border: 1px solid #1a3354; | |
| } | |
| .council-top { | |
| display: grid; | |
| grid-template-columns: minmax(280px, 1.2fr) minmax(320px, 1fr); | |
| gap: 16px; | |
| align-items: start; | |
| } | |
| .council-top h3 { | |
| margin: 0; | |
| font-size: 20px; | |
| font-family: 'Outfit', sans-serif; | |
| font-weight: 800; | |
| color: #f1f5f9; | |
| } | |
| .council-top p { margin: 6px 0 0; color: #64748b; line-height: 1.5; font-size: 14px; } | |
| .council-metrics { display: grid; grid-template-columns: repeat(2, minmax(140px, 1fr)); gap: 8px; } | |
| .council-metrics div { | |
| background: #0c1e35; | |
| border: 1px solid #1a3a5c; | |
| border-radius: 10px; | |
| padding: 12px; | |
| } | |
| .council-metrics strong { display: block; color: #fbbf24; font-size: 20px; font-family: 'Outfit', sans-serif; } | |
| .council-metrics span { color: #64748b; font-size: 12px; } | |
| .council-lane { | |
| display: grid; | |
| grid-template-columns: 1fr 60px 1fr 60px 1fr 60px 1fr; | |
| gap: 8px; | |
| align-items: center; | |
| margin: 22px 0; | |
| } | |
| .pulse-node { | |
| min-height: 64px; | |
| display: grid; | |
| place-items: center; | |
| text-align: center; | |
| border: 1px solid #1d4ed8; | |
| background: linear-gradient(135deg, #0f2040, #0d1a35); | |
| border-radius: 10px; | |
| color: #93c5fd; | |
| font-weight: 800; | |
| animation: councilGlow 1.8s ease-in-out infinite; | |
| } | |
| .pulse-node span { display: block; font-size: 11px; font-weight: 600; color: #bfdbfe; margin-top: 3px; } | |
| .pulse-node.final { border-color: #d97706; background: linear-gradient(135deg, #1c1000, #2a1800); color: #fbbf24; } | |
| .pulse-line { | |
| height: 3px; | |
| border-radius: 999px; | |
| background: linear-gradient(90deg, #1d4ed8, #f59e0b, #1d4ed8); | |
| background-size: 220% 100%; | |
| animation: councilFlow 1.1s linear infinite; | |
| } | |
| .council-cluster { | |
| margin-top: 12px; | |
| padding: 14px; | |
| border: 1px solid #1a3354; | |
| border-radius: 10px; | |
| background: #080f1c; | |
| } | |
| .cluster-head { display: flex; justify-content: space-between; gap: 12px; color: #cbd5e1; margin-bottom: 10px; } | |
| .cluster-head span { color: #60a5fa; font-weight: 800; } | |
| .cluster-head strong { color: #f8fafc; } | |
| .council-flow { display: grid; grid-template-columns: repeat(4, minmax(160px, 1fr)); gap: 8px; } | |
| .council-vote, .final-label { | |
| border-radius: 10px; | |
| padding: 12px; | |
| background: #0c1e35; | |
| border: 1px solid #1a3a5c; | |
| min-height: 82px; | |
| } | |
| .council-vote { display: flex; gap: 8px; align-items: flex-start; } | |
| .vote-dot { | |
| width: 10px; height: 10px; | |
| margin-top: 4px; | |
| border-radius: 50%; | |
| background: #f59e0b; | |
| box-shadow: 0 0 14px rgba(245,158,11,0.7); | |
| animation: councilBlink 1.2s ease-in-out infinite; | |
| flex: 0 0 auto; | |
| } | |
| .council-vote strong, .final-label strong { display: block; color: #e2e8f0; font-size: 13px; } | |
| .council-vote span, .final-label span { display: block; color: #fbbf24; font-size: 12px; margin-top: 3px; } | |
| .council-vote small, .final-label small { display: block; color: #475569; font-size: 11px; margin-top: 4px; line-height: 1.25; } | |
| .final-label { border-color: #d97706; background: #130d00; } | |
| /* ── cluster cards ── */ | |
| .eis-cluster-card { | |
| background: linear-gradient(135deg, #0d1830 0%, #0a1220 100%); | |
| border: 1px solid #1e3a5c; | |
| border-left: 4px solid #f59e0b; | |
| border-radius: 12px; | |
| padding: 18px; | |
| min-height: 180px; | |
| transition: transform 0.2s, box-shadow 0.2s, border-color 0.2s; | |
| } | |
| .eis-cluster-card:hover { | |
| transform: translateY(-2px); | |
| box-shadow: 0 8px 32px rgba(245,158,11,0.12); | |
| border-color: #2563eb; | |
| } | |
| /* ── Gradio overrides ── */ | |
| .gradio-container .tabs { background: transparent !important; } | |
| .gradio-container .tab-nav button { | |
| color: #64748b !important; | |
| font-weight: 600 !important; | |
| border-bottom: 2px solid transparent !important; | |
| transition: all 0.2s !important; | |
| } | |
| .gradio-container .tab-nav button.selected { | |
| color: #fbbf24 !important; | |
| border-bottom-color: #f59e0b !important; | |
| background: transparent !important; | |
| } | |
| .gradio-container label { color: #94a3b8 !important; font-size: 13px !important; } | |
| .gradio-container .prose { color: #94a3b8 !important; } | |
| /* ── animations ── */ | |
| @keyframes councilFlow { from { background-position: 0% 0; } to { background-position: 220% 0; } } | |
| @keyframes councilGlow { 0%, 100% { box-shadow: 0 0 0 rgba(29,78,216,0.2); } 50% { box-shadow: 0 0 22px rgba(245,158,11,0.3); } } | |
| @keyframes councilBlink { 0%, 100% { opacity: .35; transform: scale(.75); } 50% { opacity: 1; transform: scale(1.15); } } | |
| @keyframes fadeIn { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: translateY(0); } } | |
| @media (max-width: 900px) { | |
| .council-top, .council-flow { grid-template-columns: 1fr; } | |
| .council-lane { grid-template-columns: 1fr; } | |
| .pulse-line { height: 18px; width: 3px; justify-self: center; } | |
| .eis-hero h1 { font-size: 26px; } | |
| .eis-hero-stats { gap: 16px; } | |
| } | |
| """ | |
# NOTE(review): the nesting below is reconstructed from a flattened listing;
# confirm the Row/Column/Tab structure against the deployed app.
with gr.Blocks(title="EIS Topic Intelligence", css=CSS, theme=gr.themes.Base()) as demo:
    # Static hero banner.
    gr.HTML(
        "<div class='eis-hero'>"
        "<div class='eis-hero-badge'>EIS · SPJIMR Research Analytics</div>"
        "<h1>EIS Topic Intelligence</h1>"
        "<p>Paper-level SPECTER2 / TF-IDF embeddings · UMAP + HDBSCAN optimised clustering"
        " · Live Mistral LLM council validation · PAJAIS taxonomy mapping · TCCM extraction</p>"
        "<div class='eis-hero-stats'>"
        "<div class='eis-stat'><strong>15–25</strong><span>Target clusters</span></div>"
        "<div class='eis-stat'><strong>3</strong><span>Council validators</span></div>"
        "<div class='eis-stat'><strong>25</strong><span>PAJAIS categories</span></div>"
        "<div class='eis-stat'><strong>100</strong><span>TCCM papers</span></div>"
        "</div>"
        "</div>"
    )

    # Progress chips, upload control and action buttons.
    phase = gr.HTML(value=_phase_html())
    with gr.Row():
        csv_file = gr.File(
            label="📂 Upload Scopus Journal CSV",
            file_types=[".csv"],
            scale=3,
        )
        with gr.Column(scale=1):
            run_btn = gr.Button("▶ Run Complete Pipeline", variant="primary")
            refresh_btn = gr.Button("↻ Refresh Outputs")
    summary = gr.Markdown(value=_summary_markdown())

    # One tab per deliverable family; each widget starts from what is on disk.
    with gr.Tabs():
        with gr.Tab("Clusters"):
            cluster_table = gr.Dataframe(
                headers=[
                    "Cluster ID",
                    "Label",
                    "PAJAIS Category",
                    "Papers",
                    "Confidence",
                    "Agreement",
                    "Keywords",
                    "Top 3 Titles",
                    "Reasoning",
                ],
                value=_cluster_table(),
                wrap=True,
                interactive=False,
            )
            cluster_cards = gr.HTML(value=_cards_html())
        with gr.Tab("Optimization"):
            optimizer_table = gr.Dataframe(
                value=_optimizer_table(), wrap=True, interactive=False
            )
        with gr.Tab("Compliance"):
            compliance_panel = gr.HTML(value=_compliance_html())
            compliance_table = gr.Dataframe(
                value=_compliance_table(), wrap=True, interactive=False
            )
        with gr.Tab("Council Validation"):
            council_viz = gr.HTML(value=_council_viz_html())
            council_table = gr.Dataframe(
                value=_council_table(), wrap=True, interactive=False
            )
        with gr.Tab("TCCM Validation"):
            tccm_table = gr.Dataframe(
                value=_tccm_table(), wrap=True, interactive=False
            )
        with gr.Tab("Charts"):
            chart_map = gr.HTML(value=_chart_iframe("intertopic_map.html"))
            chart_bar = gr.HTML(value=_chart_iframe("bar_chart.html"))
            chart_tree = gr.HTML(value=_chart_iframe("treemap.html"))
        with gr.Tab("Downloads"):
            downloads = gr.File(
                value=_download_files(),
                label="Generated deliverables",
                file_count="multiple",
            )

    # Order must match the tuples returned by _run and _refresh.
    outputs = [
        summary, phase,
        cluster_table, cluster_cards,
        optimizer_table,
        compliance_panel, compliance_table,
        council_viz, council_table,
        tccm_table,
        chart_map, chart_bar, chart_tree,
        downloads,
    ]
    run_btn.click(
        fn=_run,
        inputs=[csv_file],
        outputs=outputs,
        show_api=False,
        api_name=False,
    )
    refresh_btn.click(
        fn=_refresh,
        inputs=None,
        outputs=outputs,
        show_api=False,
        api_name=False,
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)