""" app.py — Gradio Web Application (Entry Point) AI-Powered Topic Modelling for Academic Journal Analysis. Satisfies all deliverables C1–C10. Run with: python app.py """ from __future__ import annotations import os import traceback import pandas as pd import gradio as gr from pathlib import Path # ── Load .env file (API keys) ──────────────────────────────────────────── def _load_env() -> None: env_path = Path(__file__).parent / ".env" if not env_path.exists(): return for line in env_path.read_text(encoding="utf-8").splitlines(): line = line.strip() if not line or line.startswith("#") or "=" not in line: continue key, val = line.split("=", 1) key = key.strip() val = val.strip().strip('"').strip("'") if val and not os.getenv(key): os.environ[key] = val _load_env() # ── Import the agent ───────────────────────────────────────────────────── from agent import TopicModellingAgent # ════════════════════════════════════════════════════════════════════════════ # Colour / Style Tokens # ════════════════════════════════════════════════════════════════════════════ ACCENT = "#00d4aa" ACCENT2 = "#7c3aed" BG_DARK = "#0f0f1a" BG_MID = "#161625" BG_CARD = "#1c1c32" TXT = "#e2e8f0" TXT_MUTED = "#94a3b8" BORDER = "#2d2d44" # ════════════════════════════════════════════════════════════════════════════ # Custom CSS # ════════════════════════════════════════════════════════════════════════════ CSS = f""" @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@300;400;500;600;700&display=swap'); :root {{ --accent: {ACCENT}; --accent2: {ACCENT2}; --bg-dark: {BG_DARK}; --bg-mid: {BG_MID}; --bg-card: {BG_CARD}; --txt: {TXT}; --txt-muted: {TXT_MUTED}; --border: {BORDER}; }} /* ── Global ── */ body, .gradio-container {{ background: var(--bg-dark) !important; color: var(--txt) !important; font-family: 'Inter', -apple-system, sans-serif !important; }} /* ── Header ── */ .app-header {{ background: linear-gradient(135deg, {BG_CARD} 0%, {BG_MID} 100%); border: 1px solid {BORDER}; border-radius: 16px; padding: 32px 40px; margin-bottom: 24px; position: relative; overflow: hidden; }} .app-header::before {{ content: ''; position: absolute; top: -50%; left: -50%; width: 200%; height: 200%; background: radial-gradient(circle at 30% 50%, {ACCENT}08 0%, transparent 50%), radial-gradient(circle at 70% 50%, {ACCENT2}06 0%, transparent 50%); pointer-events: none; }} .app-header h1 {{ font-family: 'Inter', sans-serif; font-size: 2rem; font-weight: 800; color: {TXT}; margin: 0 0 6px; letter-spacing: -0.03em; position: relative; }} .app-header h1 span {{ color: {ACCENT}; }} .app-header p {{ color: {TXT_MUTED}; font-size: 13px; margin: 0; font-family: 'JetBrains Mono', monospace; letter-spacing: 1px; position: relative; }} /* ── Section cards ── */ .section-card {{ background: {BG_CARD}; border: 1px solid {BORDER}; border-radius: 12px; padding: 20px 24px; margin-bottom: 16px; }} .section-title {{ font-family: 'JetBrains Mono', monospace; font-size: 11px; font-weight: 700; letter-spacing: 3px; text-transform: uppercase; color: {ACCENT}; margin-bottom: 16px; padding-bottom: 10px; border-bottom: 1px solid {BORDER}; }} /* ── Run button ── */ .btn-run {{ background: linear-gradient(135deg, {ACCENT} 0%, #00b894 100%) !important; color: {BG_DARK} !important; border: none !important; border-radius: 10px !important; font-family: 'JetBrains Mono', monospace !important; font-weight: 700 !important; font-size: 14px !important; letter-spacing: 1px !important; padding: 14px 28px !important; transition: all 0.3s ease !important; box-shadow: 0 4px 15px {ACCENT}30 !important; }} .btn-run:hover {{ transform: translateY(-2px) !important; box-shadow: 0 8px 25px {ACCENT}50 !important; }} /* ── Tab navigation ── */ .tab-nav button {{ font-family: 'JetBrains Mono', monospace !important; font-size: 12px !important; font-weight: 600 !important; letter-spacing: 0.5px !important; color: {TXT_MUTED} !important; border: none !important; background: transparent !important; padding: 10px 16px !important; transition: all 0.2s !important; }} .tab-nav button.selected {{ color: {ACCENT} !important; border-bottom: 2px solid {ACCENT} !important; }} /* ── Dataframe styling ── */ .gradio-dataframe table {{ background: {BG_MID} !important; border-collapse: collapse; }} .gradio-dataframe th {{ background: {BG_CARD} !important; color: {ACCENT} !important; font-family: 'JetBrains Mono', monospace !important; font-size: 11px !important; letter-spacing: 1px !important; text-transform: uppercase !important; padding: 10px 12px !important; border-bottom: 1px solid {BORDER} !important; }} .gradio-dataframe td {{ color: {TXT} !important; font-size: 12px !important; padding: 8px 12px !important; border-bottom: 1px solid {BORDER}80 !important; font-family: 'JetBrains Mono', monospace !important; }} .gradio-dataframe tr:hover td {{ background: {ACCENT}08 !important; }} /* ── Inputs ── */ input, textarea, select, .gradio-textbox textarea {{ background: {BG_MID} !important; border: 1px solid {BORDER} !important; color: {TXT} !important; font-family: 'JetBrains Mono', monospace !important; font-size: 13px !important; border-radius: 8px !important; }} input:focus, textarea:focus {{ border-color: {ACCENT} !important; box-shadow: 0 0 0 2px {ACCENT}30 !important; }} /* ── Labels ── */ label, .gradio-label {{ color: {TXT_MUTED} !important; font-family: 'JetBrains Mono', monospace !important; font-size: 11px !important; letter-spacing: 1px !important; text-transform: uppercase !important; }} /* ── File upload ── */ .gradio-file {{ background: {BG_CARD} !important; border: 2px dashed {BORDER} !important; border-radius: 10px !important; transition: border-color 0.3s !important; }} .gradio-file:hover {{ border-color: {ACCENT} !important; }} /* ── Accordion ── */ .gradio-accordion {{ background: {BG_CARD} !important; border: 1px solid {BORDER} !important; border-radius: 10px !important; }} /* ── Stats badges ── */ .stats-badge {{ display: inline-block; background: {ACCENT}15; border: 1px solid {ACCENT}40; border-radius: 8px; padding: 10px 18px; margin: 4px 6px; font-family: 'JetBrains Mono', monospace; font-size: 13px; color: {ACCENT}; }} .stats-badge.novel {{ background: {ACCENT2}15; border-color: {ACCENT2}40; color: {ACCENT2}; }} .stats-badge b {{ font-size: 18px; display: block; margin-top: 2px; }} """ # ════════════════════════════════════════════════════════════════════════════ # Helper Functions # ════════════════════════════════════════════════════════════════════════════ def build_stats_html(results: dict | None) -> str: """Build the statistics summary HTML panel.""" if not results: return ( f'
' f'Upload a CSV and click Run Analysis ' f'to see results.
' ) total = results.get("total_topics", 0) title_n = results.get("title_topics", 0) abstract_n = results.get("abstract_topics", 0) mapped = results.get("mapped", 0) novel = results.get("novel", 0) return f"""
📊 Total Topics{total}
📝 Title Topics{title_n}
📄 Abstract Topics{abstract_n}
✅ MAPPED{mapped}
🆕 NOVEL{novel}
""" # ════════════════════════════════════════════════════════════════════════════ # Main Analysis Handler # ════════════════════════════════════════════════════════════════════════════ EMPTY_REVIEW = pd.DataFrame(columns=["topic_id", "source", "keywords", "label"]) EMPTY_MAPPING = pd.DataFrame( columns=["topic_id", "source", "label", "pajais_category", "status", "confidence"] ) def run_analysis(file_obj, api_key, provider, progress=gr.Progress()): """Run the full topic modelling pipeline and return all UI outputs.""" empty = ( EMPTY_REVIEW, EMPTY_MAPPING, build_stats_html(None), "", "", "", [], ) # ── Validate upload ────────────────────────────────────────────── if file_obj is None: return ( EMPTY_REVIEW, EMPTY_MAPPING, build_stats_html(None), "", "", "❌ No file uploaded. Please upload a CSV file first.", [], ) # ── Resolve provider ───────────────────────────────────────────── prov = None if provider == "Auto-detect" else provider.lower() key = api_key.strip() if api_key else None agent = TopicModellingAgent(api_key=key, provider=prov) try: # ── Run pipeline ───────────────────────────────────────────── csv_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj) results = agent.run_pipeline(csv_path, progress_callback=progress) review_df = agent.get_review_table() mapping_df = agent.get_mapping_table() stats_html = build_stats_html(results) narrative = agent.narrative reflection = agent.reflection logs = "\n".join(agent.logs) dl_files = agent.get_download_files() return ( review_df, mapping_df, stats_html, narrative, reflection, logs, dl_files, ) except Exception as e: error_msg = f"❌ Error: {e}\n\n{traceback.format_exc()}" return ( EMPTY_REVIEW, EMPTY_MAPPING, build_stats_html(None), "", "", error_msg, [], ) # ════════════════════════════════════════════════════════════════════════════ # Gradio UI # ════════════════════════════════════════════════════════════════════════════ with gr.Blocks( title="AI Topic Modelling — Academic Journal Analysis", ) as demo: # ── Header ──────────────────────────────────────────────────────────── gr.HTML(f"""

TopicModeller AI

NMF Topic Modelling · PAJAIS Taxonomy Mapping · LLM-Enhanced Labeling

""") # ════════════════════════════════════════════════════════════════════════ # SECTION 1 — Upload & Configure # ════════════════════════════════════════════════════════════════════════ with gr.Group(elem_classes="section-card"): gr.HTML('
① Upload & Configure
') with gr.Row(): with gr.Column(scale=3): file_input = gr.File( label="Upload CSV (requires 'title' and 'abstract' columns)", file_types=[".csv"], file_count="single", ) with gr.Column(scale=2): api_key_input = gr.Textbox( label="LLM API Key (optional — enhances labels)", placeholder="sk-... or gsk_... or your API key", type="password", ) provider_dropdown = gr.Dropdown( label="LLM Provider", choices=["Auto-detect", "Groq", "Mistral", "OpenAI"], value="Auto-detect", ) run_btn = gr.Button( "🚀 Run Full Analysis", elem_classes="btn-run", size="lg", ) gr.HTML(f"""
ℹ️ Note: Works without an API key (uses heuristic keyword labels). For LLM-enhanced labels, provide a Groq (free at console.groq.com), Mistral, or OpenAI key. Keys from .env file are loaded automatically.
""") # ════════════════════════════════════════════════════════════════════════ # SECTION 2 — Results # ════════════════════════════════════════════════════════════════════════ with gr.Group(elem_classes="section-card"): gr.HTML('
② Results
') stats_display = gr.HTML(value=build_stats_html(None)) with gr.Tabs(): # ── Tab 1: Review Table (C4) ───────────────────────────── with gr.Tab("📊 Review Table (C4)"): gr.HTML(f"""

All generated topics with IDs, source (title/abstract), keywords, and labels. Target: 98+ rows (50 title topics + 50 abstract topics).

""") review_table = gr.Dataframe( value=EMPTY_REVIEW, label="Topic Review Table", interactive=False, wrap=True, ) # ── Tab 2: PAJAIS Mapping (C5) ─────────────────────────── with gr.Tab("🗺️ PAJAIS Mapping (C5)"): gr.HTML(f"""

Each topic mapped to the PAJAIS 25-category taxonomy. Status: MAPPED (matches taxonomy) or NOVEL (new / unmapped theme).

""") mapping_table = gr.Dataframe( value=EMPTY_MAPPING, label="PAJAIS Taxonomy Mapping", interactive=False, wrap=True, ) # ── Tab 3: Narrative (C8) ──────────────────────────────── with gr.Tab("📝 Narrative (C8)"): narrative_output = gr.Textbox( label="Academic Narrative (~500 words)", lines=20, interactive=False, placeholder="Run analysis to generate the narrative...", ) # ── Tab 4: Reflection (C10) ────────────────────────────── with gr.Tab("💭 Reflection (C10)"): reflection_output = gr.Textbox( label="Research Reflection (~250 words)", lines=14, interactive=False, placeholder="Run analysis to generate the reflection...", ) # ── Tab 5: Pipeline Logs ───────────────────────────────── with gr.Tab("📋 Pipeline Logs"): logs_output = gr.Textbox( label="Execution Log", lines=20, interactive=False, placeholder="Pipeline logs will appear here after analysis...", ) # ════════════════════════════════════════════════════════════════════════ # SECTION 3 — Downloads # ════════════════════════════════════════════════════════════════════════ with gr.Group(elem_classes="section-card"): gr.HTML('
③ Download Outputs
') gr.HTML(f"""

Generated files: comparison.csv (C6) · taxonomy_map.json (C7) · narrative.txt (C8) · prompts.txt (C9) · reflection.txt (C10)

""") download_files = gr.File( label="Output Files", file_count="multiple", interactive=False, ) # ════════════════════════════════════════════════════════════════════════ # SECTION 4 — Help # ════════════════════════════════════════════════════════════════════════ with gr.Group(elem_classes="section-card"): gr.HTML('
④ Help & Deliverables
') with gr.Accordion("📋 Assignment Deliverables Checklist", open=False): gr.HTML(f"""
C1. HuggingFace-compatible Gradio app ✓
C2. 3 Python files: app.py, tools.py, agent.py ✓
C3. requirements.txt with 13 packages ✓
C4. Review table (98+ topics) → "Review Table" tab ✓
C5. PAJAIS mapping (MAPPED/NOVEL) → "PAJAIS Mapping" tab ✓
C6. comparison.csv → Downloads ✓
C7. taxonomy_map.json → Downloads ✓
C8. narrative.txt (~500 words) → "Narrative" tab ✓
C9. prompts.txt → Downloads ✓
C10. reflection.txt (~250 words) → "Reflection" tab ✓
""") with gr.Accordion("⚠️ Troubleshooting", open=False): gr.HTML(f"""
Missing columns error
Ensure CSV has lowercase 'title' and 'abstract' columns.

Rate limit (429 error)
LLM API rate limit. Wait 1-2 min and retry. App still works without LLM (uses heuristic labels).

Too few topics
Dataset may be too small. Minimum ~20 papers recommended for meaningful topic modelling.

Empty narrative/reflection
If LLM unavailable, template-based text is used automatically.
""") # ════════════════════════════════════════════════════════════════════════ # Wire Events # ════════════════════════════════════════════════════════════════════════ run_btn.click( fn=run_analysis, inputs=[file_input, api_key_input, provider_dropdown], outputs=[ review_table, mapping_table, stats_display, narrative_output, reflection_output, logs_output, download_files, ], ) # ════════════════════════════════════════════════════════════════════════════ # Launch # ════════════════════════════════════════════════════════════════════════════ if __name__ == "__main__": demo.launch( show_error=True, theme=gr.themes.Base( primary_hue="teal", secondary_hue="purple", neutral_hue="slate", font=gr.themes.GoogleFont("Inter"), font_mono=gr.themes.GoogleFont("JetBrains Mono"), ), css=CSS, )