Spaces:
Sleeping
Sleeping
| """ | |
| app.py — Gradio Web Application (Entry Point) | |
| AI-Powered Topic Modelling for Academic Journal Analysis. | |
| Satisfies all deliverables C1–C10. | |
| Run with: python app.py | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import traceback | |
| import pandas as pd | |
| import gradio as gr | |
| from pathlib import Path | |
| # ── Load .env file (API keys) ──────────────────────────────────────────── | |
def _load_env() -> None:
    """Populate ``os.environ`` from a ``.env`` file located next to this module.

    Each usable line has the form ``KEY=VALUE`` (an optional leading
    ``export `` is dropped).  Blank lines, ``#`` comments and lines without
    ``=`` are ignored.  Surrounding whitespace and quote characters are
    stripped from the value.  A variable is written only when the value is
    non-empty and the process environment has no non-empty value for it, so
    variables that are already set take precedence over the file.

    Lines whose key is empty (e.g. ``=value``) are skipped rather than handed
    to ``os.environ``, which rejects empty names.  Nothing happens when the
    file does not exist or is not a regular file.
    """
    env_path = Path(__file__).parent / ".env"
    # is_file() also rejects a directory that happens to be called ".env".
    if not env_path.is_file():
        return
    # utf-8-sig drops a leading BOM, which would otherwise become part of the
    # first variable name; plain UTF-8 files decode unchanged.
    for raw_line in env_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, val = line.split("=", 1)
        key = key.strip()
        # Accept shell-style "export KEY=value" lines.
        if key.startswith("export "):
            key = key[len("export "):].strip()
        val = val.strip().strip('"').strip("'")
        # An empty name cannot be stored in the environment, and an empty
        # value carries no information: skip both instead of failing at import.
        if not key or not val:
            continue
        # Values already present (and non-empty) in the environment win.
        if not os.getenv(key):
            os.environ[key] = val


_load_env()
| # ── Import the agent ───────────────────────────────────────────────────── | |
| from agent import TopicModellingAgent | |
| # ──────────────────────────────────────────────────────────────────────────── | |
| # Colour / Style Tokens | |
| # ──────────────────────────────────────────────────────────────────────────── | |
# Hex colours interpolated into the stylesheet and the inline HTML snippets.
ACCENT, ACCENT2 = "#00d4aa", "#7c3aed"  # primary / secondary highlight
BG_DARK, BG_MID, BG_CARD = "#0f0f1a", "#161625", "#1c1c32"  # backgrounds
TXT, TXT_MUTED = "#e2e8f0", "#94a3b8"  # regular / muted text
BORDER = "#2d2d44"  # borders and separators
| # ──────────────────────────────────────────────────────────────────────────── | |
| # Custom CSS | |
| # ──────────────────────────────────────────────────────────────────────────── | |
# Stylesheet handed to ``demo.launch(css=...)`` at the bottom of the file.
# It is an f-string: the colour constants are interpolated directly, so every
# literal CSS brace is doubled (``{{`` / ``}}``).  Suffixes such as
# ``{ACCENT}30`` append two hex digits to a six-digit colour, producing an
# eight-digit hex colour.
# NOTE(review): selectors like ``.gradio-dataframe`` / ``.gradio-file`` depend
# on the class names the installed Gradio version renders — confirm they match.
CSS = f"""
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@300;400;500;600;700&display=swap');
:root {{
--accent: {ACCENT};
--accent2: {ACCENT2};
--bg-dark: {BG_DARK};
--bg-mid: {BG_MID};
--bg-card: {BG_CARD};
--txt: {TXT};
--txt-muted: {TXT_MUTED};
--border: {BORDER};
}}
/* ββ Global ββ */
body, .gradio-container {{
background: var(--bg-dark) !important;
color: var(--txt) !important;
font-family: 'Inter', -apple-system, sans-serif !important;
}}
/* ββ Header ββ */
.app-header {{
background: linear-gradient(135deg, {BG_CARD} 0%, {BG_MID} 100%);
border: 1px solid {BORDER};
border-radius: 16px;
padding: 32px 40px;
margin-bottom: 24px;
position: relative;
overflow: hidden;
}}
.app-header::before {{
content: '';
position: absolute;
top: -50%; left: -50%;
width: 200%; height: 200%;
background:
radial-gradient(circle at 30% 50%, {ACCENT}08 0%, transparent 50%),
radial-gradient(circle at 70% 50%, {ACCENT2}06 0%, transparent 50%);
pointer-events: none;
}}
.app-header h1 {{
font-family: 'Inter', sans-serif;
font-size: 2rem; font-weight: 800;
color: {TXT}; margin: 0 0 6px;
letter-spacing: -0.03em;
position: relative;
}}
.app-header h1 span {{ color: {ACCENT}; }}
.app-header p {{
color: {TXT_MUTED}; font-size: 13px; margin: 0;
font-family: 'JetBrains Mono', monospace;
letter-spacing: 1px; position: relative;
}}
/* ββ Section cards ββ */
.section-card {{
background: {BG_CARD};
border: 1px solid {BORDER};
border-radius: 12px;
padding: 20px 24px;
margin-bottom: 16px;
}}
.section-title {{
font-family: 'JetBrains Mono', monospace;
font-size: 11px; font-weight: 700;
letter-spacing: 3px; text-transform: uppercase;
color: {ACCENT};
margin-bottom: 16px; padding-bottom: 10px;
border-bottom: 1px solid {BORDER};
}}
/* ββ Run button ββ */
.btn-run {{
background: linear-gradient(135deg, {ACCENT} 0%, #00b894 100%) !important;
color: {BG_DARK} !important;
border: none !important; border-radius: 10px !important;
font-family: 'JetBrains Mono', monospace !important;
font-weight: 700 !important; font-size: 14px !important;
letter-spacing: 1px !important;
padding: 14px 28px !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 15px {ACCENT}30 !important;
}}
.btn-run:hover {{
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px {ACCENT}50 !important;
}}
/* ββ Tab navigation ββ */
.tab-nav button {{
font-family: 'JetBrains Mono', monospace !important;
font-size: 12px !important; font-weight: 600 !important;
letter-spacing: 0.5px !important;
color: {TXT_MUTED} !important;
border: none !important; background: transparent !important;
padding: 10px 16px !important;
transition: all 0.2s !important;
}}
.tab-nav button.selected {{
color: {ACCENT} !important;
border-bottom: 2px solid {ACCENT} !important;
}}
/* ββ Dataframe styling ββ */
.gradio-dataframe table {{
background: {BG_MID} !important;
border-collapse: collapse;
}}
.gradio-dataframe th {{
background: {BG_CARD} !important;
color: {ACCENT} !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 11px !important;
letter-spacing: 1px !important;
text-transform: uppercase !important;
padding: 10px 12px !important;
border-bottom: 1px solid {BORDER} !important;
}}
.gradio-dataframe td {{
color: {TXT} !important;
font-size: 12px !important;
padding: 8px 12px !important;
border-bottom: 1px solid {BORDER}80 !important;
font-family: 'JetBrains Mono', monospace !important;
}}
.gradio-dataframe tr:hover td {{
background: {ACCENT}08 !important;
}}
/* ββ Inputs ββ */
input, textarea, select, .gradio-textbox textarea {{
background: {BG_MID} !important;
border: 1px solid {BORDER} !important;
color: {TXT} !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 13px !important;
border-radius: 8px !important;
}}
input:focus, textarea:focus {{
border-color: {ACCENT} !important;
box-shadow: 0 0 0 2px {ACCENT}30 !important;
}}
/* ββ Labels ββ */
label, .gradio-label {{
color: {TXT_MUTED} !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 11px !important;
letter-spacing: 1px !important;
text-transform: uppercase !important;
}}
/* ββ File upload ββ */
.gradio-file {{
background: {BG_CARD} !important;
border: 2px dashed {BORDER} !important;
border-radius: 10px !important;
transition: border-color 0.3s !important;
}}
.gradio-file:hover {{
border-color: {ACCENT} !important;
}}
/* ββ Accordion ββ */
.gradio-accordion {{
background: {BG_CARD} !important;
border: 1px solid {BORDER} !important;
border-radius: 10px !important;
}}
/* ββ Stats badges ββ */
.stats-badge {{
display: inline-block;
background: {ACCENT}15;
border: 1px solid {ACCENT}40;
border-radius: 8px;
padding: 10px 18px; margin: 4px 6px;
font-family: 'JetBrains Mono', monospace;
font-size: 13px; color: {ACCENT};
}}
.stats-badge.novel {{
background: {ACCENT2}15;
border-color: {ACCENT2}40;
color: {ACCENT2};
}}
.stats-badge b {{
font-size: 18px;
display: block;
margin-top: 2px;
}}
"""
| # ──────────────────────────────────────────────────────────────────────────── | |
| # Helper Functions | |
| # ──────────────────────────────────────────────────────────────────────────── | |
def build_stats_html(results: dict | None) -> str:
    """Render the statistics strip displayed above the result tabs.

    When *results* is falsy (``None`` or an empty dict) a muted hint asking
    the user to upload a CSV and run the analysis is returned.  Otherwise one
    badge per counter is emitted in a fixed order; a counter missing from
    *results* is shown as ``0``.
    """
    if not results:
        hint_parts = [
            f'<div style="color:{TXT_MUTED};text-align:center;padding:40px;',
            'font-family:JetBrains Mono,monospace;font-size:13px;">',
            f'Upload a CSV and click <b style="color:{ACCENT};">Run Analysis</b> ',
            'to see results.</div>',
        ]
        return "".join(hint_parts)

    # (CSS classes, caption, key looked up in ``results``)
    badge_specs = (
        ("stats-badge", "π Total Topics", "total_topics"),
        ("stats-badge", "π Title Topics", "title_topics"),
        ("stats-badge", "π Abstract Topics", "abstract_topics"),
        ("stats-badge", "β MAPPED", "mapped"),
        ("stats-badge novel", "π NOVEL", "novel"),
    )
    badges = [
        f'<div class="{classes}">{caption}<b>{results.get(key, 0)}</b></div>'
        for classes, caption, key in badge_specs
    ]
    container_open = (
        '<div style="display:flex;flex-wrap:wrap;gap:12px;'
        'justify-content:center;padding:20px 10px;">'
    )
    # Leading and trailing empty items reproduce the newline right after the
    # opening and right before the closing of the original template.
    return "\n".join(["", container_open, *badges, "</div>", ""])
| # ──────────────────────────────────────────────────────────────────────────── | |
| # Main Analysis Handler | |
| # ──────────────────────────────────────────────────────────────────────────── | |
# Zero-row frames carrying only the column headers of the two result tables.
# They are the initial value of the tables and what the handler returns when
# there is nothing to show.
EMPTY_REVIEW = pd.DataFrame(
    columns=["topic_id", "source", "keywords", "label"],
)
EMPTY_MAPPING = pd.DataFrame(
    columns=[
        "topic_id",
        "source",
        "label",
        "pajais_category",
        "status",
        "confidence",
    ],
)
def _empty_outputs(log_message: str = "") -> tuple:
    """Return the seven UI outputs for a run that produced no results."""
    return (
        EMPTY_REVIEW,
        EMPTY_MAPPING,
        build_stats_html(None),
        "",  # narrative
        "",  # reflection
        log_message,
        [],  # download files
    )


def run_analysis(file_obj, api_key, provider, progress=gr.Progress()):
    """Run the full topic modelling pipeline and return all UI outputs.

    Args:
        file_obj: Uploaded CSV as delivered by the file component: either an
            object exposing ``.name`` or something convertible with ``str``.
            ``None`` means nothing was uploaded.
        api_key: Optional LLM API key typed by the user; blank or
            whitespace-only input is treated as "no key".
        provider: Dropdown choice.  ``"Auto-detect"`` (or no selection) is
            forwarded to the agent as ``None``; any other value is lower-cased.
        progress: Gradio progress tracker, forwarded to the pipeline as
            ``progress_callback``.  The ``gr.Progress()`` default is the
            Gradio idiom for requesting progress reporting, not an accidental
            mutable default.

    Returns:
        A 7-tuple in the order the click handler's outputs are wired: review
        table, mapping table, stats HTML, narrative, reflection, log text,
        download file list.  On a missing upload or any exception the tables
        are empty and the log slot carries the message (with traceback for
        exceptions).
    """
    # ββ Validate upload ββ
    if file_obj is None:
        return _empty_outputs("β No file uploaded. Please upload a CSV file first.")

    # Everything that can fail, including agent construction, runs inside the
    # try so the error lands in the log panel instead of escaping the handler.
    try:
        # ββ Resolve provider ββ
        if not provider or provider == "Auto-detect":
            prov = None
        else:
            prov = provider.lower()
        key = (api_key or "").strip() or None
        agent = TopicModellingAgent(api_key=key, provider=prov)

        # ββ Run pipeline ββ
        csv_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
        results = agent.run_pipeline(csv_path, progress_callback=progress)
        return (
            agent.get_review_table(),
            agent.get_mapping_table(),
            build_stats_html(results),
            agent.narrative,
            agent.reflection,
            "\n".join(agent.logs),
            agent.get_download_files(),
        )
    except Exception as e:  # UI boundary: report instead of crashing the app
        return _empty_outputs(f"β Error: {e}\n\n{traceback.format_exc()}")
| # ──────────────────────────────────────────────────────────────────────────── | |
| # Gradio UI | |
| # ──────────────────────────────────────────────────────────────────────────── | |
# Single-page UI declared inside the ``gr.Blocks`` context bound to ``demo``:
# an upload/configuration card, a results card (stats strip + five tabs), a
# downloads card and a help card.  The click handler at the end maps the
# 7-tuple returned by ``run_analysis`` onto the result widgets in list order.
# NOTE(review): the nesting below was reconstructed from a whitespace-flattened
# copy of this file — confirm container membership (e.g. whether the run
# button sits inside the row/column or directly in the section card).
with gr.Blocks(
    title="AI Topic Modelling β Academic Journal Analysis",
) as demo:
    # ── Header ──
    gr.HTML(f"""
<div class="app-header">
<h1>Topic<span>Modeller</span> AI</h1>
<p>NMF Topic Modelling Β· PAJAIS Taxonomy Mapping Β· LLM-Enhanced Labeling</p>
</div>
""")
    # ────────────────────────────────────────────────────────────────────────
    # SECTION 1 — Upload & Configure
    # ────────────────────────────────────────────────────────────────────────
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">β Upload & Configure</div>')
        with gr.Row():
            with gr.Column(scale=3):
                file_input = gr.File(
                    label="Upload CSV (requires 'title' and 'abstract' columns)",
                    file_types=[".csv"],
                    file_count="single",
                )
            with gr.Column(scale=2):
                # Masked input; an empty value is allowed (see the note below).
                api_key_input = gr.Textbox(
                    label="LLM API Key (optional β enhances labels)",
                    placeholder="sk-... or gsk_... or your API key",
                    type="password",
                )
                # "Auto-detect" is translated to provider=None by run_analysis.
                provider_dropdown = gr.Dropdown(
                    label="LLM Provider",
                    choices=["Auto-detect", "Groq", "Mistral", "OpenAI"],
                    value="Auto-detect",
                )
        run_btn = gr.Button(
            "π Run Full Analysis",
            elem_classes="btn-run",
            size="lg",
        )
        gr.HTML(f"""
<div style="margin-top:12px;padding:10px 16px;background:{BG_MID};
border:1px solid {BORDER};border-radius:8px;
font-size:11px;color:{TXT_MUTED};line-height:1.8;
font-family:'JetBrains Mono',monospace;">
<b style="color:{ACCENT};">βΉοΈ Note:</b> Works <b>without</b> an API key
(uses heuristic keyword labels). For LLM-enhanced labels, provide a
<b>Groq</b> (free at
<a href="https://console.groq.com" style="color:{ACCENT};">console.groq.com</a>),
<b>Mistral</b>, or <b>OpenAI</b> key.
Keys from <code>.env</code> file are loaded automatically.
</div>
""")
    # ────────────────────────────────────────────────────────────────────────
    # SECTION 2 — Results
    # ────────────────────────────────────────────────────────────────────────
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">β‘ Results</div>')
        # Starts with the "upload a CSV" placeholder until a run completes.
        stats_display = gr.HTML(value=build_stats_html(None))
        with gr.Tabs():
            # ── Tab 1: Review Table (C4) ──
            with gr.Tab("π Review Table (C4)"):
                gr.HTML(f"""
<p style="font-size:12px;color:{TXT_MUTED};margin:0 0 10px;
font-family:'JetBrains Mono',monospace;">
All generated topics with IDs, source (title/abstract),
keywords, and labels. Target: 98+ rows
(50 title topics + 50 abstract topics).
</p>
""")
                review_table = gr.Dataframe(
                    value=EMPTY_REVIEW,
                    label="Topic Review Table",
                    interactive=False,
                    wrap=True,
                )
            # ── Tab 2: PAJAIS Mapping (C5) ──
            with gr.Tab("πΊοΈ PAJAIS Mapping (C5)"):
                gr.HTML(f"""
<p style="font-size:12px;color:{TXT_MUTED};margin:0 0 10px;
font-family:'JetBrains Mono',monospace;">
Each topic mapped to the PAJAIS 25-category taxonomy.
Status: <b style="color:{ACCENT};">MAPPED</b> (matches taxonomy)
or <b style="color:{ACCENT2};">NOVEL</b> (new / unmapped theme).
</p>
""")
                mapping_table = gr.Dataframe(
                    value=EMPTY_MAPPING,
                    label="PAJAIS Taxonomy Mapping",
                    interactive=False,
                    wrap=True,
                )
            # ── Tab 3: Narrative (C8) ──
            with gr.Tab("π Narrative (C8)"):
                narrative_output = gr.Textbox(
                    label="Academic Narrative (~500 words)",
                    lines=20,
                    interactive=False,
                    placeholder="Run analysis to generate the narrative...",
                )
            # ── Tab 4: Reflection (C10) ──
            with gr.Tab("π Reflection (C10)"):
                reflection_output = gr.Textbox(
                    label="Research Reflection (~250 words)",
                    lines=14,
                    interactive=False,
                    placeholder="Run analysis to generate the reflection...",
                )
            # ── Tab 5: Pipeline Logs ──
            # Also where run_analysis reports validation errors and tracebacks.
            with gr.Tab("π Pipeline Logs"):
                logs_output = gr.Textbox(
                    label="Execution Log",
                    lines=20,
                    interactive=False,
                    placeholder="Pipeline logs will appear here after analysis...",
                )
    # ────────────────────────────────────────────────────────────────────────
    # SECTION 3 — Downloads
    # ────────────────────────────────────────────────────────────────────────
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">β’ Download Outputs</div>')
        gr.HTML(f"""
<p style="font-size:12px;color:{TXT_MUTED};margin:0 0 12px;
font-family:'JetBrains Mono',monospace;">
Generated files: <code>comparison.csv</code> (C6) Β·
<code>taxonomy_map.json</code> (C7) Β·
<code>narrative.txt</code> (C8) Β·
<code>prompts.txt</code> (C9) Β·
<code>reflection.txt</code> (C10)
</p>
""")
        # Output-only file list, filled from the last element of the
        # run_analysis result.
        download_files = gr.File(
            label="Output Files",
            file_count="multiple",
            interactive=False,
        )
    # ────────────────────────────────────────────────────────────────────────
    # SECTION 4 — Help
    # ────────────────────────────────────────────────────────────────────────
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">β£ Help & Deliverables</div>')
        with gr.Accordion("π Assignment Deliverables Checklist", open=False):
            gr.HTML(f"""
<div style="font-size:12px;color:{TXT};line-height:2;
font-family:'JetBrains Mono',monospace;padding:8px;">
<b style="color:{ACCENT};">C1.</b> HuggingFace-compatible Gradio app β<br/>
<b style="color:{ACCENT};">C2.</b> 3 Python files: app.py, tools.py, agent.py β<br/>
<b style="color:{ACCENT};">C3.</b> requirements.txt with 13 packages β<br/>
<b style="color:{ACCENT};">C4.</b> Review table (98+ topics) β "Review Table" tab β<br/>
<b style="color:{ACCENT};">C5.</b> PAJAIS mapping (MAPPED/NOVEL) β "PAJAIS Mapping" tab β<br/>
<b style="color:{ACCENT};">C6.</b> comparison.csv β Downloads β<br/>
<b style="color:{ACCENT};">C7.</b> taxonomy_map.json β Downloads β<br/>
<b style="color:{ACCENT};">C8.</b> narrative.txt (~500 words) β "Narrative" tab β<br/>
<b style="color:{ACCENT};">C9.</b> prompts.txt β Downloads β<br/>
<b style="color:{ACCENT};">C10.</b> reflection.txt (~250 words) β "Reflection" tab β<br/>
</div>
""")
        with gr.Accordion("β οΈ Troubleshooting", open=False):
            gr.HTML(f"""
<div style="font-size:11px;color:{TXT};line-height:1.8;
font-family:'JetBrains Mono',monospace;padding:8px;">
<b style="color:#ff6b6b;">Missing columns error</b><br/>
Ensure CSV has lowercase 'title' and 'abstract' columns.<br/><br/>
<b style="color:#ff6b6b;">Rate limit (429 error)</b><br/>
LLM API rate limit. Wait 1-2 min and retry. App still works
without LLM (uses heuristic labels).<br/><br/>
<b style="color:#ff6b6b;">Too few topics</b><br/>
Dataset may be too small. Minimum ~20 papers recommended
for meaningful topic modelling.<br/><br/>
<b style="color:#ff6b6b;">Empty narrative/reflection</b><br/>
If LLM unavailable, template-based text is used automatically.<br/>
</div>
""")
    # ────────────────────────────────────────────────────────────────────────
    # Wire Events
    # ────────────────────────────────────────────────────────────────────────
    # The outputs list must stay in the same order as the tuple returned by
    # run_analysis.
    run_btn.click(
        fn=run_analysis,
        inputs=[file_input, api_key_input, provider_dropdown],
        outputs=[
            review_table,
            mapping_table,
            stats_display,
            narrative_output,
            reflection_output,
            logs_output,
            download_files,
        ],
    )
| # ──────────────────────────────────────────────────────────────────────────── | |
| # Launch | |
| # ──────────────────────────────────────────────────────────────────────────── | |
if __name__ == "__main__":
    # Base theme with teal / purple / slate hues and the same two Google
    # fonts the custom stylesheet imports.
    # NOTE(review): ``theme`` and ``css`` are given to ``launch()`` rather
    # than to ``gr.Blocks()`` — confirm this matches the pinned Gradio version.
    app_theme = gr.themes.Base(
        primary_hue="teal",
        secondary_hue="purple",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    )
    demo.launch(show_error=True, theme=app_theme, css=CSS)