# reyansh2005's picture
# nice
# 62e2807
"""
app.py — Gradio Web Application (Entry Point)
AI-Powered Topic Modelling for Academic Journal Analysis.
Satisfies all deliverables C1–C10.
Run with: python app.py
"""
from __future__ import annotations
import os
import traceback
import pandas as pd
import gradio as gr
from pathlib import Path
# ── Load .env file (API keys) ────────────────────────────────────────────
def _load_env() -> None:
    """Populate ``os.environ`` from a ``.env`` file located next to this module.

    Each non-blank line that is not a ``#`` comment and contains ``=`` is
    split at the first ``=`` into a name and a value. Surrounding whitespace
    and surrounding single/double quotes are removed from the value.

    A variable is only set when the parsed value is non-empty and the
    variable is currently unset or empty, so values that already exist in
    the real environment always win over the file.

    Lines with an empty name (for example ``=value``) are skipped:
    ``os.environ`` rejects an empty variable name, and one malformed line
    must not prevent the application from starting.

    Returns:
        None. The function works purely through its effect on ``os.environ``.
    """
    env_path = Path(__file__).parent / ".env"
    # is_file() also guards against a directory that happens to be called
    # ".env", which read_text() could not open.
    if not env_path.is_file():
        return

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise become part of the first variable name.
    for raw_line in env_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue

        name, _, value = line.partition("=")
        name = name.strip()
        value = value.strip().strip('"').strip("'")

        if not name:
            # Nothing to export; assigning os.environ[""] would raise.
            continue
        if value and not os.getenv(name):
            os.environ[name] = value


_load_env()
# ── Import the agent ─────────────────────────────────────────────────────
from agent import TopicModellingAgent
# ════════════════════════════════════════════════════════════════════════════
# Colour / Style Tokens
# ════════════════════════════════════════════════════════════════════════════
# Hex colour tokens shared by the CSS template and the inline HTML snippets
# below. They are interpolated as plain text, so each must stay a 6-digit
# "#RRGGBB" value: the stylesheet appends two extra hex digits in places
# (e.g. "{ACCENT}08") to form an 8-digit colour with alpha.
ACCENT = "#00d4aa"  # primary highlight: section titles, run button, selected tab, badges
ACCENT2 = "#7c3aed"  # secondary highlight: "novel" badge and NOVEL status text
BG_DARK = "#0f0f1a"  # page background; also the run button's text colour
BG_MID = "#161625"  # inputs, table body and info-note background
BG_CARD = "#1c1c32"  # section cards, table header, file drop zone, accordion
TXT = "#e2e8f0"  # primary text
TXT_MUTED = "#94a3b8"  # labels, captions, inactive tabs
BORDER = "#2d2d44"  # borders and divider lines
# ════════════════════════════════════════════════════════════════════════════
# Custom CSS
# ════════════════════════════════════════════════════════════════════════════
# Stylesheet for the whole app; it is handed to Gradio via the `css=` argument
# of `demo.launch(...)` at the bottom of this file.
#
# This is an f-string: single-brace fields ({ACCENT}, {BORDER}, ...) interpolate
# the colour tokens defined above, while doubled braces ({{ and }}) are the
# literal braces of the CSS rules. Two hex digits written directly after a
# token (e.g. {ACCENT}30) extend the colour to #RRGGBBAA, i.e. add an alpha
# channel.
CSS = f"""
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@300;400;500;600;700&display=swap');
:root {{
--accent: {ACCENT};
--accent2: {ACCENT2};
--bg-dark: {BG_DARK};
--bg-mid: {BG_MID};
--bg-card: {BG_CARD};
--txt: {TXT};
--txt-muted: {TXT_MUTED};
--border: {BORDER};
}}
/* ── Global ── */
body, .gradio-container {{
background: var(--bg-dark) !important;
color: var(--txt) !important;
font-family: 'Inter', -apple-system, sans-serif !important;
}}
/* ── Header ── */
.app-header {{
background: linear-gradient(135deg, {BG_CARD} 0%, {BG_MID} 100%);
border: 1px solid {BORDER};
border-radius: 16px;
padding: 32px 40px;
margin-bottom: 24px;
position: relative;
overflow: hidden;
}}
.app-header::before {{
content: '';
position: absolute;
top: -50%; left: -50%;
width: 200%; height: 200%;
background:
radial-gradient(circle at 30% 50%, {ACCENT}08 0%, transparent 50%),
radial-gradient(circle at 70% 50%, {ACCENT2}06 0%, transparent 50%);
pointer-events: none;
}}
.app-header h1 {{
font-family: 'Inter', sans-serif;
font-size: 2rem; font-weight: 800;
color: {TXT}; margin: 0 0 6px;
letter-spacing: -0.03em;
position: relative;
}}
.app-header h1 span {{ color: {ACCENT}; }}
.app-header p {{
color: {TXT_MUTED}; font-size: 13px; margin: 0;
font-family: 'JetBrains Mono', monospace;
letter-spacing: 1px; position: relative;
}}
/* ── Section cards ── */
.section-card {{
background: {BG_CARD};
border: 1px solid {BORDER};
border-radius: 12px;
padding: 20px 24px;
margin-bottom: 16px;
}}
.section-title {{
font-family: 'JetBrains Mono', monospace;
font-size: 11px; font-weight: 700;
letter-spacing: 3px; text-transform: uppercase;
color: {ACCENT};
margin-bottom: 16px; padding-bottom: 10px;
border-bottom: 1px solid {BORDER};
}}
/* ── Run button ── */
.btn-run {{
background: linear-gradient(135deg, {ACCENT} 0%, #00b894 100%) !important;
color: {BG_DARK} !important;
border: none !important; border-radius: 10px !important;
font-family: 'JetBrains Mono', monospace !important;
font-weight: 700 !important; font-size: 14px !important;
letter-spacing: 1px !important;
padding: 14px 28px !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 15px {ACCENT}30 !important;
}}
.btn-run:hover {{
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px {ACCENT}50 !important;
}}
/* ── Tab navigation ── */
.tab-nav button {{
font-family: 'JetBrains Mono', monospace !important;
font-size: 12px !important; font-weight: 600 !important;
letter-spacing: 0.5px !important;
color: {TXT_MUTED} !important;
border: none !important; background: transparent !important;
padding: 10px 16px !important;
transition: all 0.2s !important;
}}
.tab-nav button.selected {{
color: {ACCENT} !important;
border-bottom: 2px solid {ACCENT} !important;
}}
/* ── Dataframe styling ── */
.gradio-dataframe table {{
background: {BG_MID} !important;
border-collapse: collapse;
}}
.gradio-dataframe th {{
background: {BG_CARD} !important;
color: {ACCENT} !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 11px !important;
letter-spacing: 1px !important;
text-transform: uppercase !important;
padding: 10px 12px !important;
border-bottom: 1px solid {BORDER} !important;
}}
.gradio-dataframe td {{
color: {TXT} !important;
font-size: 12px !important;
padding: 8px 12px !important;
border-bottom: 1px solid {BORDER}80 !important;
font-family: 'JetBrains Mono', monospace !important;
}}
.gradio-dataframe tr:hover td {{
background: {ACCENT}08 !important;
}}
/* ── Inputs ── */
input, textarea, select, .gradio-textbox textarea {{
background: {BG_MID} !important;
border: 1px solid {BORDER} !important;
color: {TXT} !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 13px !important;
border-radius: 8px !important;
}}
input:focus, textarea:focus {{
border-color: {ACCENT} !important;
box-shadow: 0 0 0 2px {ACCENT}30 !important;
}}
/* ── Labels ── */
label, .gradio-label {{
color: {TXT_MUTED} !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 11px !important;
letter-spacing: 1px !important;
text-transform: uppercase !important;
}}
/* ── File upload ── */
.gradio-file {{
background: {BG_CARD} !important;
border: 2px dashed {BORDER} !important;
border-radius: 10px !important;
transition: border-color 0.3s !important;
}}
.gradio-file:hover {{
border-color: {ACCENT} !important;
}}
/* ── Accordion ── */
.gradio-accordion {{
background: {BG_CARD} !important;
border: 1px solid {BORDER} !important;
border-radius: 10px !important;
}}
/* ── Stats badges ── */
.stats-badge {{
display: inline-block;
background: {ACCENT}15;
border: 1px solid {ACCENT}40;
border-radius: 8px;
padding: 10px 18px; margin: 4px 6px;
font-family: 'JetBrains Mono', monospace;
font-size: 13px; color: {ACCENT};
}}
.stats-badge.novel {{
background: {ACCENT2}15;
border-color: {ACCENT2}40;
color: {ACCENT2};
}}
.stats-badge b {{
font-size: 18px;
display: block;
margin-top: 2px;
}}
"""
# ════════════════════════════════════════════════════════════════════════════
# Helper Functions
# ════════════════════════════════════════════════════════════════════════════
def build_stats_html(results: dict | None) -> str:
    """Build the statistics summary HTML panel.

    Args:
        results: Summary mapping produced by a pipeline run. The keys read
            are ``total_topics``, ``title_topics``, ``abstract_topics``,
            ``mapped`` and ``novel``; any missing key is shown as ``0``.
            ``None`` or an empty mapping yields the placeholder panel.

    Returns:
        An HTML fragment: either a muted prompt telling the user to upload a
        CSV and run the analysis, or a flex row with one ``stats-badge`` per
        statistic (the NOVEL badge additionally carries the ``novel`` class).
    """
    if not results:
        return (
            f'<div style="color:{TXT_MUTED};text-align:center;padding:40px;'
            f'font-family:JetBrains Mono,monospace;font-size:13px;">'
            f'Upload a CSV and click <b style="color:{ACCENT};">Run Analysis</b> '
            f'to see results.</div>'
        )

    # (css classes, visible label, key in `results`), in display order.
    # The labels previously contained mis-decoded UTF-8 (mojibake) in place
    # of the emoji; they are written here as the intended characters.
    badges = (
        ("stats-badge", "📊 Total Topics", "total_topics"),
        ("stats-badge", "📝 Title Topics", "title_topics"),
        ("stats-badge", "📄 Abstract Topics", "abstract_topics"),
        ("stats-badge", "✅ MAPPED", "mapped"),
        ("stats-badge novel", "🆕 NOVEL", "novel"),
    )
    badge_html = "\n".join(
        f'<div class="{css_class}">{label}<b>{results.get(key, 0)}</b></div>'
        for css_class, label, key in badges
    )
    return (
        '\n<div style="display:flex;flex-wrap:wrap;gap:12px;'
        'justify-content:center;padding:20px 10px;">\n'
        f"{badge_html}\n"
        "</div>\n"
    )
# ════════════════════════════════════════════════════════════════════════════
# Main Analysis Handler
# ════════════════════════════════════════════════════════════════════════════
# Column layouts of the two result tables. The zero-row frames built from them
# are what the tables show before any run and whenever a run fails.
_REVIEW_COLUMNS = ["topic_id", "source", "keywords", "label"]
_MAPPING_COLUMNS = [
    "topic_id",
    "source",
    "label",
    "pajais_category",
    "status",
    "confidence",
]

EMPTY_REVIEW = pd.DataFrame(columns=_REVIEW_COLUMNS)
EMPTY_MAPPING = pd.DataFrame(columns=_MAPPING_COLUMNS)
def run_analysis(file_obj, api_key, provider, progress=gr.Progress()):
    """Run the full topic modelling pipeline and return all UI outputs.

    Args:
        file_obj: The uploaded CSV as delivered by the file component: either
            an object exposing the path as ``.name`` or a value whose
            ``str()`` is the path. ``None`` when nothing has been uploaded.
        api_key: Optional LLM API key. Surrounding whitespace is stripped; a
            missing or blank key is passed to the agent as ``None``.
        provider: Provider name chosen in the dropdown. ``"Auto-detect"`` (or
            an empty value) is passed to the agent as ``None``; any other
            name is lower-cased.
        progress: Gradio progress tracker, forwarded to the agent as
            ``progress_callback``. It is kept as a default argument because
            that is how Gradio injects the tracker.

    Returns:
        A 7-tuple in the order of the outputs wired to the run button:
        ``(review_df, mapping_df, stats_html, narrative, reflection, logs,
        download_files)``. When no file is given or any step raises, the
        tables are the empty placeholders, the stats panel is the
        placeholder, narrative and reflection are empty strings, the
        download list is empty, and the ``logs`` slot carries the error
        message (including the traceback for exceptions).
    """

    def _failure(message):
        # Single definition of the "nothing to show" output tuple so the
        # three failure paths cannot drift apart.
        return (
            EMPTY_REVIEW,
            EMPTY_MAPPING,
            build_stats_html(None),
            "", "",
            message,
            [],
        )

    # ── Validate upload ──────────────────────────────────────────────
    if file_obj is None:
        return _failure("❌ No file uploaded. Please upload a CSV file first.")

    try:
        # ── Resolve provider ─────────────────────────────────────────
        # Built inside the try so that a failure while configuring the agent
        # is reported in the log panel like any other pipeline error.
        if not provider or provider == "Auto-detect":
            prov = None
        else:
            prov = provider.lower()
        key = (api_key or "").strip() or None
        agent = TopicModellingAgent(api_key=key, provider=prov)

        # ── Run pipeline ─────────────────────────────────────────────
        csv_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
        results = agent.run_pipeline(csv_path, progress_callback=progress)

        return (
            agent.get_review_table(),
            agent.get_mapping_table(),
            build_stats_html(results),
            agent.narrative,
            agent.reflection,
            "\n".join(agent.logs),
            agent.get_download_files(),
        )
    except Exception as e:
        # Top-level UI boundary: surface the error to the user instead of
        # letting the handler die.
        return _failure(f"❌ Error: {e}\n\n{traceback.format_exc()}")
# ════════════════════════════════════════════════════════════════════════════
# Gradio UI
# ════════════════════════════════════════════════════════════════════════════
# Page layout, top to bottom: header, ① upload/configure card, ② results card
# (stats panel + five tabs), ③ downloads card, ④ help card. The run button is
# wired to `run_analysis` at the end, inside the Blocks context.
#
# User-visible labels in this block previously contained mis-decoded UTF-8
# (mojibake) in place of dashes, middle dots, circled digits, arrows, check
# marks and emoji; they are written here as the intended characters.
with gr.Blocks(
    title="AI Topic Modelling — Academic Journal Analysis",
) as demo:
    # ── Header ────────────────────────────────────────────────────────────
    gr.HTML("""
    <div class="app-header">
        <h1>Topic<span>Modeller</span> AI</h1>
        <p>NMF Topic Modelling · PAJAIS Taxonomy Mapping · LLM-Enhanced Labeling</p>
    </div>
    """)

    # ════════════════════════════════════════════════════════════════════════
    # SECTION 1 — Upload & Configure
    # ════════════════════════════════════════════════════════════════════════
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">① Upload & Configure</div>')

        with gr.Row():
            with gr.Column(scale=3):
                file_input = gr.File(
                    label="Upload CSV (requires 'title' and 'abstract' columns)",
                    file_types=[".csv"],
                    file_count="single",
                )
            with gr.Column(scale=2):
                api_key_input = gr.Textbox(
                    label="LLM API Key (optional — enhances labels)",
                    placeholder="sk-... or gsk_... or your API key",
                    type="password",
                )
                provider_dropdown = gr.Dropdown(
                    label="LLM Provider",
                    choices=["Auto-detect", "Groq", "Mistral", "OpenAI"],
                    value="Auto-detect",
                )

        run_btn = gr.Button(
            "🚀 Run Full Analysis",
            elem_classes="btn-run",
            size="lg",
        )

        gr.HTML(f"""
        <div style="margin-top:12px;padding:10px 16px;background:{BG_MID};
        border:1px solid {BORDER};border-radius:8px;
        font-size:11px;color:{TXT_MUTED};line-height:1.8;
        font-family:'JetBrains Mono',monospace;">
        <b style="color:{ACCENT};">ℹ️ Note:</b> Works <b>without</b> an API key
        (uses heuristic keyword labels). For LLM-enhanced labels, provide a
        <b>Groq</b> (free at
        <a href="https://console.groq.com" style="color:{ACCENT};">console.groq.com</a>),
        <b>Mistral</b>, or <b>OpenAI</b> key.
        Keys from <code>.env</code> file are loaded automatically.
        </div>
        """)

    # ════════════════════════════════════════════════════════════════════════
    # SECTION 2 — Results
    # ════════════════════════════════════════════════════════════════════════
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">② Results</div>')

        stats_display = gr.HTML(value=build_stats_html(None))

        with gr.Tabs():
            # ── Tab 1: Review Table (C4) ─────────────────────────────
            with gr.Tab("📊 Review Table (C4)"):
                gr.HTML(f"""
                <p style="font-size:12px;color:{TXT_MUTED};margin:0 0 10px;
                font-family:'JetBrains Mono',monospace;">
                All generated topics with IDs, source (title/abstract),
                keywords, and labels. Target: 98+ rows
                (50 title topics + 50 abstract topics).
                </p>
                """)
                review_table = gr.Dataframe(
                    value=EMPTY_REVIEW,
                    label="Topic Review Table",
                    interactive=False,
                    wrap=True,
                )

            # ── Tab 2: PAJAIS Mapping (C5) ───────────────────────────
            with gr.Tab("🗺️ PAJAIS Mapping (C5)"):
                gr.HTML(f"""
                <p style="font-size:12px;color:{TXT_MUTED};margin:0 0 10px;
                font-family:'JetBrains Mono',monospace;">
                Each topic mapped to the PAJAIS 25-category taxonomy.
                Status: <b style="color:{ACCENT};">MAPPED</b> (matches taxonomy)
                or <b style="color:{ACCENT2};">NOVEL</b> (new / unmapped theme).
                </p>
                """)
                mapping_table = gr.Dataframe(
                    value=EMPTY_MAPPING,
                    label="PAJAIS Taxonomy Mapping",
                    interactive=False,
                    wrap=True,
                )

            # ── Tab 3: Narrative (C8) ────────────────────────────────
            with gr.Tab("📝 Narrative (C8)"):
                narrative_output = gr.Textbox(
                    label="Academic Narrative (~500 words)",
                    lines=20,
                    interactive=False,
                    placeholder="Run analysis to generate the narrative...",
                )

            # ── Tab 4: Reflection (C10) ──────────────────────────────
            with gr.Tab("💭 Reflection (C10)"):
                reflection_output = gr.Textbox(
                    label="Research Reflection (~250 words)",
                    lines=14,
                    interactive=False,
                    placeholder="Run analysis to generate the reflection...",
                )

            # ── Tab 5: Pipeline Logs ─────────────────────────────────
            with gr.Tab("📋 Pipeline Logs"):
                logs_output = gr.Textbox(
                    label="Execution Log",
                    lines=20,
                    interactive=False,
                    placeholder="Pipeline logs will appear here after analysis...",
                )

    # ════════════════════════════════════════════════════════════════════════
    # SECTION 3 — Downloads
    # ════════════════════════════════════════════════════════════════════════
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">③ Download Outputs</div>')
        gr.HTML(f"""
        <p style="font-size:12px;color:{TXT_MUTED};margin:0 0 12px;
        font-family:'JetBrains Mono',monospace;">
        Generated files: <code>comparison.csv</code> (C6) ·
        <code>taxonomy_map.json</code> (C7) ·
        <code>narrative.txt</code> (C8) ·
        <code>prompts.txt</code> (C9) ·
        <code>reflection.txt</code> (C10)
        </p>
        """)
        download_files = gr.File(
            label="Output Files",
            file_count="multiple",
            interactive=False,
        )

    # ════════════════════════════════════════════════════════════════════════
    # SECTION 4 — Help
    # ════════════════════════════════════════════════════════════════════════
    with gr.Group(elem_classes="section-card"):
        gr.HTML('<div class="section-title">④ Help & Deliverables</div>')

        with gr.Accordion("📋 Assignment Deliverables Checklist", open=False):
            gr.HTML(f"""
            <div style="font-size:12px;color:{TXT};line-height:2;
            font-family:'JetBrains Mono',monospace;padding:8px;">
            <b style="color:{ACCENT};">C1.</b> HuggingFace-compatible Gradio app ✓<br/>
            <b style="color:{ACCENT};">C2.</b> 3 Python files: app.py, tools.py, agent.py ✓<br/>
            <b style="color:{ACCENT};">C3.</b> requirements.txt with 13 packages ✓<br/>
            <b style="color:{ACCENT};">C4.</b> Review table (98+ topics) → "Review Table" tab ✓<br/>
            <b style="color:{ACCENT};">C5.</b> PAJAIS mapping (MAPPED/NOVEL) → "PAJAIS Mapping" tab ✓<br/>
            <b style="color:{ACCENT};">C6.</b> comparison.csv → Downloads ✓<br/>
            <b style="color:{ACCENT};">C7.</b> taxonomy_map.json → Downloads ✓<br/>
            <b style="color:{ACCENT};">C8.</b> narrative.txt (~500 words) → "Narrative" tab ✓<br/>
            <b style="color:{ACCENT};">C9.</b> prompts.txt → Downloads ✓<br/>
            <b style="color:{ACCENT};">C10.</b> reflection.txt (~250 words) → "Reflection" tab ✓<br/>
            </div>
            """)

        with gr.Accordion("⚠️ Troubleshooting", open=False):
            gr.HTML(f"""
            <div style="font-size:11px;color:{TXT};line-height:1.8;
            font-family:'JetBrains Mono',monospace;padding:8px;">
            <b style="color:#ff6b6b;">Missing columns error</b><br/>
            Ensure CSV has lowercase 'title' and 'abstract' columns.<br/><br/>
            <b style="color:#ff6b6b;">Rate limit (429 error)</b><br/>
            LLM API rate limit. Wait 1-2 min and retry. App still works
            without LLM (uses heuristic labels).<br/><br/>
            <b style="color:#ff6b6b;">Too few topics</b><br/>
            Dataset may be too small. Minimum ~20 papers recommended
            for meaningful topic modelling.<br/><br/>
            <b style="color:#ff6b6b;">Empty narrative/reflection</b><br/>
            If LLM unavailable, template-based text is used automatically.<br/>
            </div>
            """)

    # ════════════════════════════════════════════════════════════════════════
    # Wire Events
    # ════════════════════════════════════════════════════════════════════════
    # The output order must match the 7-tuple returned by run_analysis.
    run_btn.click(
        fn=run_analysis,
        inputs=[file_input, api_key_input, provider_dropdown],
        outputs=[
            review_table,
            mapping_table,
            stats_display,
            narrative_output,
            reflection_output,
            logs_output,
            download_files,
        ],
    )
# ════════════════════════════════════════════════════════════════════════════
# Launch
# ════════════════════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Assemble the theme up front so the launch call itself stays short.
    app_theme = gr.themes.Base(
        primary_hue="teal",
        secondary_hue="purple",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    )
    # show_error=True asks Gradio to display handler exceptions in the browser.
    demo.launch(show_error=True, theme=app_theme, css=CSS)