"""
app.py – Gradio front-end for the Topic Modelling System.

Runs on HuggingFace Spaces and also accepts a CLI invocation:

    python app.py data.csv
"""

import logging
import os
import shutil
import sys
import tempfile

import gradio as gr
import pandas as pd

from agent import run_pipeline

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)

# Files the pipeline writes into its output directory and that the UI offers
# for download, in the order process_csv() returns them.
_OUTPUT_FILES = ("comparison.csv", "taxonomy_map.json", "narrative.txt")

_CSS = """
.status-box textarea { font-size: 0.95rem; font-family: monospace; }
.narrative-box textarea { font-size: 0.9rem; line-height: 1.6; }
"""

_INTRO_MD = """
# 📚 Topic Modelling System
**Automated research-theme extraction, PAJAIS mapping, and gap analysis**

Upload a CSV file containing `title` and `abstract` columns to begin.
The system will extract ≥ 98 topics, compare title vs abstract themes,
map topics against the PAJAIS taxonomy, and generate a 500-word academic narrative.
"""

_FOOTER_MD = """
---
*Topic Modelling System — powered by TF-IDF · LDA · NMF*
"""


# ─────────────────────────────────────────────────────────────────────────────
# Core processing wrapper for Gradio
# ─────────────────────────────────────────────────────────────────────────────

def _failure_outputs(message: str) -> tuple:
    """Return the 8-tuple for a run without results: *message* plus blank placeholders."""
    return (message, pd.DataFrame(), pd.DataFrame(), "", "", None, None, None)


def _format_gap_markdown(gap: dict, record_count) -> str:
    """Render the gap-analysis dict as a Markdown summary table plus top-theme lists."""
    # str() each entry so a non-string theme cannot make join() raise.
    top_mapped = ", ".join(map(str, gap["top_mapped"]))
    top_novel = ", ".join(map(str, gap["top_novel"]))
    return f"""### Gap Analysis Summary

| Metric | Value |
|--------|-------|
| **Total Topics Extracted** | {gap['total_topics']} |
| **MAPPED (in PAJAIS)** | {gap['mapped_count']} ({gap['mapped_percent']}%) |
| **NOVEL (emerging)** | {gap['novel_count']} ({gap['novel_percent']}%) |
| **Records Processed** | {record_count} |

**Top MAPPED themes:** {top_mapped}

**Top NOVEL themes:** {top_novel}
"""


def _format_status(gap: dict, record_count) -> str:
    """Build the short success message shown in the status box."""
    return (
        "✅ Pipeline completed successfully!\n"
        f" 📄 {record_count} records processed | "
        f"🏷️ {gap['total_topics']} topics extracted | "
        f"🗂️ {gap['mapped_count']} mapped | "
        f"✨ {gap['novel_count']} novel"
    )


def _export_outputs(src_dir: str) -> list:
    """
    Copy the pipeline's output files from *src_dir* into a fresh temp directory.

    The new directory is intentionally not deleted on success: the returned
    paths are handed to Gradio, which needs them to exist after this request
    returns. If any copy fails, the directory is removed before re-raising so
    a failed run does not leave an orphan behind.
    """
    out_dir = tempfile.mkdtemp()
    try:
        exported = []
        for name in _OUTPUT_FILES:
            dst = os.path.join(out_dir, name)
            shutil.copy(os.path.join(src_dir, name), dst)
            exported.append(dst)
    except Exception:
        shutil.rmtree(out_dir, ignore_errors=True)
        raise
    return exported


def process_csv(csv_file) -> tuple:
    """
    Gradio handler: run the full pipeline on an uploaded CSV and return
    display-ready outputs.

    Parameters
    ----------
    csv_file : a filesystem path, or an object exposing the path as ``.name``;
               ``None`` when nothing was uploaded.

    Returns
    -------
    (
        status_msg    : str,
        review_df     : pd.DataFrame  – rendered in Gradio Dataframe,
        comparison_df : pd.DataFrame,
        gap_md        : str           – gap analysis as Markdown,
        narrative     : str,
        comp_file     : str  – path to comparison.csv for download,
        tax_file      : str  – path to taxonomy_map.json for download,
        narr_file     : str  – path to narrative.txt for download,
    )

    On a missing upload or any pipeline error the status message describes the
    problem, the DataFrames are empty, the strings are blank and the file paths
    are ``None``; exceptions are logged, never propagated to Gradio.
    """
    if csv_file is None:
        return _failure_outputs("⚠️ Please upload a CSV file.")

    try:
        # Accept either a plain path or a file-like wrapper carrying the path
        # in its .name attribute.
        csv_path = getattr(csv_file, "name", csv_file)

        with tempfile.TemporaryDirectory() as tmpdir:
            result = run_pipeline(csv_path, output_dir=tmpdir)

            # Read and format everything first, so a malformed result fails
            # before the persistent download directory is created.
            gap = result["gap"]
            record_count = result["record_count"]
            status = _format_status(gap, record_count)
            gap_md = _format_gap_markdown(gap, record_count)
            review_df = result["review_df"]
            comparison_df = result["comparison_df"]
            narrative = result["narrative"]

            # tmpdir vanishes when the with-block exits; copy the downloads to
            # a location that outlives this call so Gradio can serve them.
            comp_dst, tax_dst, narr_dst = _export_outputs(tmpdir)

        return (
            status,
            review_df,
            comparison_df,
            gap_md,
            narrative,
            comp_dst,
            tax_dst,
            narr_dst,
        )
    except Exception as exc:
        # Top-level UI boundary: log the traceback and show the error instead
        # of letting the request crash.
        log.exception("Pipeline failed")
        return _failure_outputs(f"❌ Error: {exc}")


# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────

def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks app: upload column, result tabs and download row."""
    with gr.Blocks(
        title="Topic Modelling System",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
        css=_CSS,
    ) as demo:
        gr.Markdown(_INTRO_MD)

        with gr.Row():
            with gr.Column(scale=1):
                csv_input = gr.File(
                    label="📂 Upload CSV (title + abstract columns)",
                    file_types=[".csv"],
                    type="filepath",
                )
                run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")
            with gr.Column(scale=2):
                status_out = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=3,
                    elem_classes=["status-box"],
                )

        gr.Markdown("---")

        with gr.Tabs():
            with gr.TabItem("🏷️ Extracted Topics"):
                review_table = gr.Dataframe(
                    label="Topic Review Table (topic_id | keyword | frequency)",
                    wrap=True,
                    interactive=False,
                )
            with gr.TabItem("🔄 Title vs Abstract Comparison"):
                comparison_table = gr.Dataframe(
                    label="Comparison Table",
                    wrap=True,
                    interactive=False,
                )
            with gr.TabItem("📊 Gap Analysis"):
                gap_md_out = gr.Markdown()
            with gr.TabItem("📝 Narrative (≈500 words)"):
                narrative_out = gr.Textbox(
                    label="Academic Narrative",
                    lines=28,
                    interactive=False,
                    elem_classes=["narrative-box"],
                )

        gr.Markdown("### 📥 Download Output Files")
        with gr.Row():
            dl_comparison = gr.File(label="comparison.csv", interactive=False)
            dl_taxonomy = gr.File(label="taxonomy_map.json", interactive=False)
            dl_narrative = gr.File(label="narrative.txt", interactive=False)

        # The outputs list must stay in the same order as the tuple returned
        # by process_csv().
        run_btn.click(
            fn=process_csv,
            inputs=[csv_input],
            outputs=[
                status_out,
                review_table,
                comparison_table,
                gap_md_out,
                narrative_out,
                dl_comparison,
                dl_taxonomy,
                dl_narrative,
            ],
        )

        gr.Markdown(_FOOTER_MD)

    return demo


# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────

def _run_cli(csv_path: str) -> int:
    """Run the pipeline on *csv_path*, print a summary, and return a process exit code."""
    if not os.path.isfile(csv_path):
        # Diagnostics go to stderr so stdout carries only the report.
        print(f"[ERROR] File not found: {csv_path}", file=sys.stderr)
        return 1

    print(f"[CLI] Running pipeline on: {csv_path}")
    result = run_pipeline(csv_path, output_dir=".")
    gap = result["gap"]

    print("\n" + "=" * 60)
    print("PIPELINE COMPLETE")
    print("=" * 60)
    print(f" Records processed : {result['record_count']}")
    print(f" Topics extracted : {gap['total_topics']}")
    print(f" MAPPED : {gap['mapped_count']} ({gap['mapped_percent']}%)")
    print(f" NOVEL : {gap['novel_count']} ({gap['novel_percent']}%)")
    print(f" Narrative words : {len(result['narrative'].split())}")
    print("\nOutput files:")
    for path in result["output_files"]:
        print(f" → {path}")
    print("=" * 60)
    return 0


if __name__ == "__main__":
    # CLI mode: python app.py data.csv
    if len(sys.argv) > 1:
        sys.exit(_run_cli(sys.argv[1]))

    # Gradio / HuggingFace Spaces mode
    demo = build_ui()
    demo.launch(server_name="0.0.0.0", server_port=7860)