tryH / app.py
ronitsonawane24's picture
Upload 4 files
b7e9bf6 verified
"""
app.py – Gradio front-end for the Topic Modelling System.
Runs on HuggingFace Spaces and also accepts CLI: python app.py data.csv
"""
import sys
import os
import tempfile
import logging
import pandas as pd
import gradio as gr
from agent import run_pipeline
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Core processing wrapper for Gradio
# ─────────────────────────────────────────────────────────────────────────────
# File names the pipeline is expected to write into its output directory and
# that the UI offers for download (order matches the handler's return tuple).
_DOWNLOAD_FILES = ("comparison.csv", "taxonomy_map.json", "narrative.txt")


def _empty_outputs(message: str) -> tuple:
    """Return the 8-tuple of handler outputs with only the status message set."""
    return (message, pd.DataFrame(), pd.DataFrame(), "", "", None, None, None)


def _format_gap_markdown(gap: dict, record_count) -> str:
    """Render the gap-analysis summary as a Markdown table plus theme lists."""
    # str() each theme so a non-string entry cannot break the join.
    top_mapped = ", ".join(map(str, gap["top_mapped"]))
    top_novel = ", ".join(map(str, gap["top_novel"]))
    lines = [
        "### Gap Analysis Summary",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| **Total Topics Extracted** | {gap['total_topics']} |",
        f"| **MAPPED (in PAJAIS)** | {gap['mapped_count']} ({gap['mapped_percent']}%) |",
        f"| **NOVEL (emerging)** | {gap['novel_count']} ({gap['novel_percent']}%) |",
        f"| **Records Processed** | {record_count} |",
        # Blank line ends the table; without it the following paragraphs are
        # parsed as additional table rows by GFM-style renderers.
        "",
        f"**Top MAPPED themes:** {top_mapped}",
        "",
        f"**Top NOVEL themes:** {top_novel}",
        "",
    ]
    return "\n".join(lines)


def process_csv(csv_file) -> tuple:
    """
    Gradio handler: run the full pipeline on an uploaded CSV and return
    display-ready outputs.

    Parameters
    ----------
    csv_file
        Either an object exposing the upload's path as ``.name`` or the path
        itself; ``None`` when nothing was uploaded.

    Returns
    -------
    (
        status_msg    : str,
        review_df     : pd.DataFrame – rendered in Gradio Dataframe,
        comparison_df : pd.DataFrame,
        gap_md        : str – gap analysis as Markdown,
        narrative     : str,
        comp_file     : str – path to comparison.csv for download,
        tax_file      : str – path to taxonomy_map.json for download,
        narr_file     : str – path to narrative.txt for download,
    )

    On missing input or any pipeline failure the status message describes the
    problem, both DataFrames are empty, the text outputs are empty strings and
    the three file paths are ``None``. Exceptions are logged, not re-raised.
    """
    import shutil  # local import: stdlib module not imported at file level

    if csv_file is None:
        return _empty_outputs("⚠️ Please upload a CSV file.")

    out_dir = None
    try:
        # Accept both a file-like object with a .name path and a bare path.
        csv_path = csv_file.name if hasattr(csv_file, "name") else csv_file

        with tempfile.TemporaryDirectory() as tmpdir:
            result = run_pipeline(csv_path, output_dir=tmpdir)

            # The staging directory vanishes when the `with` exits, so copy the
            # downloadable files somewhere that outlives this call.
            out_dir = tempfile.mkdtemp()
            download_paths = []
            for filename in _DOWNLOAD_FILES:
                destination = os.path.join(out_dir, filename)
                shutil.copy(os.path.join(tmpdir, filename), destination)
                download_paths.append(destination)

        gap = result["gap"]
        gap_md = _format_gap_markdown(gap, result["record_count"])
        status = (
            f"✅ Pipeline completed successfully!\n"
            f" 📄 {result['record_count']} records processed | "
            f"🏷️ {gap['total_topics']} topics extracted | "
            f"🗂️ {gap['mapped_count']} mapped | "
            f"✨ {gap['novel_count']} novel"
        )
        return (
            status,
            result["review_df"],
            result["comparison_df"],
            gap_md,
            result["narrative"],
            *download_paths,
        )
    except Exception as exc:
        log.exception("Pipeline failed")
        # Nothing will reference the download directory after a failure, so
        # remove it instead of leaking one directory per failed run.
        if out_dir is not None:
            shutil.rmtree(out_dir, ignore_errors=True)
        return _empty_outputs(f"❌ Error: {exc}")
# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
def build_ui() -> gr.Blocks:
    """
    Assemble the Gradio Blocks interface for the Topic Modelling System.

    Layout: an upload column (CSV input + run button) beside a status box,
    four result tabs (topics table, comparison table, gap-analysis Markdown,
    narrative text), and three download slots. Clicking the run button calls
    ``process_csv`` with the uploaded file and routes its eight return values
    to the corresponding components, in order.

    Returns
    -------
    gr.Blocks
        The constructed (not yet launched) application.
    """
    css = """
    .status-box textarea { font-size: 0.95rem; font-family: monospace; }
    .narrative-box textarea { font-size: 0.9rem; line-height: 1.6; }
    """

    # Markdown is assembled from explicit lines so paragraph breaks do not
    # depend on how this source file happens to be indented.
    header_md = (
        "# 📚 Topic Modelling System\n"
        "\n"
        "**Automated research-theme extraction, PAJAIS mapping, and gap analysis**\n"
        "\n"
        "Upload a CSV file containing `title` and `abstract` columns to begin.\n"
        "The system will extract ≥ 98 topics, compare title vs abstract themes,\n"
        "map topics against the PAJAIS taxonomy, and generate a 500-word academic narrative.\n"
    )
    footer_md = (
        "---\n"
        "\n"
        "*Topic Modelling System — powered by TF-IDF · LDA · NMF*\n"
    )

    with gr.Blocks(
        title="Topic Modelling System",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
        css=css,
    ) as demo:
        gr.Markdown(header_md)

        with gr.Row():
            with gr.Column(scale=1):
                csv_input = gr.File(
                    label="📂 Upload CSV (title + abstract columns)",
                    file_types=[".csv"],
                    type="filepath",
                )
                run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")
            with gr.Column(scale=2):
                status_out = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=3,
                    elem_classes=["status-box"],
                )

        gr.Markdown("---")

        with gr.Tabs():
            with gr.TabItem("🏷️ Extracted Topics"):
                review_table = gr.Dataframe(
                    label="Topic Review Table (topic_id | keyword | frequency)",
                    wrap=True,
                    interactive=False,
                )
            with gr.TabItem("🔄 Title vs Abstract Comparison"):
                comparison_table = gr.Dataframe(
                    label="Comparison Table",
                    wrap=True,
                    interactive=False,
                )
            with gr.TabItem("📊 Gap Analysis"):
                gap_md_out = gr.Markdown()
            with gr.TabItem("📝 Narrative (≈500 words)"):
                narrative_out = gr.Textbox(
                    label="Academic Narrative",
                    lines=28,
                    interactive=False,
                    elem_classes=["narrative-box"],
                )

        gr.Markdown("### 📥 Download Output Files")
        with gr.Row():
            dl_comparison = gr.File(label="comparison.csv", interactive=False)
            dl_taxonomy = gr.File(label="taxonomy_map.json", interactive=False)
            dl_narrative = gr.File(label="narrative.txt", interactive=False)

        # Output order must match the tuple returned by process_csv.
        run_btn.click(
            fn=process_csv,
            inputs=[csv_input],
            outputs=[
                status_out,
                review_table,
                comparison_table,
                gap_md_out,
                narrative_out,
                dl_comparison,
                dl_taxonomy,
                dl_narrative,
            ],
        )

        gr.Markdown(footer_md)

    return demo
# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Two entry modes:
    #   * CLI:    python app.py data.csv   -> run the pipeline once, print a summary
    #   * no arg: launch the Gradio app (HuggingFace Spaces mode)
    if len(sys.argv) > 1:
        csv_path = sys.argv[1]
        if not os.path.isfile(csv_path):
            # Diagnostics go to stderr so stdout stays clean for the summary.
            print(f"[ERROR] File not found: {csv_path}", file=sys.stderr)
            sys.exit(1)

        print(f"[CLI] Running pipeline on: {csv_path}")
        # Output files are written to the current working directory.
        result = run_pipeline(csv_path, output_dir=".")
        gap = result["gap"]
        rule = "=" * 60

        print("\n" + rule)
        print("PIPELINE COMPLETE")
        print(rule)
        print(f" Records processed : {result['record_count']}")
        print(f" Topics extracted : {gap['total_topics']}")
        print(f" MAPPED : {gap['mapped_count']} ({gap['mapped_percent']}%)")
        print(f" NOVEL : {gap['novel_count']} ({gap['novel_percent']}%)")
        print(f" Narrative words : {len(result['narrative'].split())}")
        print("\nOutput files:")
        for output_path in result["output_files"]:
            print(f" → {output_path}")
        print(rule)
    else:
        # Gradio / HuggingFace Spaces mode: bind on all interfaces, port 7860.
        demo = build_ui()
        demo.launch(server_name="0.0.0.0", server_port=7860)