Spaces:
Runtime error
Runtime error
"""
app.py — Gradio front-end for the Topic Modelling System.

Runs on HuggingFace Spaces and also accepts CLI: python app.py data.csv
"""
import logging
import os
import shutil
import sys
import tempfile

import gradio as gr
import pandas as pd

from agent import run_pipeline
# Configure root logging once at import time and expose a module-level logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Core processing wrapper for Gradio
# ─────────────────────────────────────────────────────────────────────────────
# Files the pipeline is expected to write into its output directory and that
# the UI offers for download, in the order the download widgets are wired.
_OUTPUT_FILENAMES = ("comparison.csv", "taxonomy_map.json", "narrative.txt")


def _failure_outputs(message: str) -> tuple:
    """Return the 8-slot output tuple used when there are no results to show."""
    empty = pd.DataFrame()
    return (message, empty, empty, "", "", None, None, None)


def _format_gap_markdown(gap, record_count) -> str:
    """Render the gap-analysis summary as a Markdown table plus theme lists."""
    lines = [
        "### Gap Analysis Summary",
        "",  # blank line so the table is parsed as its own block
        "| Metric | Value |",
        "|--------|-------|",
        f"| **Total Topics Extracted** | {gap['total_topics']} |",
        f"| **MAPPED (in PAJAIS)** | {gap['mapped_count']} ({gap['mapped_percent']}%) |",
        f"| **NOVEL (emerging)** | {gap['novel_count']} ({gap['novel_percent']}%) |",
        f"| **Records Processed** | {record_count} |",
        "",  # blank line terminates the table
        f"**Top MAPPED themes:** {', '.join(gap['top_mapped'])}",
        "",
        f"**Top NOVEL themes:** {', '.join(gap['top_novel'])}",
        "",
    ]
    return "\n".join(lines)


def process_csv(csv_file) -> tuple:
    """
    Gradio handler: run the full pipeline on an uploaded CSV and return
    display-ready outputs.

    Parameters
    ----------
    csv_file
        Either a filesystem path or an object exposing the path as ``.name``;
        ``None`` when nothing was uploaded.

    Returns
    -------
    (
        status_msg    : str,
        review_df     : pd.DataFrame — rendered in Gradio Dataframe,
        comparison_df : pd.DataFrame,
        gap_md        : str — gap analysis as Markdown,
        narrative     : str,
        comp_file     : str — path to comparison.csv for download,
        tax_file      : str — path to taxonomy_map.json for download,
        narr_file     : str — path to narrative.txt for download,
    )

    On missing input or any pipeline failure the status message describes the
    problem, the tables are empty DataFrames, the text outputs are empty
    strings and the file paths are ``None``; the exception is logged, not
    raised.
    """
    if csv_file is None:
        return _failure_outputs("⚠️ Please upload a CSV file.")

    try:
        # Accept both a plain path and a file-like wrapper carrying ``.name``.
        csv_path = csv_file.name if hasattr(csv_file, "name") else csv_file

        with tempfile.TemporaryDirectory() as tmpdir:
            result = run_pipeline(csv_path, output_dir=tmpdir)

            # The scratch directory is deleted when the ``with`` exits, so the
            # downloadable files are copied somewhere that outlives this call.
            out_dir = tempfile.mkdtemp()
            downloads = []
            try:
                for filename in _OUTPUT_FILENAMES:
                    destination = os.path.join(out_dir, filename)
                    shutil.copy(os.path.join(tmpdir, filename), destination)
                    downloads.append(destination)
            except OSError:
                # Do not leak a half-populated directory on a failed run.
                shutil.rmtree(out_dir, ignore_errors=True)
                raise

        gap = result["gap"]
        record_count = result["record_count"]
        gap_md = _format_gap_markdown(gap, record_count)
        status = (
            f"✅ Pipeline completed successfully!\n"
            f" 📊 {record_count} records processed | "
            f"🏷️ {gap['total_topics']} topics extracted | "
            f"🗂️ {gap['mapped_count']} mapped | "
            f"✨ {gap['novel_count']} novel"
        )
        return (
            status,
            result["review_df"],
            result["comparison_df"],
            gap_md,
            result["narrative"],
            *downloads,
        )
    except Exception as exc:
        # Top-level UI boundary: report the failure in the status box instead
        # of crashing the Gradio worker; the traceback goes to the log.
        log.exception("Pipeline failed")
        return _failure_outputs(f"❌ Error: {exc}")
# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
def build_ui() -> gr.Blocks:
    """
    Assemble the Gradio Blocks interface and wire the run button.

    Layout, top to bottom: intro text, an upload/run column next to a status
    box, four result tabs (topics, comparison, gap analysis, narrative), three
    download slots, and a footer. Clicking the run button calls
    ``process_csv`` with the uploaded file and routes its eight return values
    to the eight output components, in order.

    Returns
    -------
    gr.Blocks
        The constructed (not yet launched) app.
    """
    css = """
    .status-box textarea { font-size: 0.95rem; font-family: monospace; }
    .narrative-box textarea { font-size: 0.9rem; line-height: 1.6; }
    """

    # Markdown is assembled line by line so paragraph breaks do not depend on
    # source indentation.
    header_md = "\n".join(
        [
            "# 📚 Topic Modelling System",
            "",
            "**Automated research-theme extraction, PAJAIS mapping, and gap analysis**",
            "",
            "Upload a CSV file containing `title` and `abstract` columns to begin.",
            "The system will extract ≥ 98 topics, compare title vs abstract themes,",
            "map topics against the PAJAIS taxonomy, and generate a 500-word academic narrative.",
        ]
    )
    footer_md = "\n".join(
        [
            "---",
            "*Topic Modelling System — powered by TF-IDF · LDA · NMF*",
        ]
    )

    with gr.Blocks(
        title="Topic Modelling System",
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
        css=css,
    ) as demo:
        gr.Markdown(header_md)

        with gr.Row():
            with gr.Column(scale=1):
                csv_input = gr.File(
                    label="📂 Upload CSV (title + abstract columns)",
                    file_types=[".csv"],
                    type="filepath",
                )
                run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")
            with gr.Column(scale=2):
                status_out = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=3,
                    elem_classes=["status-box"],
                )

        gr.Markdown("---")

        with gr.Tabs():
            with gr.TabItem("🏷️ Extracted Topics"):
                review_table = gr.Dataframe(
                    label="Topic Review Table (topic_id | keyword | frequency)",
                    wrap=True,
                    interactive=False,
                )
            with gr.TabItem("📊 Title vs Abstract Comparison"):
                comparison_table = gr.Dataframe(
                    label="Comparison Table",
                    wrap=True,
                    interactive=False,
                )
            with gr.TabItem("🔍 Gap Analysis"):
                gap_md_out = gr.Markdown()
            with gr.TabItem("📝 Narrative (≈500 words)"):
                narrative_out = gr.Textbox(
                    label="Academic Narrative",
                    lines=28,
                    interactive=False,
                    elem_classes=["narrative-box"],
                )

        gr.Markdown("### 📥 Download Output Files")
        with gr.Row():
            dl_comparison = gr.File(label="comparison.csv", interactive=False)
            dl_taxonomy = gr.File(label="taxonomy_map.json", interactive=False)
            dl_narrative = gr.File(label="narrative.txt", interactive=False)

        # Output order must match the tuple returned by process_csv.
        run_btn.click(
            fn=process_csv,
            inputs=[csv_input],
            outputs=[
                status_out,
                review_table,
                comparison_table,
                gap_md_out,
                narrative_out,
                dl_comparison,
                dl_taxonomy,
                dl_narrative,
            ],
        )

        gr.Markdown(footer_md)

    return demo
# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    if len(sys.argv) > 1:
        # CLI mode: python app.py data.csv
        csv_path = sys.argv[1]
        if not os.path.isfile(csv_path):
            print(f"[ERROR] File not found: {csv_path}")
            sys.exit(1)

        print(f"[CLI] Running pipeline on: {csv_path}")
        # Outputs are written to the current working directory.
        result = run_pipeline(csv_path, output_dir=".")
        gap = result["gap"]

        print("\n" + "=" * 60)
        print("PIPELINE COMPLETE")
        print("=" * 60)
        print(f"  Records processed : {result['record_count']}")
        print(f"  Topics extracted  : {gap['total_topics']}")
        print(f"  MAPPED            : {gap['mapped_count']} ({gap['mapped_percent']}%)")
        print(f"  NOVEL             : {gap['novel_count']} ({gap['novel_percent']}%)")
        print(f"  Narrative words   : {len(result['narrative'].split())}")
        print("\nOutput files:")
        for path in result["output_files"]:
            print(f"  → {path}")
        print("=" * 60)
    else:
        # Gradio / HuggingFace Spaces mode: listen on all interfaces, port 7860.
        demo = build_ui()
        demo.launch(server_name="0.0.0.0", server_port=7860)