Spaces:

ronitsonawane24
/

tryH

Runtime error

App Files Files Community

tryH / app.py

ronitsonawane24

Upload 4 files

b7e9bf6 verified about 2 months ago

raw

history blame contribute delete

9.17 kB

	"""
	app.py – Gradio front-end for the Topic Modelling System.
	Runs on HuggingFace Spaces and also accepts CLI: python app.py data.csv
	"""

	import sys
	import os
	import tempfile
	import logging

	import pandas as pd
	import gradio as gr

	from agent import run_pipeline

	logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
	log = logging.getLogger(__name__)


	# ─────────────────────────────────────────────────────────────────────────────
	# Core processing wrapper for Gradio
	# ─────────────────────────────────────────────────────────────────────────────

	def process_csv(csv_file) -> tuple:
	    """
	    Gradio handler: run the full pipeline on an uploaded CSV and return
	    display-ready outputs.

	    Parameters
	    ----------
	    csv_file
	        Either a filesystem path, or an object whose ``.name`` attribute
	        holds the path. ``None`` means nothing was uploaded.

	    Returns
	    -------
	    (
	      status_msg    : str,
	      review_df     : pd.DataFrame – rendered in Gradio Dataframe,
	      comparison_df : pd.DataFrame,
	      gap_md        : str – gap analysis as Markdown,
	      narrative     : str,
	      comp_file     : str – path to comparison.csv for download,
	      tax_file      : str – path to taxonomy_map.json for download,
	      narr_file     : str – path to narrative.txt for download,
	    )

	    On a missing upload or any pipeline failure the tuple carries a status
	    message, two empty DataFrames, two empty strings and three ``None``
	    paths; exceptions are logged and never propagate to the caller.
	    """
	    import shutil

	    if csv_file is None:
	        return _empty_outputs("⚠️ Please upload a CSV file.")

	    # Tracked outside the try-body so a failure after creation can clean it up.
	    out_dir = None
	    try:
	        # Accept both a plain path and a file-like wrapper exposing `.name`.
	        csv_path = csv_file.name if hasattr(csv_file, "name") else csv_file

	        with tempfile.TemporaryDirectory() as tmpdir:
	            result = run_pipeline(csv_path, output_dir=tmpdir)

	            # Copy output files to a location that outlives `tmpdir`
	            # so Gradio can still serve them after this function returns.
	            out_dir = tempfile.mkdtemp()
	            served = {}
	            for filename in ("comparison.csv", "taxonomy_map.json", "narrative.txt"):
	                served[filename] = os.path.join(out_dir, filename)
	                shutil.copy(os.path.join(tmpdir, filename), served[filename])

	        gap = result["gap"]
	        record_count = result["record_count"]

	        # Assembled line by line so the Markdown never depends on how this
	        # source file happens to be indented. Pipes are plain `|`: a
	        # backslash-escaped pipe is not a column separator in Markdown.
	        gap_md = "\n".join(
	            [
	                "### Gap Analysis Summary",
	                "",
	                "| Metric | Value |",
	                "|--------|-------|",
	                f"| Total Topics Extracted | {gap['total_topics']} |",
	                f"| MAPPED (in PAJAIS) | {gap['mapped_count']} ({gap['mapped_percent']}%) |",
	                f"| NOVEL (emerging) | {gap['novel_count']} ({gap['novel_percent']}%) |",
	                f"| Records Processed | {record_count} |",
	                "",
	                f"Top MAPPED themes: {', '.join(gap['top_mapped'])}",
	                "",
	                f"Top NOVEL themes: {', '.join(gap['top_novel'])}",
	                "",
	            ]
	        )

	        status = (
	            f"✅ Pipeline completed successfully!\n"
	            f" 📄 {record_count} records processed | "
	            f"🏷️ {gap['total_topics']} topics extracted | "
	            f"🗂️ {gap['mapped_count']} mapped | "
	            f"✨ {gap['novel_count']} novel"
	        )

	        return (
	            status,
	            result["review_df"],
	            result["comparison_df"],
	            gap_md,
	            result["narrative"],
	            served["comparison.csv"],
	            served["taxonomy_map.json"],
	            served["narrative.txt"],
	        )

	    except Exception as exc:
	        # Top-level UI boundary: log the traceback, show a short message.
	        log.exception("Pipeline failed")
	        if out_dir is not None:
	            # Nothing will be served from a failed run; do not leak the dir.
	            shutil.rmtree(out_dir, ignore_errors=True)
	        return _empty_outputs(f"❌ Error: {exc}")


	def _empty_outputs(status: str) -> tuple:
	    """Build the handler's 8-tuple for cases where there is no pipeline output."""
	    empty = pd.DataFrame()
	    return (status, empty, empty, "", "", None, None, None)


	# ─────────────────────────────────────────────────────────────────────────────
	# Gradio UI
	# ─────────────────────────────────────────────────────────────────────────────

	def build_ui() -> gr.Blocks:
	    """
	    Assemble the Gradio Blocks interface and wire the run button.

	    Layout, top to bottom: an intro Markdown header; a row holding the CSV
	    upload + run button and a read-only status box; a tab set (extracted
	    topics, title-vs-abstract comparison, gap analysis, narrative); a row
	    of three download slots; a footer.

	    Clicking the run button calls ``process_csv`` with the uploaded file and
	    routes its 8-tuple result, in order, to the status box, the two tables,
	    the gap Markdown, the narrative box and the three download components.

	    Returns
	    -------
	    gr.Blocks
	        The constructed (not yet launched) app.
	    """
	    css = """
	    .status-box textarea { font-size: 0.95rem; font-family: monospace; }
	    .narrative-box textarea { font-size: 0.9rem; line-height: 1.6; }
	    """

	    with gr.Blocks(
	        title="Topic Modelling System",
	        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"),
	        css=css,
	    ) as demo:

	        gr.Markdown(
	            """
	            # 📚 Topic Modelling System
	            Automated research-theme extraction, PAJAIS mapping, and gap analysis

	            Upload a CSV file containing `title` and `abstract` columns to begin.
	            The system will extract ≥ 98 topics, compare title vs abstract themes,
	            map topics against the PAJAIS taxonomy, and generate a 500-word academic narrative.
	            """
	        )

	        with gr.Row():
	            with gr.Column(scale=1):
	                csv_input = gr.File(
	                    label="📂 Upload CSV (title + abstract columns)",
	                    file_types=[".csv"],
	                    type="filepath",
	                )
	                run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")

	            with gr.Column(scale=2):
	                status_out = gr.Textbox(
	                    label="Status",
	                    interactive=False,
	                    lines=3,
	                    elem_classes=["status-box"],
	                )

	        gr.Markdown("---")

	        with gr.Tabs():
	            with gr.TabItem("🏷️ Extracted Topics"):
	                review_table = gr.Dataframe(
	                    # Plain pipes: "\|" is an invalid escape sequence and
	                    # would put literal backslashes into the label.
	                    label="Topic Review Table (topic_id | keyword | frequency)",
	                    wrap=True,
	                    interactive=False,
	                )

	            with gr.TabItem("🔄 Title vs Abstract Comparison"):
	                comparison_table = gr.Dataframe(
	                    label="Comparison Table",
	                    wrap=True,
	                    interactive=False,
	                )

	            with gr.TabItem("📊 Gap Analysis"):
	                gap_md_out = gr.Markdown()

	            with gr.TabItem("📝 Narrative (≈500 words)"):
	                narrative_out = gr.Textbox(
	                    label="Academic Narrative",
	                    lines=28,
	                    interactive=False,
	                    elem_classes=["narrative-box"],
	                )

	        gr.Markdown("### 📥 Download Output Files")
	        with gr.Row():
	            dl_comparison = gr.File(label="comparison.csv", interactive=False)
	            dl_taxonomy = gr.File(label="taxonomy_map.json", interactive=False)
	            dl_narrative = gr.File(label="narrative.txt", interactive=False)

	        # Output order must match the tuple returned by process_csv.
	        run_btn.click(
	            fn=process_csv,
	            inputs=[csv_input],
	            outputs=[
	                status_out,
	                review_table,
	                comparison_table,
	                gap_md_out,
	                narrative_out,
	                dl_comparison,
	                dl_taxonomy,
	                dl_narrative,
	            ],
	        )

	        gr.Markdown(
	            """
	            ---
	            Topic Modelling System — powered by TF-IDF · LDA · NMF
	            """
	        )

	    return demo


	# ─────────────────────────────────────────────────────────────────────────────
	# Entry point
	# ─────────────────────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	    if len(sys.argv) <= 1:
	        # No CLI argument: serve the Gradio UI (HuggingFace Spaces mode).
	        demo = build_ui()
	        demo.launch(server_name="0.0.0.0", server_port=7860)

	    else:
	        # CLI mode: python app.py data.csv
	        csv_path = sys.argv[1]
	        if not os.path.isfile(csv_path):
	            print(f"[ERROR] File not found: {csv_path}")
	            sys.exit(1)

	        print(f"[CLI] Running pipeline on: {csv_path}")

	        # Outputs are written into the current working directory.
	        result = run_pipeline(csv_path, output_dir=".")

	        rule = "=" * 60
	        print("\n" + rule)
	        print("PIPELINE COMPLETE")
	        print(rule)
	        print(f" Records processed : {result['record_count']}")
	        # Looked up only here so a missing key surfaces at the same point
	        # in the report as it would with repeated result['gap'] lookups.
	        gap = result["gap"]
	        print(f" Topics extracted : {gap['total_topics']}")
	        print(f" MAPPED : {gap['mapped_count']} ({gap['mapped_percent']}%)")
	        print(f" NOVEL : {gap['novel_count']} ({gap['novel_percent']}%)")
	        print(f" Narrative words : {len(result['narrative'].split())}")
	        print("\nOutput files:")
	        for out_path in result["output_files"]:
	            print(f" → {out_path}")
	        print(rule)