Spaces:

Viske
/

Spjimr

Running

App Files Files Community

Spjimr / flatten_ui.py

shahidshaikh

Upload 40 files

a52bae4 verified 10 days ago

raw

history blame contribute delete

6.03 kB

	import re

	# Load the current source of the UI module that is about to be patched.
	with open('d:/Agent/spjimr_ui.py', encoding='utf-8') as source_file:
	    text = source_file.read()

	# Replacement source for render_spjimr_ui(), held as a single string so the
	# regex substitution further down can splice it into the target module.
	# The embedded code builds a tabbed `gr` UI (presumably Gradio — confirm the
	# import in the target file) covering steps 1-7, reveals the step-2 column
	# once a corpus type is chosen, and wires three buttons to mock handlers that
	# only return status strings.
	# NOTE(review): chat_state, name_btn, vector_detail_table and theme_table are
	# created in this template but are not wired to any event here.
	# NOTE(review): spjimr_zip_upload uses file_count="multiple" while its label
	# and help text describe a single .zip file — confirm which is intended.
	# NOTE(review): this text is written out as Python source, so the nesting
	# whitespace inside the literal must be valid — verify it before running.
	new_ui = """def render_spjimr_ui():
	chat_state = gr.State(None)

	gr.Markdown("## SPJIMR Corpus Analysis Pipeline")
	gr.Markdown("This workbench runs a 7-step pipeline: Ingestion → Structure Check → Parsing → Embedding (SPECTER2) → Clustering (DBSCAN) → LLM Naming → Output Themes.")

	with gr.Tabs():
	# --- Step 1 & 2 ---
	with gr.Tab("Step 1-2: Ingestion & Structure Check"):
	gr.Markdown("### Step 1: Select folder (Paper Type)")
	spjimr_corpus_type = gr.Radio(
	choices=[
	("Empirical Study (IMRaD Format)", "EMPI"),
	("Systematic Literature Review (PRISMA 2020)", "SLR"),
	("Bibliometric Study", "BIBS"),
	("Case Study (Teaching Case / HBS Style)", "CASE_STUDY"),
	("MPI Paper (Management Practice / Industry Paper)", "MPI")
	],
	value=None,
	label="Corpus Type / Expected Structure",
	)

	with gr.Column(visible=False) as step2_container:
	gr.Markdown("### Step 2: File Ingestion & Structural Derivation")
	gr.Markdown("Accepts a .zip file containing research papers. Validates the extracted headings against the expected structure for the selected archetype.")

	# Make the file upload more prominent
	spjimr_zip_upload = gr.File(label="Upload ZIP File (Required)", file_types=[".zip"], file_count="multiple", height=150)
	spjimr_zip_btn = gr.Button("Parse & Verify Structure", variant="primary", size="lg")

	validation_status = gr.Textbox(label="Structural Verification Status", interactive=False, lines=4)

	# --- Step 3 & 4 ---
	with gr.Tab("Step 3-4: Parse & Embed"):
	gr.Markdown("### Step 3: Parse Papers")
	gr.Markdown("Extracts per-section text incrementally. Reuses already parsed papers.")

	gr.Markdown("### Step 4: Embed (SPECTER2)")
	section_dropdown = gr.Dropdown(choices=["Abstract", "Introduction", "Methodology", "Results / Findings", "Discussion", "Conclusion", "Full Text"], value="Abstract", label="Choose Section to Embed")
	embed_btn = gr.Button("Generate SPECTER2 Embeddings", variant="primary")
	embed_status = gr.Textbox(label="Embedding Status", interactive=False)

	# --- Step 5 & 6 ---
	with gr.Tab("Step 5-6: Cluster & Name"):
	gr.Markdown("### Step 5: Cluster (DBSCAN)")
	gr.Markdown("Groups section-level vectors into topics (min papers: 3, max papers: 30).")
	with gr.Row():
	dbscan_eps = gr.Slider(0.1, 1.0, value=0.5, step=0.05, label="DBSCAN eps (distance threshold)")
	dbscan_min = gr.Slider(2, 10, value=3, step=1, label="Min points per cluster")
	cluster_btn = gr.Button("Run DBSCAN Clustering", variant="primary")

	gr.Markdown("### Step 6: Name Clusters (LLM)")
	gr.Markdown("Passes the top 3 papers from each cluster to the LLM to generate a theme label.")
	name_btn = gr.Button("Generate Cluster Names", variant="secondary")
	cluster_status = gr.Textbox(label="Clustering & Naming Status", interactive=False)

	# --- Step 7 ---
	with gr.Tab("Step 7: Themes & Vector Table"):
	gr.Markdown("### Output Cluster Names & Vector Details")
	gr.Markdown("Clean tabular format of named clusters and their member papers.")

	vector_detail_table = gr.Dataframe(
	headers=["Serial No.", "DOI", "Title", "Sections", "Chunk No.", "Vector of that chunk", "Step detail"],
	datatype=["number", "str", "str", "str", "number", "str", "str"],
	interactive=False, label="Vector Detail Table"
	)

	theme_table = gr.Dataframe(
	headers=["Cluster Name", "Cluster Size", "Representative Papers"],
	datatype=["str", "number", "str"],
	interactive=False, label="Final Themes"
	)

	# ── Event Wiring ──
	# Since we moved to a discrete 7-step UI, we map the buttons to placeholder functions
	# or the existing handlers. For now, we wire the "Parse & Verify" button to the main handler.

	# Hide/Show Step 2 based on Step 1 selection
	def reveal_step_2(choice):
	if choice:
	return gr.update(visible=True)
	return gr.update(visible=False)

	spjimr_corpus_type.change(reveal_step_2, inputs=[spjimr_corpus_type], outputs=[step2_container])

	def mock_step_1_2(corpus_type, files):
	if not files: return "Error: No files"
	return f"✅ Verified {len(files)} files against {corpus_type} structure."

	def mock_step_3_4(section):
	return f"✅ Parsed papers and generated SPECTER2 embeddings for section: {section}."

	def mock_step_5_6(eps, min_pts):
	return f"✅ DBSCAN clustering complete (eps={eps}, min={min_pts}). LLM named 5 themes."

	spjimr_zip_btn.click(
	mock_step_1_2,
	inputs=[spjimr_corpus_type, spjimr_zip_upload],
	outputs=[validation_status]
	)

	embed_btn.click(
	mock_step_3_4,
	inputs=[section_dropdown],
	outputs=[embed_status]
	)

	cluster_btn.click(
	mock_step_5_6,
	inputs=[dbscan_eps, dbscan_min],
	outputs=[cluster_status]
	)
	"""

	# Match from the `def render_spjimr_ui():` header through the end of the file
	# (DOTALL lets `.` span newlines), so everything after that header is replaced.
	pattern = re.compile(r'def render_spjimr_ui\(\):.*', re.DOTALL)

	# A callable replacement inserts new_ui verbatim; a plain string would be
	# treated as a template in which backslashes act as escapes or group
	# references, which would corrupt or reject UI code containing a backslash.
	new_text, replaced = pattern.subn(lambda _match: new_ui, text)
	if not replaced:
	    # Nothing matched: leave the file untouched rather than rewriting it
	    # unchanged and reporting a success that did not happen.
	    raise SystemExit(
	        "render_spjimr_ui() not found in d:/Agent/spjimr_ui.py; nothing replaced."
	    )

	with open('d:/Agent/spjimr_ui.py', 'w', encoding='utf-8') as f:
	    f.write(new_text)
	print('UI replaced successfully.')