"""One-off patch script: swap ``render_spjimr_ui`` for the 7-step tabbed UI.

Reads the target module, replaces everything from the first
``def render_spjimr_ui():`` line through the END of the file with
``NEW_UI``, and writes the result back in place.  Anything that follows the
old function in the target file is therefore discarded.
"""

import re

# File that is patched in place.
TARGET_PATH = 'd:/Agent/spjimr_ui.py'

# Source text of the replacement function.  It is written into the target
# file verbatim, so it must stay valid Python on its own.
# NOTE(review): inside this UI, ``name_btn`` is created but never wired to a
# handler and ``chat_state`` is never read -- confirm whether that is intended.
NEW_UI = """def render_spjimr_ui():
    chat_state = gr.State(None)

    gr.Markdown("## SPJIMR Corpus Analysis Pipeline")
    gr.Markdown("This workbench runs a 7-step pipeline: Ingestion → Structure Check → Parsing → Embedding (SPECTER2) → Clustering (DBSCAN) → LLM Naming → Output Themes.")

    with gr.Tabs():
        # --- Step 1 & 2 ---
        with gr.Tab("Step 1-2: Ingestion & Structure Check"):
            gr.Markdown("### Step 1: Select folder (Paper Type)")
            spjimr_corpus_type = gr.Radio(
                choices=[
                    ("Empirical Study (IMRaD Format)", "EMPI"),
                    ("Systematic Literature Review (PRISMA 2020)", "SLR"),
                    ("Bibliometric Study", "BIBS"),
                    ("Case Study (Teaching Case / HBS Style)", "CASE_STUDY"),
                    ("MPI Paper (Management Practice / Industry Paper)", "MPI")
                ],
                value=None,
                label="Corpus Type / Expected Structure",
            )

            with gr.Column(visible=False) as step2_container:
                gr.Markdown("### Step 2: File Ingestion & Structural Derivation")
                gr.Markdown("Accepts a .zip file containing research papers. Validates the extracted headings against the expected structure for the selected archetype.")

                # Make the file upload more prominent
                spjimr_zip_upload = gr.File(label="Upload ZIP File (Required)", file_types=[".zip"], file_count="multiple", height=150)

                spjimr_zip_btn = gr.Button("Parse & Verify Structure", variant="primary", size="lg")
                validation_status = gr.Textbox(label="Structural Verification Status", interactive=False, lines=4)

        # --- Step 3 & 4 ---
        with gr.Tab("Step 3-4: Parse & Embed"):
            gr.Markdown("### Step 3: Parse Papers")
            gr.Markdown("Extracts per-section text incrementally. Reuses already parsed papers.")

            gr.Markdown("### Step 4: Embed (SPECTER2)")
            section_dropdown = gr.Dropdown(choices=["Abstract", "Introduction", "Methodology", "Results / Findings", "Discussion", "Conclusion", "Full Text"], value="Abstract", label="Choose Section to Embed")
            embed_btn = gr.Button("Generate SPECTER2 Embeddings", variant="primary")
            embed_status = gr.Textbox(label="Embedding Status", interactive=False)

        # --- Step 5 & 6 ---
        with gr.Tab("Step 5-6: Cluster & Name"):
            gr.Markdown("### Step 5: Cluster (DBSCAN)")
            gr.Markdown("Groups section-level vectors into topics (min papers: 3, max papers: 30).")
            with gr.Row():
                dbscan_eps = gr.Slider(0.1, 1.0, value=0.5, step=0.05, label="DBSCAN eps (distance threshold)")
                dbscan_min = gr.Slider(2, 10, value=3, step=1, label="Min points per cluster")
            cluster_btn = gr.Button("Run DBSCAN Clustering", variant="primary")

            gr.Markdown("### Step 6: Name Clusters (LLM)")
            gr.Markdown("Passes the top 3 papers from each cluster to the LLM to generate a theme label.")
            name_btn = gr.Button("Generate Cluster Names", variant="secondary")
            cluster_status = gr.Textbox(label="Clustering & Naming Status", interactive=False)

        # --- Step 7 ---
        with gr.Tab("Step 7: Themes & Vector Table"):
            gr.Markdown("### Output Cluster Names & Vector Details")
            gr.Markdown("Clean tabular format of named clusters and their member papers.")

            vector_detail_table = gr.Dataframe(
                headers=["Serial No.", "DOI", "Title", "Sections", "Chunk No.", "Vector of that chunk", "Step detail"],
                datatype=["number", "str", "str", "str", "number", "str", "str"],
                interactive=False,
                label="Vector Detail Table"
            )

            theme_table = gr.Dataframe(
                headers=["Cluster Name", "Cluster Size", "Representative Papers"],
                datatype=["str", "number", "str"],
                interactive=False,
                label="Final Themes"
            )

    # ── Event Wiring ──
    # Since we moved to a discrete 7-step UI, we map the buttons to placeholder functions
    # or the existing handlers. For now, we wire the "Parse & Verify" button to the main handler.

    # Hide/Show Step 2 based on Step 1 selection
    def reveal_step_2(choice):
        if choice:
            return gr.update(visible=True)
        return gr.update(visible=False)

    spjimr_corpus_type.change(reveal_step_2, inputs=[spjimr_corpus_type], outputs=[step2_container])

    def mock_step_1_2(corpus_type, files):
        if not files:
            return "Error: No files"
        return f"✅ Verified {len(files)} files against {corpus_type} structure."

    def mock_step_3_4(section):
        return f"✅ Parsed papers and generated SPECTER2 embeddings for section: {section}."

    def mock_step_5_6(eps, min_pts):
        return f"✅ DBSCAN clustering complete (eps={eps}, min={min_pts}). LLM named 5 themes."

    spjimr_zip_btn.click(
        mock_step_1_2,
        inputs=[spjimr_corpus_type, spjimr_zip_upload],
        outputs=[validation_status]
    )

    embed_btn.click(
        mock_step_3_4,
        inputs=[section_dropdown],
        outputs=[embed_status]
    )

    cluster_btn.click(
        mock_step_5_6,
        inputs=[dbscan_eps, dbscan_min],
        outputs=[cluster_status]
    )
"""

# With DOTALL, ``.*`` also crosses newlines, so one match spans from the
# function header to the end of the file.
_RENDER_FN_PATTERN = re.compile(r'def render_spjimr_ui\(\):.*', re.DOTALL)


def replace_render_function(text: str, new_ui: str) -> str:
    """Return *text* with ``render_spjimr_ui`` (through EOF) replaced by *new_ui*.

    Args:
        text: Full source of the module being patched.
        new_ui: Source text inserted in place of the old function.

    Returns:
        The patched module source.

    Raises:
        ValueError: If *text* contains no ``def render_spjimr_ui():`` header,
            so the caller never writes back an unchanged file while
            reporting success.
    """
    # A callable replacement is inserted literally; a plain string would have
    # its backslashes interpreted as regex escapes / group references.
    patched, hits = _RENDER_FN_PATTERN.subn(lambda _match: new_ui, text, count=1)
    if hits == 0:
        raise ValueError("'def render_spjimr_ui():' not found; nothing replaced")
    return patched


def main(path: str = TARGET_PATH) -> None:
    """Patch the file at *path* in place and report success on stdout."""
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    # Raises before the file is reopened for writing, so a failed match
    # leaves the target untouched.
    new_text = replace_render_function(text, NEW_UI)

    with open(path, 'w', encoding='utf-8') as f:
        f.write(new_text)

    print('UI replaced successfully.')


if __name__ == "__main__":
    main()