"""One-off patch script: swap ``render_spjimr_ui`` in spjimr_ui.py for a 7-step UI.

Reads the target module, replaces everything from the line
``def render_spjimr_ui():`` through the end of the file with the ``new_ui``
template below, and writes the result back in place.
"""
import re

# File that is patched in place when this script is run.
TARGET_PATH = 'd:/Agent/spjimr_ui.py'

# Replacement source for ``render_spjimr_ui``.
# NOTE(review): the previous revision of this template contained mojibake
# ("β") where an arrow / check mark evidently used to be. They are restored
# here as \u2192 (arrow) and \u2705 (check mark), written as escapes so the
# script itself stays ASCII-safe -- confirm these are the intended glyphs.
# NOTE(review): ``name_btn`` is created but not wired to any handler, and the
# wired handlers are mocks that only return status strings.
new_ui = """def render_spjimr_ui():
    chat_state = gr.State(None)

    gr.Markdown("## SPJIMR Corpus Analysis Pipeline")
    gr.Markdown("This workbench runs a 7-step pipeline: Ingestion \u2192 Structure Check \u2192 Parsing \u2192 Embedding (SPECTER2) \u2192 Clustering (DBSCAN) \u2192 LLM Naming \u2192 Output Themes.")

    with gr.Tabs():
        # --- Step 1 & 2 ---
        with gr.Tab("Step 1-2: Ingestion & Structure Check"):
            gr.Markdown("### Step 1: Select folder (Paper Type)")
            spjimr_corpus_type = gr.Radio(
                choices=[
                    ("Empirical Study (IMRaD Format)", "EMPI"),
                    ("Systematic Literature Review (PRISMA 2020)", "SLR"),
                    ("Bibliometric Study", "BIBS"),
                    ("Case Study (Teaching Case / HBS Style)", "CASE_STUDY"),
                    ("MPI Paper (Management Practice / Industry Paper)", "MPI")
                ],
                value=None,
                label="Corpus Type / Expected Structure",
            )

            with gr.Column(visible=False) as step2_container:
                gr.Markdown("### Step 2: File Ingestion & Structural Derivation")
                gr.Markdown("Accepts a .zip file containing research papers. Validates the extracted headings against the expected structure for the selected archetype.")
                # Make the file upload more prominent
                spjimr_zip_upload = gr.File(label="Upload ZIP File (Required)", file_types=[".zip"], file_count="multiple", height=150)
                spjimr_zip_btn = gr.Button("Parse & Verify Structure", variant="primary", size="lg")
                validation_status = gr.Textbox(label="Structural Verification Status", interactive=False, lines=4)

        # --- Step 3 & 4 ---
        with gr.Tab("Step 3-4: Parse & Embed"):
            gr.Markdown("### Step 3: Parse Papers")
            gr.Markdown("Extracts per-section text incrementally. Reuses already parsed papers.")
            gr.Markdown("### Step 4: Embed (SPECTER2)")
            section_dropdown = gr.Dropdown(choices=["Abstract", "Introduction", "Methodology", "Results / Findings", "Discussion", "Conclusion", "Full Text"], value="Abstract", label="Choose Section to Embed")
            embed_btn = gr.Button("Generate SPECTER2 Embeddings", variant="primary")
            embed_status = gr.Textbox(label="Embedding Status", interactive=False)

        # --- Step 5 & 6 ---
        with gr.Tab("Step 5-6: Cluster & Name"):
            gr.Markdown("### Step 5: Cluster (DBSCAN)")
            gr.Markdown("Groups section-level vectors into topics (min papers: 3, max papers: 30).")
            with gr.Row():
                dbscan_eps = gr.Slider(0.1, 1.0, value=0.5, step=0.05, label="DBSCAN eps (distance threshold)")
                dbscan_min = gr.Slider(2, 10, value=3, step=1, label="Min points per cluster")
            cluster_btn = gr.Button("Run DBSCAN Clustering", variant="primary")
            gr.Markdown("### Step 6: Name Clusters (LLM)")
            gr.Markdown("Passes the top 3 papers from each cluster to the LLM to generate a theme label.")
            name_btn = gr.Button("Generate Cluster Names", variant="secondary")
            cluster_status = gr.Textbox(label="Clustering & Naming Status", interactive=False)

        # --- Step 7 ---
        with gr.Tab("Step 7: Themes & Vector Table"):
            gr.Markdown("### Output Cluster Names & Vector Details")
            gr.Markdown("Clean tabular format of named clusters and their member papers.")
            vector_detail_table = gr.Dataframe(
                headers=["Serial No.", "DOI", "Title", "Sections", "Chunk No.", "Vector of that chunk", "Step detail"],
                datatype=["number", "str", "str", "str", "number", "str", "str"],
                interactive=False, label="Vector Detail Table"
            )
            theme_table = gr.Dataframe(
                headers=["Cluster Name", "Cluster Size", "Representative Papers"],
                datatype=["str", "number", "str"],
                interactive=False, label="Final Themes"
            )

    # -- Event Wiring --
    # Since we moved to a discrete 7-step UI, we map the buttons to placeholder functions
    # or the existing handlers. For now, we wire the "Parse & Verify" button to the main handler.

    # Hide/Show Step 2 based on Step 1 selection
    def reveal_step_2(choice):
        if choice:
            return gr.update(visible=True)
        return gr.update(visible=False)

    spjimr_corpus_type.change(reveal_step_2, inputs=[spjimr_corpus_type], outputs=[step2_container])

    def mock_step_1_2(corpus_type, files):
        if not files: return "Error: No files"
        return f"\u2705 Verified {len(files)} files against {corpus_type} structure."

    def mock_step_3_4(section):
        return f"\u2705 Parsed papers and generated SPECTER2 embeddings for section: {section}."

    def mock_step_5_6(eps, min_pts):
        return f"\u2705 DBSCAN clustering complete (eps={eps}, min={min_pts}). LLM named 5 themes."

    spjimr_zip_btn.click(
        mock_step_1_2,
        inputs=[spjimr_corpus_type, spjimr_zip_upload],
        outputs=[validation_status]
    )
    embed_btn.click(
        mock_step_3_4,
        inputs=[section_dropdown],
        outputs=[embed_status]
    )
    cluster_btn.click(
        mock_step_5_6,
        inputs=[dbscan_eps, dbscan_min],
        outputs=[cluster_status]
    )
"""

# With DOTALL, ``.*`` runs to the end of the text, so the match covers the
# function header and everything that follows it in the file.
pattern = re.compile(r'def render_spjimr_ui\(\):.*', re.DOTALL)


def replace_render_ui(text):
    """Return *text* with ``render_spjimr_ui`` (through end of text) replaced.

    Args:
        text: Full source of the module being patched.

    Returns:
        The source with everything from ``def render_spjimr_ui():`` onward
        replaced by ``new_ui``; text before the match is kept unchanged.

    Raises:
        ValueError: If ``def render_spjimr_ui():`` does not occur in *text*,
            so the caller never rewrites a file that was not actually patched.
    """
    # A callable replacement inserts the template literally; a plain string
    # replacement would have its backslash escapes interpreted by ``re``.
    new_text, replaced = pattern.subn(lambda _match: new_ui, text)
    if not replaced:
        raise ValueError("'def render_spjimr_ui():' not found; nothing replaced")
    return new_text


def main(path=TARGET_PATH):
    """Patch the module at *path* in place and report success."""
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    # Raises before the file is reopened for writing if there is no match.
    new_text = replace_render_ui(text)

    with open(path, 'w', encoding='utf-8') as f:
        f.write(new_text)
    print('UI replaced successfully.')


if __name__ == "__main__":
    main()