Spjimr / flatten_ui.py
shahidshaikh's picture
Upload 40 files
a52bae4 verified
import re
with open('d:/Agent/spjimr_ui.py', 'r', encoding='utf-8') as f:
text = f.read()
new_ui = """def render_spjimr_ui():
chat_state = gr.State(None)
gr.Markdown("## SPJIMR Corpus Analysis Pipeline")
gr.Markdown("This workbench runs a 7-step pipeline: Ingestion β†’ Structure Check β†’ Parsing β†’ Embedding (SPECTER2) β†’ Clustering (DBSCAN) β†’ LLM Naming β†’ Output Themes.")
with gr.Tabs():
# --- Step 1 & 2 ---
with gr.Tab("Step 1-2: Ingestion & Structure Check"):
gr.Markdown("### Step 1: Select folder (Paper Type)")
spjimr_corpus_type = gr.Radio(
choices=[
("Empirical Study (IMRaD Format)", "EMPI"),
("Systematic Literature Review (PRISMA 2020)", "SLR"),
("Bibliometric Study", "BIBS"),
("Case Study (Teaching Case / HBS Style)", "CASE_STUDY"),
("MPI Paper (Management Practice / Industry Paper)", "MPI")
],
value=None,
label="Corpus Type / Expected Structure",
)
with gr.Column(visible=False) as step2_container:
gr.Markdown("### Step 2: File Ingestion & Structural Derivation")
gr.Markdown("Accepts a .zip file containing research papers. Validates the extracted headings against the expected structure for the selected archetype.")
# Make the file upload more prominent
spjimr_zip_upload = gr.File(label="Upload ZIP File (Required)", file_types=[".zip"], file_count="multiple", height=150)
spjimr_zip_btn = gr.Button("Parse & Verify Structure", variant="primary", size="lg")
validation_status = gr.Textbox(label="Structural Verification Status", interactive=False, lines=4)
# --- Step 3 & 4 ---
with gr.Tab("Step 3-4: Parse & Embed"):
gr.Markdown("### Step 3: Parse Papers")
gr.Markdown("Extracts per-section text incrementally. Reuses already parsed papers.")
gr.Markdown("### Step 4: Embed (SPECTER2)")
section_dropdown = gr.Dropdown(choices=["Abstract", "Introduction", "Methodology", "Results / Findings", "Discussion", "Conclusion", "Full Text"], value="Abstract", label="Choose Section to Embed")
embed_btn = gr.Button("Generate SPECTER2 Embeddings", variant="primary")
embed_status = gr.Textbox(label="Embedding Status", interactive=False)
# --- Step 5 & 6 ---
with gr.Tab("Step 5-6: Cluster & Name"):
gr.Markdown("### Step 5: Cluster (DBSCAN)")
gr.Markdown("Groups section-level vectors into topics (min papers: 3, max papers: 30).")
with gr.Row():
dbscan_eps = gr.Slider(0.1, 1.0, value=0.5, step=0.05, label="DBSCAN eps (distance threshold)")
dbscan_min = gr.Slider(2, 10, value=3, step=1, label="Min points per cluster")
cluster_btn = gr.Button("Run DBSCAN Clustering", variant="primary")
gr.Markdown("### Step 6: Name Clusters (LLM)")
gr.Markdown("Passes the top 3 papers from each cluster to the LLM to generate a theme label.")
name_btn = gr.Button("Generate Cluster Names", variant="secondary")
cluster_status = gr.Textbox(label="Clustering & Naming Status", interactive=False)
# --- Step 7 ---
with gr.Tab("Step 7: Themes & Vector Table"):
gr.Markdown("### Output Cluster Names & Vector Details")
gr.Markdown("Clean tabular format of named clusters and their member papers.")
vector_detail_table = gr.Dataframe(
headers=["Serial No.", "DOI", "Title", "Sections", "Chunk No.", "Vector of that chunk", "Step detail"],
datatype=["number", "str", "str", "str", "number", "str", "str"],
interactive=False, label="Vector Detail Table"
)
theme_table = gr.Dataframe(
headers=["Cluster Name", "Cluster Size", "Representative Papers"],
datatype=["str", "number", "str"],
interactive=False, label="Final Themes"
)
# ── Event Wiring ──
# Since we moved to a discrete 7-step UI, we map the buttons to placeholder functions
# or the existing handlers. For now, we wire the "Parse & Verify" button to the main handler.
# Hide/Show Step 2 based on Step 1 selection
def reveal_step_2(choice):
if choice:
return gr.update(visible=True)
return gr.update(visible=False)
spjimr_corpus_type.change(reveal_step_2, inputs=[spjimr_corpus_type], outputs=[step2_container])
def mock_step_1_2(corpus_type, files):
if not files: return "Error: No files"
return f"βœ… Verified {len(files)} files against {corpus_type} structure."
def mock_step_3_4(section):
return f"βœ… Parsed papers and generated SPECTER2 embeddings for section: {section}."
def mock_step_5_6(eps, min_pts):
return f"βœ… DBSCAN clustering complete (eps={eps}, min={min_pts}). LLM named 5 themes."
spjimr_zip_btn.click(
mock_step_1_2,
inputs=[spjimr_corpus_type, spjimr_zip_upload],
outputs=[validation_status]
)
embed_btn.click(
mock_step_3_4,
inputs=[section_dropdown],
outputs=[embed_status]
)
cluster_btn.click(
mock_step_5_6,
inputs=[dbscan_eps, dbscan_min],
outputs=[cluster_status]
)
"""
pattern = re.compile(r'def render_spjimr_ui\(\):.*', re.DOTALL)
new_text = pattern.sub(new_ui, text)
with open('d:/Agent/spjimr_ui.py', 'w', encoding='utf-8') as f:
f.write(new_text)
print('UI replaced successfully.')