# DocUA's picture
# Initial StructCore Space
# 7a12bf7
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Tuple
# Allow running as a script: `python apps/challenge_demo/app_challenge.py`
# Without a parent package the absolute `apps.*` imports further down can only
# resolve if the directory at parents[2] of this file is on sys.path.
if not __package__:
    repo_root = Path(__file__).resolve().parents[2]
    if str(repo_root) not in sys.path:
        # Prepend so this checkout wins over any other entry on the path.
        sys.path.insert(0, str(repo_root))
import gradio as gr
import pandas as pd
from apps.challenge_demo.services.case_library import get_case, load_cases
from apps.challenge_demo.services.evidence_service import load_evidence_rows
from apps.challenge_demo.services.structcore_service import (
StructCoreConfig,
lines_to_rows,
result_to_debug_json,
run_structcore,
)
def _default_case_id() -> str:
    """Return the id of the first case from ``load_cases()``.

    Falls back to ``"custom"`` when ``load_cases()`` yields nothing.
    """
    available = load_cases()
    if not available:
        return "custom"
    return available[0].id
def _case_choices() -> List[Tuple[str, str]]:
    """Build ``(label, value)`` pairs for the case selector.

    Each case from ``load_cases()`` becomes ``("<title> (<id>)", "<id>")``;
    a final ``("Custom note", "custom")`` entry is always appended last.
    """
    choices = [(f"{case.title} ({case.id})", case.id) for case in load_cases()]
    choices.append(("Custom note", "custom"))
    return choices
def _on_case_change(case_id: str) -> Tuple[str, str]:
    """Return ``(note_text, description_markdown)`` for the selected case.

    An empty selection or ``"custom"`` clears the note and shows the
    manual-mode hint. An id that ``get_case`` cannot resolve clears the note
    and reports "Case not found.".
    """
    manual_selected = case_id == "custom" or not case_id
    if manual_selected:
        return "", "Manual mode: paste your own note text."

    case = get_case(case_id)
    if case is not None:
        return case.text, f"**{case.title}**\n\n{case.description}"
    return "", "Case not found."
def _format_status(
    note_id: str,
    backend_mode: str,
    duration_sec: float,
    gate_summary: Dict,
    warnings: List[str],
    error: str | None,
) -> str:
    """Render the outcome of one run as a Markdown status block.

    Args:
        note_id: Identifier of the processed note.
        backend_mode: Backend mode reported for the run.
        duration_sec: Run duration in seconds, printed as given.
        gate_summary: Mapping read for ``parse_success``,
            ``clusters_present`` and ``output_lines``. ``None`` or an empty
            mapping is treated as "nothing reported".
        warnings: Warning messages; omitted from the output when empty.
        error: Error message; omitted from the output when falsy.

    Returns:
        A Markdown string: a ``### Run Status`` heading followed by one
        bullet per field, with warnings as nested bullets.
    """
    # Tolerate a missing summary instead of failing on ``None.get``.
    gate = gate_summary or {}
    ok = "yes" if gate.get("parse_success") else "no"
    clusters = ", ".join(gate.get("clusters_present") or []) or "none"
    line_count = gate.get("output_lines", 0)

    parts = [
        "### Run Status",
        f"- Note ID: `{note_id}`",
        f"- Backend mode: `{backend_mode}`",
        f"- Parse success: `{ok}`",
        f"- Output lines: `{line_count}`",
        f"- Clusters: `{clusters}`",
        f"- Duration (sec): `{duration_sec}`",
    ]
    if warnings:
        parts.append("- Warnings:")
        # Two leading spaces are required for Markdown to nest these items
        # under "- Warnings:"; with a single space they render as siblings.
        parts.extend(f"  - {w}" for w in warnings)
    if error:
        parts.append(f"- Error: `{error}`")
    return "\n".join(parts)
def _format_risk_summary(risk: Dict | None) -> Tuple[str, str]:
    """Render the risk payload as ``(markdown_summary, pretty_json)``.

    Args:
        risk: Mapping read for ``risk_category``, ``probability``,
            ``composite_score``, ``data_completeness`` and ``risk_factors``.
            ``None`` or an empty mapping means no risk output.

    Returns:
        A tuple of the Markdown summary and the payload serialised with
        ``json.dumps(..., ensure_ascii=False, indent=2)``. Without risk
        output, a placeholder sentence and ``"{}"`` are returned.
    """
    if not risk:
        return "No risk output available for this run.", "{}"

    category = risk.get("risk_category")
    prob = risk.get("probability")
    score = risk.get("composite_score")
    completeness = risk.get("data_completeness")
    factors = risk.get("risk_factors") or []

    bullets = [
        "### Readmission Risk Summary",
        f"- Category: `{category}`",
        f"- Probability: `{prob}`",
        f"- Composite score: `{score}`",
        f"- Data completeness: `{completeness}`",
    ]
    if factors:
        bullets.append("- Top risk factors:")
        # Only the first five factors are listed. Two leading spaces are
        # required for Markdown to nest them under the parent bullet.
        bullets.extend(f"  - {factor}" for factor in factors[:5])
    return "\n".join(bullets), json.dumps(risk, ensure_ascii=False, indent=2)
def _run_demo(
    case_id: str,
    note_text: str,
    backend_mode: str,
    stage1_url: str,
    stage1_model: str,
    stage2_url: str,
    stage2_model: str,
    fallback_to_mock: bool,
) -> Tuple[str, str, str, pd.DataFrame, str, str, str, str]:
    """Run StructCore on the submitted note and build every output widget value.

    The note text is stripped; if it is empty and a non-custom case is
    selected, the text of that case (when ``get_case`` finds it) is used.
    All string settings are stripped, with ``backend_mode`` defaulting to
    ``"mock"`` when not provided.

    Returns:
        ``(status_md, stage1_summary, stage2_raw, facts_dataframe,
        gate_summary_json, risk_md, risk_json, debug_json)``.
    """

    def _clean(value, default=""):
        # Treat None/empty as ``default`` and trim surrounding whitespace.
        return (value or default).strip()

    selected_id = case_id if case_id else "custom"
    text = _clean(note_text)
    if selected_id != "custom" and not text:
        case = get_case(selected_id)
        if case is not None:
            text = case.text

    config = StructCoreConfig(
        backend_mode=_clean(backend_mode, "mock"),
        stage1_url=_clean(stage1_url),
        stage1_model=_clean(stage1_model),
        stage2_url=_clean(stage2_url),
        stage2_model=_clean(stage2_model),
        fallback_to_mock_on_error=bool(fallback_to_mock),
    )
    outcome = run_structcore(text, selected_id, config)

    status_md = _format_status(
        note_id=outcome.note_id,
        backend_mode=outcome.backend_mode,
        duration_sec=outcome.duration_sec,
        gate_summary=outcome.gate_summary,
        warnings=outcome.warnings,
        error=outcome.error,
    )
    facts_df = pd.DataFrame(
        lines_to_rows(outcome.normalized_lines),
        columns=["CLUSTER", "Keyword", "Value", "Timestamp"],
    )
    risk_md, risk_json = _format_risk_summary(outcome.risk)
    gate_json = json.dumps(outcome.gate_summary, ensure_ascii=False, indent=2)
    debug_json = result_to_debug_json(outcome)

    return (
        status_md,
        outcome.stage1_summary,
        outcome.stage2_raw,
        facts_df,
        gate_json,
        risk_md,
        risk_json,
        debug_json,
    )
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI for the StructCore demo.

    The UI has four tabs (case input, StructCore inspector, risk view,
    evidence board) plus a collapsible debug payload. Changing the case
    dropdown calls ``_on_case_change``; the run button calls ``_run_demo``.

    The default backend mode is read from the ``STRUCTCORE_BACKEND_MODE``
    environment variable. Values that are not one of the radio choices
    (after trimming and lower-casing) fall back to ``"mock"``.

    Returns:
        The constructed ``gr.Blocks`` app; launching is left to the caller.
    """
    cfg_defaults = StructCoreConfig()
    case_choices = _case_choices()
    default_case_id = _default_case_id()

    initial_case = get_case(default_case_id)
    if initial_case:
        initial_text = initial_case.text
        initial_desc = f"**{initial_case.title}**\n\n{initial_case.description}"
    else:
        initial_text = ""
        initial_desc = "Manual mode"

    evidence_columns = ["Claim ID", "Claim", "Metric", "Status", "Artifact"]
    evidence_df = pd.DataFrame(load_evidence_rows(), columns=evidence_columns)
    fact_columns = ["CLUSTER", "Keyword", "Value", "Timestamp"]

    backend_choices = ["mock", "pipeline"]
    # Validate the environment override so the radio never starts with a
    # value that matches none of its choices.
    env_backend = os.getenv("STRUCTCORE_BACKEND_MODE", "mock").strip().lower()
    default_backend = env_backend if env_backend in backend_choices else "mock"

    # Blank lines keep the tagline and the description as separate Markdown
    # paragraphs instead of one run-on paragraph.
    intro_md = (
        "# MedGemma StructCore Demo\n\n"
        "**MedGemma StructCore: Local-First Clinical Structuring Engine for EHR**\n\n"
        "This demo is extraction-first: free-text EHR -> structured KVT4 facts "
        "-> optional downstream readmission risk view.\n"
    )

    with gr.Blocks(title="MedGemma StructCore Demo") as demo:
        gr.Markdown(intro_md)

        with gr.Tab("1) Case Input"):
            case_id = gr.Dropdown(label="Synthetic case", choices=case_choices, value=default_case_id)
            case_desc = gr.Markdown(initial_desc)
            note_text = gr.Textbox(label="Clinical note text", lines=14, value=initial_text)

            with gr.Row():
                backend_mode = gr.Radio(
                    label="Backend mode",
                    choices=backend_choices,
                    value=default_backend,
                    info="mock = offline deterministic demo, pipeline = Stage1/Stage2 runners with local model servers",
                )
                fallback_to_mock = gr.Checkbox(
                    label="Fallback to mock if pipeline fails",
                    value=True,
                )

            with gr.Accordion("Pipeline settings", open=False):
                stage1_url = gr.Textbox(label="Stage1 URL", value=cfg_defaults.stage1_url)
                stage1_model = gr.Textbox(label="Stage1 model", value=cfg_defaults.stage1_model)
                stage2_url = gr.Textbox(label="Stage2 URL", value=cfg_defaults.stage2_url)
                stage2_model = gr.Textbox(label="Stage2 model", value=cfg_defaults.stage2_model)

            run_btn = gr.Button("Run StructCore", variant="primary")
            status_md = gr.Markdown()

        with gr.Tab("2) StructCore Inspector"):
            stage1_summary = gr.Textbox(label="Stage1 summary", lines=14)
            stage2_raw = gr.Textbox(label="Stage2 raw output", lines=14)
            normalized_df = gr.Dataframe(
                label="Normalized KVT4 facts",
                headers=fact_columns,
                datatype=["str", "str", "str", "str"],
                row_count=8,
            )
            gate_json = gr.Textbox(label="Quality gate summary", lines=10)

        with gr.Tab("3) Risk View"):
            risk_md = gr.Markdown()
            risk_json = gr.Textbox(label="Risk payload (JSON)", lines=18)

        with gr.Tab("4) Evidence Board"):
            gr.Markdown("All claims should be interpreted with explicit status labels.")
            gr.Dataframe(
                value=evidence_df,
                headers=evidence_columns,
                datatype=["str", "str", "str", "str", "str"],
                interactive=False,
                wrap=True,
                row_count=len(evidence_df),
                label="Evidence claims",
            )

        # NOTE(review): placed at Blocks level so it is reachable from every
        # tab; confirm it was not meant to live inside the evidence tab.
        with gr.Accordion("Debug JSON", open=False):
            debug_json = gr.Textbox(label="Full run payload", lines=18)

        # Selecting a case refreshes both the note text and its description.
        case_id.change(fn=_on_case_change, inputs=[case_id], outputs=[note_text, case_desc])

        # The order of `outputs` must match the tuple returned by _run_demo.
        run_btn.click(
            fn=_run_demo,
            inputs=[
                case_id,
                note_text,
                backend_mode,
                stage1_url,
                stage1_model,
                stage2_url,
                stage2_model,
                fallback_to_mock,
            ],
            outputs=[
                status_md,
                stage1_summary,
                stage2_raw,
                normalized_df,
                gate_json,
                risk_md,
                risk_json,
                debug_json,
            ],
        )

    return demo
def main() -> None:
    """Build the demo and launch it with ``server_name="0.0.0.0"`` on port 7863.

    ``ssr_mode=False`` is passed first; if ``launch`` rejects that keyword
    with a ``TypeError`` mentioning ``ssr_mode``, the launch is retried
    without it. Any other ``TypeError`` propagates.
    """
    app = build_demo()
    launch_kwargs = dict(server_name="0.0.0.0", server_port=7863, show_error=True)
    try:
        app.launch(ssr_mode=False, **launch_kwargs)
    except TypeError as exc:
        # Older gradio versions do not support ssr_mode.
        unrelated_failure = "ssr_mode" not in str(exc)
        if unrelated_failure:
            raise
        app.launch(**launch_kwargs)


# Script entry point: only launch when executed directly, not on import.
if __name__ == "__main__":
    main()