import gradio as gr
import json, datetime, io, textwrap, os, tempfile, re

# Optional PDF dependency: the app degrades gracefully (Markdown export only)
# when reportlab is not installed.
try:
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas
    from reportlab.lib.units import mm
    REPORTLAB_AVAILABLE = True
except Exception:
    REPORTLAB_AVAILABLE = False


def _sanitize_filename(name: str, default="checklist"):
    """Return a filesystem/browser-safe base filename (no extension).

    Falls back to *default* when *name* is empty/blank, replaces characters
    that are illegal on common OSes/browsers with '-', collapses runs of
    whitespace, and truncates to 80 characters.
    """
    name = (name or "").strip() or default
    name = re.sub(r"[\\/:*?\"<>|#%&{}$!`@+=]", "-", name)  # remove illegal chars across OS/browsers
    name = re.sub(r"\s+", " ", name).strip()
    return name[:80]  # keep it short


# Checklist content: section name -> list of (label, help text) tuples.
# Labels double as keys in the saved JSON state, so they must stay stable.
CHECKLIST = {
    "Cohort & Splits": [
        ("Clear inclusion/exclusion criteria and patient characteristics described",
         "Describe cohorts, inclusion/exclusion, and relevant demographics."),
        ("Separate training/validation/test by PATIENT (avoid leakage)",
         "Never split by record when patients have multiple records."),
        ("Hold-out test set used ONCE after model freeze",
         "No peeking / tuning after unblinding test results."),
        ("Use K-fold CV or fixed split with uncertainty estimate",
         "Prefer K-fold to get variance and out-of-fold predictions."),
    ],
    "Outcomes & Ground Truth": [
        ("Outcome (labels) definition is explicit and clinically appropriate",
         "Aligns with domain standards; note differences across guidelines."),
        ("Alignment with labeling source (algorithm vs. experts) clarified",
         "Model mimics its labeling source; note systematic errors."),
        ("If cross-standard validation, perform adaptation/transfer learning",
         "E.g., US vs EU definitions of VT require recalibration."),
    ],
    "Classification Reporting": [
        ("Confusion matrix included (per-class if multi-class)",
         "Provide full confusion matrix in paper or supplement."),
        ("Report Sensitivity/Recall and PPV/Precision",
         "Avoid reliance on metrics biased by majority class."),
        ("Use an overall robust metric (F1 or MCC for binary)",
         "Prefer F1 over Accuracy with imbalanced data; MCC robust for binary."),
        ("PR curve preferred when classes imbalanced; ROC optional",
         "PRC is more informative with rare positives."),
        ("Specify operating point and threshold selection procedure",
         "Report the deployed threshold; areas-under-curves should not stand alone."),
    ],
    "Regression Reporting": [
        ("Report RMSE (and optionally MAE) on original scale",
         "Avoid only correlation; keep units understandable."),
        ("Apply the Standard Deviation Rule",
         "RMSE should be much lower than outcome SD."),
        ("Use Bland–Altman plots for agreement",
         "Visualize bias and limits of agreement; discuss clinical acceptability."),
    ],
    "Reference Models & Comparators": [
        ("Compare against non-ML baselines",
         "Logistic/Cox/Linear regression as applicable."),
        ("Use same features/cohorts where possible",
         "Ensure fair comparison; for DL on raw signals, use common clinical markers."),
        ("Include established clinical scores where relevant",
         "E.g., CHARGE-AF, CHA2DS2-VASc, etc."),
    ],
    "Mode of Operation & Explainability": [
        ("Document preprocessing and normalization with units",
         "Rescale back to clinical units if normalized."),
        ("Describe dimensionality reduction (e.g., PCA) with loadings",
         "Share transformation matrix; cite key contributors."),
        ("Report variable importance / SHAP where applicable",
         "For feature-based models, supply importance metrics."),
        ("For deep learning, include attention maps/ablation",
         "Indicate salient regions and potential new biomarkers."),
    ],
    "Further Testing & Clinical Validation": [
        ("External validation on independent cohort(s)",
         "Assess generalization to new populations/devices."),
        ("Prospective and/or silent deployment study",
         "Before RCTs, test in workflow without influencing care."),
        ("Randomized/controlled evaluation of clinical impact",
         "Demonstrate patient benefit and safety before adoption."),
        ("Wording reflects validation stage",
         "Avoid over-claiming; be explicit about readiness level."),
        ("Compare to human experts only when appropriate",
         "Only if humans are the current gold standard for that task."),
    ],
    "Reproducibility, Ethics & Governance": [
        ("Release code/model cards/protocols as feasible",
         "Aid replication; table 0 provenance from raw to research dataset."),
        ("Report dataset shifts/limitations and intended use",
         "Document populations, devices, and contraindications."),
        ("Bias, safety, and hallucination/confabulation risks addressed",
         "Discuss failure modes and mitigation/override plans."),
        ("Regulatory alignment noted (e.g., study design, post-market)",
         "Reference relevant guidelines where applicable."),
    ],
}

SECTIONS = list(CHECKLIST.keys())

# Lightweight mapping of TRIPOD-AI reporting sections to checklist item labels.
# Display-only; labels should match those in CHECKLIST for readability.
TRIPOD_AI_MAP = {
    "Title/Abstract": ["Wording reflects validation stage"],
    "Background/Objectives": ["Outcome (labels) definition is explicit and clinically appropriate"],
    "Source of Data/Participants": ["Clear inclusion/exclusion criteria and patient characteristics described"],
    "Predictors/Outcome": ["Document preprocessing and normalization with units",
                           "Alignment with labeling source (algorithm vs. experts) clarified"],
    "Sample Size/Missing Data": ["Use K-fold CV or fixed split with uncertainty estimate"],
    "Statistical Analysis Methods": ["Compare against non-ML baselines",
                                     "Use same features/cohorts where possible"],
    "Model Development/Validation": ["Separate training/validation/test by PATIENT (avoid leakage)",
                                     "Hold-out test set used ONCE after model freeze",
                                     "External validation on independent cohort(s)"],
    "Performance Measures": ["Report Sensitivity/Recall and PPV/Precision",
                             "Use an overall robust metric (F1 or MCC for binary)",
                             "Specify operating point and threshold selection procedure",
                             "Report RMSE (and optionally MAE) on original scale",
                             "Use Bland–Altman plots for agreement"],
    "Interpretation/Limitations": ["Report dataset shifts/limitations and intended use",
                                   "Bias, safety, and hallucination/confabulation risks addressed"],
    "Other Information": ["Release code/model cards/protocols as feasible",
                          "Regulatory alignment noted (e.g., study design, post-market)"],
}

# Reporting tips keyed by exemplar figure type; rendered in a read-only tab.
EXEMPLAR_PROMPTS = {
    "PR Curve (Imbalanced classes)": [
        "Report PPV (precision) and recall (sensitivity) at the chosen operating point.",
        "Explain why PRC is preferred over ROC with rare positives.",
        "Provide class prevalence and note how it sets the PRC baseline.",
    ],
    "ROC Curve": [
        "Include AUC/ROC but avoid relying on it alone when classes are imbalanced.",
        "Show how threshold tuning affects sensitivity/specificity near deployment point.",
        "Clarify that the deployed model uses a fixed threshold.",
    ],
    "Bland–Altman (Regression)": [
        "Report bias and 95% limits of agreement; relate to clinical acceptability.",
        "Include units (ms, bpm, etc.) and show any proportional bias.",
        "Use RMSE and compare against outcome SD (Standard Deviation Rule).",
    ],
    "Confusion Matrix": [
        "Provide per-class matrices in supplements for multi-class tasks.",
        "Derive sensitivity, specificity, PPV, NPV consistently (units: % or decimals).",
        "Explain any abstain/unknown handling if applicable.",
    ],
}


def default_state():
    """Return a fresh all-unchecked state: {section: {label: False}}."""
    return {sec: {item[0]: False for item in items} for sec, items in CHECKLIST.items()}


def to_markdown(state, project_title, notes):
    """Render the checklist *state* plus *notes* as a Markdown report string."""
    # timezone-aware now(); utcnow() is deprecated since Python 3.12.
    now = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    lines = [f"# {project_title or 'AI in Cardiology – Best Practice Checklist'}", "",
             f"_Generated: {now}_", ""]
    for sec, items in CHECKLIST.items():
        lines.append(f"## {sec}")
        for (label, helptext) in items:
            checked = state.get(sec, {}).get(label, False)
            box = "[x]" if checked else "[ ]"
            lines.append(f"- {box} **{label}** — {helptext}")
        lines.append("")
    if notes:
        lines.append("## Notes")
        lines.append(notes)
        lines.append("")
    lines.append("## TRIPOD-AI Mapping (lightweight)")
    for k, v in TRIPOD_AI_MAP.items():
        lines.append(f"- **{k}**: " + "; ".join(v))
    return "\n".join(lines)


def export_markdown_file(state, title, notes, fname_override):
    """Write the Markdown report to a temp file.

    Returns (markdown_text, gr.update making the download tile visible).
    """
    md = to_markdown(state, title, notes)
    tmpdir = tempfile.mkdtemp()
    base = _sanitize_filename(fname_override or title or "checklist")
    path = os.path.join(tmpdir, base + ".md")
    with open(path, "w", encoding="utf-8") as f:
        f.write(md)
    return md, gr.update(value=path, visible=True)


def export_pdf_file(state, title, notes, fname_override):
    """Render the Markdown report into a simple PDF via reportlab.

    Returns (gr.update for the download tile, note_text). When reportlab is
    missing, the tile stays hidden and note_text explains how to enable PDF.
    """
    if not REPORTLAB_AVAILABLE:
        return gr.update(visible=False), "PDF export requires the 'reportlab' package. Add `reportlab` to requirements.txt."
    md = to_markdown(state, title, notes)
    tmpdir = tempfile.mkdtemp()
    base = _sanitize_filename(fname_override or title or "checklist")
    path = os.path.join(tmpdir, base + ".pdf")
    # A4/canvas/mm were imported at module level (guarded by REPORTLAB_AVAILABLE).
    c = canvas.Canvas(path, pagesize=A4)
    width, height = A4
    margin = 20 * mm
    x = margin
    y = height - margin
    for line in md.splitlines():
        # Crude Markdown-to-PDF: headings get larger bold fonts, everything
        # else is 10pt body text, wrapped at ~95 chars.
        if line.startswith("# "):
            text = line[2:]
            c.setFont("Helvetica-Bold", 16)
        elif line.startswith("## "):
            text = line[3:]
            c.setFont("Helvetica-Bold", 12)
        else:
            text = line
            c.setFont("Helvetica", 10)
        for wrapped in textwrap.wrap(text, width=95):
            if y < margin + 20:  # new page when near the bottom
                c.showPage()
                y = height - margin
            c.drawString(x, y, wrapped)
            y -= 14
        y -= 6
    c.showPage()
    c.save()
    return gr.update(value=path, visible=True), ""


def save_json_text(state, title, notes):
    """Serialize the full app state to a pretty-printed JSON string."""
    payload = {"title": title, "notes": notes, "state": state, "schema_version": 8}
    return json.dumps(payload, indent=2)


def load_json_blob(json_text):
    """Parse a saved-JSON string back into (state, title, notes).

    On parse failure the error message is surfaced in the notes field and an
    all-unchecked state is returned, so the UI never crashes on bad input.
    """
    try:
        payload = json.loads(json_text)
        title = payload.get("title", "")
        notes = payload.get("notes", "")
        state = payload.get("state", default_state())
    except Exception as e:
        title, notes, state = "", f"Error loading JSON: {e}", default_state()
    return state, title, notes


def preset_example():
    """Return a demo (state, title, notes): internal-validation items checked."""
    st = default_state()
    for sec in ["Cohort & Splits", "Outcomes & Ground Truth",
                "Classification Reporting", "Reference Models & Comparators"]:
        for label in st[sec].keys():
            st[sec][label] = True
    return (st,
            "ECG AF Classifier – Internal Validation",
            "Preset focused on a balanced internal validation package; complete external and prospective items next.")
def reviewer_summary(state, title):
    """Build a referee-style Markdown summary from the checklist *state*.

    Checked items become Strengths, unchecked items Concerns; the verdict is
    "Accept" only when nothing is missing, otherwise "Major Revision".
    """
    strengths, concerns = [], []
    for sec, items in CHECKLIST.items():
        for (label, _) in items:
            if state.get(sec, {}).get(label, False):
                strengths.append(f"{sec}: {label}")
            else:
                concerns.append(f"{sec}: {label}")
    verdict = "Major Revision" if concerns else "Accept"
    summary = f"**Reviewer Summary for**: {title or 'Manuscript'}\n\n"
    summary += "### Strengths\n" + ("\n".join([f"- {s}" for s in strengths]) or "- (none checked)") + "\n\n"
    summary += "### Concerns / Missing Items\n" + ("\n".join([f"- {c}" for c in concerns]) or "- (none)") + "\n\n"
    summary += f"### Provisional Verdict\n- {verdict}\n"
    return summary


with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# Evaluating AI-Enabled Medical Tests in Cardiology — Best Practice Checklist")
    gr.Markdown("Interactive checklist that operationalizes best-practice guidance for **supervised ML** medical tests in cardiology (e.g., ECG/EP). Tick items, save JSON, export a report (Markdown/PDF), map to TRIPOD-AI sections, or generate a reviewer-style summary.")
    with gr.Accordion("How to use", open=False):
        gr.Markdown("""
1. Fill in the checklist across the tabs.
2. **Save → JSON** to keep a portable state (you can reload it later).
3. **Export** to generate a report. You'll get a download tile just below the button.
4. Use **TRIPOD-AI Mapping** to align items with reporting sections.
5. **Reviewer Mode** creates a referee-style summary for internal review.
""")
    gr.Markdown("> **Reference paper**: *Evaluating artificial intelligence-enabled medical tests in cardiology: Best practice* (IJC Heart & Vasculature, 2025). DOI: 10.1016/j.ijcha.2025.101783.")
    gr.Markdown("> **Contributors to this tool**: *Jonas L. Isaksen*, Michael A. Riegler and Jørgen K. Kanters.")

    with gr.Row():
        title = gr.Textbox(label="Project / Manuscript Title", placeholder="e.g., ECG-based AF detection model")
        notes = gr.Textbox(label="Notes", lines=2, placeholder="Optional notes or action items")
        filename_override = gr.Textbox(label="Filename (optional)", placeholder="Defaults to title (sanitized) or 'checklist'")

    # One Checkbox per checklist item, grouped by section tab. Order matters:
    # handlers reconstruct the state dict from positional checkbox values.
    section_checks = {}
    with gr.Tabs():
        for sec in SECTIONS:
            with gr.Tab(sec):
                checks = []
                for label, helptext in CHECKLIST[sec]:
                    with gr.Row():
                        checks.append(gr.Checkbox(label=label, info=helptext, value=False))
                section_checks[sec] = checks
        with gr.Tab("TRIPOD-AI Mapping"):
            gr.Markdown("### Lightweight TRIPOD-AI Mapping")
            trip_map_md = "\n".join([f"- **{k}**: {', '.join(v)}" for k, v in TRIPOD_AI_MAP.items()])
            gr.Markdown(trip_map_md)
        with gr.Tab("Exemplar Figures & Prompts"):
            for name, tips in EXEMPLAR_PROMPTS.items():
                gr.Markdown(f"### {name}")
                gr.Markdown("\n".join([f"- {t}" for t in tips]))
        with gr.Tab("Reviewer Mode"):
            gr.Markdown("Generate a structured reviewer-style summary based on the checklist.")
            b_review = gr.Button("Generate Reviewer Summary")
            review_md = gr.Markdown("")

    # Flat list of all checkboxes, in SECTIONS/CHECKLIST iteration order.
    all_components = []
    for sec in SECTIONS:
        all_components += section_checks[sec]

    state_json = gr.State(value=default_state())

    def collect_state_from_args(args):
        """Rebuild the nested state dict from flat positional checkbox values."""
        st = default_state()
        idx = 0
        for sec in SECTIONS:
            for label, _ in CHECKLIST[sec]:
                st[sec][label] = bool(args[idx])
                idx += 1
        return st

    # Handlers receive *args = (checkbox values..., title, notes, filename)
    # and slice the trailing text inputs off by position.
    def handler_save_json(*args):
        """Serialize current UI state to JSON text."""
        t = args[-3]
        n = args[-2]
        st = collect_state_from_args(args[:-3])
        return save_json_text(st, t, n)

    def handler_export_md(*args):
        """Export Markdown; returns (file tile update, preview update)."""
        t = args[-3]
        n = args[-2]
        fname = args[-1]
        st = collect_state_from_args(args[:-3])
        md, file_update = export_markdown_file(st, t, n, fname)
        return file_update, gr.update(visible=True, value=md)

    def handler_export_pdf(*args):
        """Export PDF; returns (file tile update, note text)."""
        t = args[-3]
        n = args[-2]
        fname = args[-1]
        st = collect_state_from_args(args[:-3])
        file_update, note = export_pdf_file(st, t, n, fname)
        return file_update, note

    def handler_reviewer(*args):
        """Build the reviewer summary; args = (checkbox values..., title)."""
        t = args[-1]
        st = collect_state_from_args(args[:-1])
        return reviewer_summary(st, t)

    def sync_to_ui(st):
        """Flatten a state dict into checkbox values in component order."""
        vals = []
        for sec in SECTIONS:
            for label, _ in CHECKLIST[sec]:
                vals.append(bool(st.get(sec, {}).get(label, False)))
        return vals

    def handler_load(json_text):
        """Restore state/title/notes (and all checkboxes) from JSON text."""
        st, t, n = load_json_blob(json_text)
        return [st, t, n] + sync_to_ui(st)

    def handler_preset():
        """Load the demo preset into the UI."""
        st, t, n = preset_example()
        return [st, t, n] + sync_to_ui(st)

    with gr.Row():
        b_save = gr.Button("Save → JSON")
        b_load = gr.Button("Load JSON")
        b_preset = gr.Button("Load preset: ECG AF classifier")
    json_text = gr.Textbox(label="JSON", lines=10, placeholder="Click 'Save' to generate JSON. Paste here then 'Load' to restore.")

    # Markdown export layout: button + file in a row, then a collapsible preview
    with gr.Row():
        b_export_md = gr.Button("Export as Markdown", scale=0)
        md_file = gr.File(label="Download Markdown", visible=False, scale=1)
    with gr.Accordion("Preview Markdown", open=False):
        md_preview = gr.Markdown("")

    # PDF export layout: button + file in a row, note below
    with gr.Row():
        b_export_pdf = gr.Button("Export as PDF", scale=0)
        pdf_file = gr.File(label="Download PDF", visible=False, scale=1)
    pdf_note = gr.Markdown("")
    if not REPORTLAB_AVAILABLE:
        gr.Markdown("**Note:** PDF export requires `reportlab`. Add it to `requirements.txt`.")

    # Wire events
    b_save.click(fn=handler_save_json,
                 inputs=all_components + [title, notes, filename_override],
                 outputs=json_text)
    b_load.click(fn=handler_load,
                 inputs=json_text,
                 outputs=[state_json, title, notes] + all_components)
    # Pass the handler directly; wrapping it in a lambda added nothing.
    b_preset.click(fn=handler_preset,
                   inputs=None,
                   outputs=[state_json, title, notes] + all_components)
    b_export_md.click(fn=handler_export_md,
                      inputs=all_components + [title, notes, filename_override],
                      outputs=[md_file, md_preview])
    b_export_pdf.click(fn=handler_export_pdf,
                       inputs=all_components + [title, notes, filename_override],
                       outputs=[pdf_file, pdf_note])
    b_review.click(fn=handler_reviewer,
                   inputs=all_components + [title],
                   outputs=review_md)

if __name__ == "__main__":
    demo.launch()