| | import gradio as gr |
| | import json, datetime, io, textwrap, os, tempfile, re |
| |
|
| | |
| | try: |
| | from reportlab.lib.pagesizes import A4 |
| | from reportlab.pdfgen import canvas |
| | from reportlab.lib.units import mm |
| | REPORTLAB_AVAILABLE = True |
| | except Exception: |
| | REPORTLAB_AVAILABLE = False |
| |
|
| | def _sanitize_filename(name: str, default="checklist"): |
| | name = (name or "").strip() or default |
| | name = re.sub(r"[\\/:*?\"<>|#%&{}$!`@+=]", "-", name) |
| | name = re.sub(r"\s+", " ", name).strip() |
| | return name[:80] |
| |
|
# Checklist content: section name -> ordered list of (item label, help text).
# The item labels double as state keys and are referenced verbatim by
# TRIPOD_AI_MAP, so they must not be edited casually.
CHECKLIST = {
    "Cohort & Splits": [
        (
            "Clear inclusion/exclusion criteria and patient characteristics described",
            "Describe cohorts, inclusion/exclusion, and relevant demographics.",
        ),
        (
            "Separate training/validation/test by PATIENT (avoid leakage)",
            "Never split by record when patients have multiple records.",
        ),
        (
            "Hold-out test set used ONCE after model freeze",
            "No peeking / tuning after unblinding test results.",
        ),
        (
            "Use K-fold CV or fixed split with uncertainty estimate",
            "Prefer K-fold to get variance and out-of-fold predictions.",
        ),
    ],
    "Outcomes & Ground Truth": [
        (
            "Outcome (labels) definition is explicit and clinically appropriate",
            "Aligns with domain standards; note differences across guidelines.",
        ),
        (
            "Alignment with labeling source (algorithm vs. experts) clarified",
            "Model mimics its labeling source; note systematic errors.",
        ),
        (
            "If cross-standard validation, perform adaptation/transfer learning",
            "E.g., US vs EU definitions of VT require recalibration.",
        ),
    ],
    "Classification Reporting": [
        (
            "Confusion matrix included (per-class if multi-class)",
            "Provide full confusion matrix in paper or supplement.",
        ),
        (
            "Report Sensitivity/Recall and PPV/Precision",
            "Avoid reliance on metrics biased by majority class.",
        ),
        (
            "Use an overall robust metric (F1 or MCC for binary)",
            "Prefer F1 over Accuracy with imbalanced data; MCC robust for binary.",
        ),
        (
            "PR curve preferred when classes imbalanced; ROC optional",
            "PRC is more informative with rare positives.",
        ),
        (
            "Specify operating point and threshold selection procedure",
            "Report the deployed threshold; areas-under-curves should not stand alone.",
        ),
    ],
    "Regression Reporting": [
        (
            "Report RMSE (and optionally MAE) on original scale",
            "Avoid only correlation; keep units understandable.",
        ),
        (
            "Apply the Standard Deviation Rule",
            "RMSE should be much lower than outcome SD.",
        ),
        (
            "Use Bland–Altman plots for agreement",
            "Visualize bias and limits of agreement; discuss clinical acceptability.",
        ),
    ],
    "Reference Models & Comparators": [
        (
            "Compare against non-ML baselines",
            "Logistic/Cox/Linear regression as applicable.",
        ),
        (
            "Use same features/cohorts where possible",
            "Ensure fair comparison; for DL on raw signals, use common clinical markers.",
        ),
        (
            "Include established clinical scores where relevant",
            "E.g., CHARGE-AF, CHA2DS2-VASc, etc.",
        ),
    ],
    "Mode of Operation & Explainability": [
        (
            "Document preprocessing and normalization with units",
            "Rescale back to clinical units if normalized.",
        ),
        (
            "Describe dimensionality reduction (e.g., PCA) with loadings",
            "Share transformation matrix; cite key contributors.",
        ),
        (
            "Report variable importance / SHAP where applicable",
            "For feature-based models, supply importance metrics.",
        ),
        (
            "For deep learning, include attention maps/ablation",
            "Indicate salient regions and potential new biomarkers.",
        ),
    ],
    "Further Testing & Clinical Validation": [
        (
            "External validation on independent cohort(s)",
            "Assess generalization to new populations/devices.",
        ),
        (
            "Prospective and/or silent deployment study",
            "Before RCTs, test in workflow without influencing care.",
        ),
        (
            "Randomized/controlled evaluation of clinical impact",
            "Demonstrate patient benefit and safety before adoption.",
        ),
        (
            "Wording reflects validation stage",
            "Avoid over-claiming; be explicit about readiness level.",
        ),
        (
            "Compare to human experts only when appropriate",
            "Only if humans are the current gold standard for that task.",
        ),
    ],
    "Reproducibility, Ethics & Governance": [
        (
            "Release code/model cards/protocols as feasible",
            "Aid replication; table 0 provenance from raw to research dataset.",
        ),
        (
            "Report dataset shifts/limitations and intended use",
            "Document populations, devices, and contraindications.",
        ),
        (
            "Bias, safety, and hallucination/confabulation risks addressed",
            "Discuss failure modes and mitigation/override plans.",
        ),
        (
            "Regulatory alignment noted (e.g., study design, post-market)",
            "Reference relevant guidelines where applicable.",
        ),
    ],
}
| |
|
# Section names in display order (dict insertion order of CHECKLIST).
SECTIONS = list(CHECKLIST)
| |
|
# Lightweight mapping from TRIPOD-AI reporting sections to checklist item
# labels. Every label here is spelled exactly as in CHECKLIST.
TRIPOD_AI_MAP = {
    "Title/Abstract": [
        "Wording reflects validation stage",
    ],
    "Background/Objectives": [
        "Outcome (labels) definition is explicit and clinically appropriate",
    ],
    "Source of Data/Participants": [
        "Clear inclusion/exclusion criteria and patient characteristics described",
    ],
    "Predictors/Outcome": [
        "Document preprocessing and normalization with units",
        "Alignment with labeling source (algorithm vs. experts) clarified",
    ],
    "Sample Size/Missing Data": [
        "Use K-fold CV or fixed split with uncertainty estimate",
    ],
    "Statistical Analysis Methods": [
        "Compare against non-ML baselines",
        "Use same features/cohorts where possible",
    ],
    "Model Development/Validation": [
        "Separate training/validation/test by PATIENT (avoid leakage)",
        "Hold-out test set used ONCE after model freeze",
        "External validation on independent cohort(s)",
    ],
    "Performance Measures": [
        "Report Sensitivity/Recall and PPV/Precision",
        "Use an overall robust metric (F1 or MCC for binary)",
        "Specify operating point and threshold selection procedure",
        "Report RMSE (and optionally MAE) on original scale",
        "Use Bland–Altman plots for agreement",
    ],
    "Interpretation/Limitations": [
        "Report dataset shifts/limitations and intended use",
        "Bias, safety, and hallucination/confabulation risks addressed",
    ],
    "Other Information": [
        "Release code/model cards/protocols as feasible",
        "Regulatory alignment noted (e.g., study design, post-market)",
    ],
}
| |
|
# Figure type -> list of reporting tips shown in the "Exemplar Figures &
# Prompts" tab.
EXEMPLAR_PROMPTS = {
    "PR Curve (Imbalanced classes)": [
        "Report PPV (precision) and recall (sensitivity) at the chosen operating point.",
        "Explain why PRC is preferred over ROC with rare positives.",
        "Provide class prevalence and note how it sets the PRC baseline.",
    ],
    "ROC Curve": [
        "Include AUC/ROC but avoid relying on it alone when classes are imbalanced.",
        "Show how threshold tuning affects sensitivity/specificity near deployment point.",
        "Clarify that the deployed model uses a fixed threshold.",
    ],
    "Bland–Altman (Regression)": [
        "Report bias and 95% limits of agreement; relate to clinical acceptability.",
        "Include units (ms, bpm, etc.) and show any proportional bias.",
        "Use RMSE and compare against outcome SD (Standard Deviation Rule).",
    ],
    "Confusion Matrix": [
        "Provide per-class matrices in supplements for multi-class tasks.",
        "Derive sensitivity, specificity, PPV, NPV consistently (units: % or decimals).",
        "Explain any abstain/unknown handling if applicable.",
    ],
}
| |
|
def default_state():
    """Return a fresh state dict with every checklist item unchecked.

    The result maps each section name to a dict of ``{item label: False}``,
    following the order of ``CHECKLIST``. A new dict is built on every call.
    """
    fresh = {}
    for section, entries in CHECKLIST.items():
        fresh[section] = dict.fromkeys((entry[0] for entry in entries), False)
    return fresh
| |
|
def to_markdown(state, project_title, notes):
    """Render the checklist state as a Markdown report.

    Args:
        state: Mapping ``{section: {item label: bool}}``. Missing sections or
            labels count as unchecked; a non-dict state or section is treated
            as empty instead of raising.
        project_title: Report heading; a generic heading is used when falsy.
        notes: Free text added under a "Notes" heading when truthy.

    Returns:
        The report as a single newline-joined Markdown string, ending with the
        TRIPOD-AI mapping section.
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # formats to exactly the same timestamp text.
    now = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    if not isinstance(state, dict):
        state = {}
    lines = [
        f"# {project_title or 'AI in Cardiology – Best Practice Checklist'}",
        "",
        f"_Generated: {now}_",
        "",
    ]
    for sec, items in CHECKLIST.items():
        sec_state = state.get(sec)
        if not isinstance(sec_state, dict):
            sec_state = {}
        lines.append(f"## {sec}")
        for label, helptext in items:
            box = "[x]" if sec_state.get(label, False) else "[ ]"
            lines.append(f"- {box} **{label}** — {helptext}")
        lines.append("")
    if notes:
        lines.extend(["## Notes", notes, ""])
    lines.append("## TRIPOD-AI Mapping (lightweight)")
    lines.extend(f"- **{k}**: " + "; ".join(v) for k, v in TRIPOD_AI_MAP.items())
    return "\n".join(lines)
| |
|
def export_markdown_file(state, title, notes, fname_override):
    """Write the Markdown report to a new temporary directory.

    The file base name comes from ``fname_override``, else ``title``, else
    ``"checklist"``, passed through ``_sanitize_filename``.

    Returns:
        A tuple ``(markdown_text, update)`` where ``update`` is a ``gr.update``
        that points the download component at the written ``.md`` file and
        makes it visible.
    """
    report = to_markdown(state, title, notes)
    target_dir = tempfile.mkdtemp()
    stem = _sanitize_filename(fname_override or title or "checklist")
    out_path = os.path.join(target_dir, stem + ".md")
    with open(out_path, "w", encoding="utf-8") as handle:
        handle.write(report)
    return report, gr.update(value=out_path, visible=True)
| |
|
def export_pdf_file(state, title, notes, fname_override):
    """Render the Markdown report into a simple text PDF via ReportLab.

    Lines starting with ``# `` / ``## `` are drawn as bold headings; all other
    lines are drawn as body text (Markdown markup is not interpreted further).

    Returns:
        A tuple ``(file_update, message)``. When ReportLab is unavailable the
        file component is hidden and ``message`` explains how to enable PDF
        export; otherwise the update points at the written ``.pdf`` file and
        ``message`` is empty.
    """
    if not REPORTLAB_AVAILABLE:
        return gr.update(visible=False), "PDF export requires the 'reportlab' package. Add `reportlab` to requirements.txt."
    md = to_markdown(state, title, notes)
    tmpdir = tempfile.mkdtemp()
    base = _sanitize_filename(fname_override or title or "checklist")
    path = os.path.join(tmpdir, base + ".pdf")
    # Imported locally so this function does not depend on the names bound by
    # the optional module-level import.
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas
    from reportlab.lib.units import mm

    c = canvas.Canvas(path, pagesize=A4)
    _page_width, height = A4
    margin = 20 * mm
    x = margin
    y = height - margin
    for line in md.splitlines():
        # Characters-per-line shrink with font size so larger headings stay
        # inside the right margin. The heading budgets are conservative
        # estimates, not measured string widths.
        if line.startswith("# "):
            text, font, size, wrap_chars = line[2:], "Helvetica-Bold", 16, 50
        elif line.startswith("## "):
            text, font, size, wrap_chars = line[3:], "Helvetica-Bold", 12, 68
        else:
            text, font, size, wrap_chars = line, "Helvetica", 10, 95
        c.setFont(font, size)
        leading = size + 4  # 14pt for body text, proportionally more for headings
        for wrapped in textwrap.wrap(text, width=wrap_chars):
            if y < margin + 20:
                c.showPage()
                y = height - margin
                # showPage() resets the canvas state, so the font has to be
                # re-applied for the remainder of this paragraph.
                c.setFont(font, size)
            c.drawString(x, y, wrapped)
            y -= leading
        # Extra gap after every source line (blank lines contribute only this).
        y -= 6
    c.showPage()
    c.save()
    return gr.update(value=path, visible=True), ""
| |
|
def save_json_text(state, title, notes):
    """Serialize title, notes and checklist state to an indented JSON string.

    The payload also carries a fixed ``schema_version`` of 8.
    """
    payload = dict(title=title, notes=notes, state=state, schema_version=8)
    return json.dumps(payload, indent=2)
| |
|
def load_json_blob(json_text):
    """Parse a JSON blob produced by ``save_json_text``.

    Never raises for bad input: on any parse or shape problem the default
    (all-unchecked) state is returned and the error text is placed in the
    notes slot.

    Returns:
        A tuple ``(state, title, notes)``. ``state`` always has the full
        ``default_state()`` shape: unknown sections/labels in the payload are
        ignored, missing ones are unchecked, and values are coerced to bool.
    """
    try:
        payload = json.loads(json_text)
    except (TypeError, ValueError, RecursionError) as e:
        return default_state(), "", f"Error loading JSON: {e}"
    if not isinstance(payload, dict):
        return default_state(), "", "Error loading JSON: expected a JSON object at the top level."

    title = payload.get("title", "")
    if not isinstance(title, str):
        title = "" if title is None else str(title)
    notes = payload.get("notes", "")
    if not isinstance(notes, str):
        notes = "" if notes is None else str(notes)

    # Overlay the loaded values onto a fresh default so that a payload such as
    # {"state": null} or {"state": {"Cohort & Splits": []}} cannot break the
    # callers that index the state with .get().
    state = default_state()
    loaded = payload.get("state")
    if isinstance(loaded, dict):
        for sec, items in state.items():
            sec_loaded = loaded.get(sec)
            if isinstance(sec_loaded, dict):
                for label in items:
                    items[label] = bool(sec_loaded.get(label, False))
    return state, title, notes
| |
|
def preset_example():
    """Return the built-in example preset as ``(state, title, notes)``.

    Every item in the first three sections and in the comparator section is
    checked; all other sections stay unchecked.
    """
    preset = default_state()
    completed_sections = (
        "Cohort & Splits",
        "Outcomes & Ground Truth",
        "Classification Reporting",
        "Reference Models & Comparators",
    )
    for section in completed_sections:
        preset[section] = dict.fromkeys(preset[section], True)
    example_title = "ECG AF Classifier – Internal Validation"
    example_notes = "Preset focused on a balanced internal validation package; complete external and prospective items next."
    return preset, example_title, example_notes
| |
|
def reviewer_summary(state, title):
    """Build a reviewer-style Markdown summary from the checklist state.

    Checked items are listed as strengths, unchecked ones as concerns. The
    provisional verdict is "Accept" only when there are no concerns, otherwise
    "Major Revision".
    """
    met, missing = [], []
    for section, entries in CHECKLIST.items():
        ticked = state.get(section, {})
        for label, _help in entries:
            bucket = met if ticked.get(label, False) else missing
            bucket.append(f"- {section}: {label}")

    strengths_md = "\n".join(met) or "- (none checked)"
    concerns_md = "\n".join(missing) or "- (none)"
    verdict = "Major Revision" if missing else "Accept"
    return (
        f"**Reviewer Summary for**: {title or 'Manuscript'}\n\n"
        f"### Strengths\n{strengths_md}\n\n"
        f"### Concerns / Missing Items\n{concerns_md}\n\n"
        f"### Provisional Verdict\n- {verdict}\n"
    )
| |
|
# Gradio UI: layout, event handlers, and event wiring.
# NOTE(review): the source had lost its indentation, so the widget nesting
# below (which components sit inside which Row/Accordion/Tab) is reconstructed
# from statement order - confirm against the rendered app.
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# Evaluating AI-Enabled Medical Tests in Cardiology — Best Practice Checklist")
    gr.Markdown("Interactive checklist that operationalizes best-practice guidance for **supervised ML** medical tests in cardiology (e.g., ECG/EP). Tick items, save JSON, export a report (Markdown/PDF), map to TRIPOD-AI sections, or generate a reviewer-style summary.")
    with gr.Accordion("How to use", open=False):
        gr.Markdown("""
1. Fill in the checklist across the tabs.
2. **Save → JSON** to keep a portable state (you can reload it later).
3. **Export** to generate a report. You'll get a download tile just below the button.
4. Use **TRIPOD-AI Mapping** to align items with reporting sections.
5. **Reviewer Mode** creates a referee-style summary for internal review.
""")
    gr.Markdown("> **Reference paper**: *Evaluating artificial intelligence-enabled medical tests in cardiology: Best practice* (IJC Heart & Vasculature, 2025). DOI: 10.1016/j.ijcha.2025.101783.")
    gr.Markdown("> **Contributors to this tool**: *Jonas L. Isaksen*, Michael A. Riegler and Jørgen K. Kanters.")
    with gr.Row():
        title = gr.Textbox(label="Project / Manuscript Title", placeholder="e.g., ECG-based AF detection model")
        notes = gr.Textbox(label="Notes", lines=2, placeholder="Optional notes or action items")
        filename_override = gr.Textbox(label="Filename (optional)", placeholder="Defaults to title (sanitized) or 'checklist'")

    # One tab per checklist section; section_checks keeps the checkbox
    # components in CHECKLIST order so handlers can map values back to labels.
    section_checks = {}
    with gr.Tabs():
        for sec in SECTIONS:
            with gr.Tab(sec):
                checks = []
                for label, helptext in CHECKLIST[sec]:
                    with gr.Row():
                        checks.append(gr.Checkbox(label=label, info=helptext, value=False))
                section_checks[sec] = checks

        with gr.Tab("TRIPOD-AI Mapping"):
            gr.Markdown("### Lightweight TRIPOD-AI Mapping")
            trip_map_md = "\n".join([f"- **{k}**: {', '.join(v)}" for k, v in TRIPOD_AI_MAP.items()])
            gr.Markdown(trip_map_md)

        with gr.Tab("Exemplar Figures & Prompts"):
            for name, tips in EXEMPLAR_PROMPTS.items():
                gr.Markdown(f"### {name}")
                gr.Markdown("\n".join([f"- {t}" for t in tips]))

        with gr.Tab("Reviewer Mode"):
            gr.Markdown("Generate a structured reviewer-style summary based on the checklist.")
            b_review = gr.Button("Generate Reviewer Summary")
            review_md = gr.Markdown("")

    # Flat list of all checkboxes, section by section. Handlers receive their
    # values positionally in this order, followed by any extra text inputs.
    all_components = [box for sec in SECTIONS for box in section_checks[sec]]

    state_json = gr.State(value=default_state())

    def collect_state_from_args(args):
        """Rebuild the state dict from checkbox values given in UI order."""
        st = default_state()
        flat_labels = [(sec, label) for sec in SECTIONS for label, _ in CHECKLIST[sec]]
        for (sec, label), value in zip(flat_labels, args):
            st[sec][label] = bool(value)
        return st

    def handler_save_json(*args):
        """Checkbox values + (title, notes, filename) -> JSON text."""
        # The trailing filename input is wired in but not part of the payload.
        t, n = args[-3], args[-2]
        st = collect_state_from_args(args[:-3])
        return save_json_text(st, t, n)

    def handler_export_md(*args):
        """Checkbox values + (title, notes, filename) -> file update, preview."""
        t, n, fname = args[-3], args[-2], args[-1]
        st = collect_state_from_args(args[:-3])
        md, file_update = export_markdown_file(st, t, n, fname)
        return file_update, gr.update(visible=True, value=md)

    def handler_export_pdf(*args):
        """Checkbox values + (title, notes, filename) -> file update, note."""
        t, n, fname = args[-3], args[-2], args[-1]
        st = collect_state_from_args(args[:-3])
        file_update, note = export_pdf_file(st, t, n, fname)
        return file_update, note

    def handler_reviewer(*args):
        """Checkbox values + title -> reviewer summary Markdown."""
        t = args[-1]
        st = collect_state_from_args(args[:-1])
        return reviewer_summary(st, t)

    def sync_to_ui(st):
        """Flatten a state dict into checkbox values in UI order.

        A non-dict state or section (e.g. from hand-edited JSON) is treated as
        empty so loading never fails here.
        """
        if not isinstance(st, dict):
            st = {}
        vals = []
        for sec in SECTIONS:
            sec_state = st.get(sec)
            if not isinstance(sec_state, dict):
                sec_state = {}
            vals.extend(bool(sec_state.get(label, False)) for label, _ in CHECKLIST[sec])
        return vals

    def handler_load(json_text):
        """JSON text -> [state, title, notes, *checkbox values]."""
        st, t, n = load_json_blob(json_text)
        return [st, t, n] + sync_to_ui(st)

    def handler_preset():
        """Built-in preset -> [state, title, notes, *checkbox values]."""
        st, t, n = preset_example()
        return [st, t, n] + sync_to_ui(st)

    with gr.Row():
        b_save = gr.Button("Save → JSON")
        b_load = gr.Button("Load JSON")
        b_preset = gr.Button("Load preset: ECG AF classifier")

    json_text = gr.Textbox(label="JSON", lines=10, placeholder="Click 'Save' to generate JSON. Paste here then 'Load' to restore.")

    with gr.Row():
        b_export_md = gr.Button("Export as Markdown", scale=0)
        md_file = gr.File(label="Download Markdown", visible=False, scale=1)
    with gr.Accordion("Preview Markdown", open=False):
        md_preview = gr.Markdown("")

    with gr.Row():
        b_export_pdf = gr.Button("Export as PDF", scale=0)
        pdf_file = gr.File(label="Download PDF", visible=False, scale=1)
    pdf_note = gr.Markdown("")

    if not REPORTLAB_AVAILABLE:
        gr.Markdown("**Note:** PDF export requires `reportlab`. Add it to `requirements.txt`.")

    # Event wiring. Inputs are always the checkboxes first, then text fields,
    # matching the positional slicing done in the handlers above.
    b_save.click(fn=handler_save_json, inputs=all_components + [title, notes, filename_override], outputs=json_text)
    b_load.click(fn=handler_load, inputs=json_text, outputs=[state_json, title, notes] + all_components)
    b_preset.click(fn=handler_preset, inputs=None, outputs=[state_json, title, notes] + all_components)

    b_export_md.click(fn=handler_export_md, inputs=all_components + [title, notes, filename_override], outputs=[md_file, md_preview])
    b_export_pdf.click(fn=handler_export_pdf, inputs=all_components + [title, notes, filename_override], outputs=[pdf_file, pdf_note])

    b_review.click(fn=handler_reviewer, inputs=all_components + [title], outputs=review_md)
| |
|
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
| |
|