# MattStammers's picture
# added explainer at the top
# 29210be verified
import gradio as gr
from transformers import pipeline
# Load the Distil_IBD_BERT model for IBD classification.
# top_k=None asks the pipeline to return the score of every label; it is the
# documented replacement for the deprecated return_all_scores=True argument.
classifier = pipeline(
    "text-classification",
    model="MattStammers/Distil_IBD_BERT",
    top_k=None,
)
def classify_doc(text: str):
    """Classify a clinical document as IBD or Non-IBD.

    Args:
        text: Free-text clinical note or report to classify.

    Returns:
        A ``(label, confidence)`` tuple: the display name of the
        highest-scoring class and its score rounded to three decimals.
        Labels missing from the display-name mapping are returned as-is.
    """
    # Ask the tokenizer to truncate over-long documents to the model's
    # maximum input length instead of failing on them.
    scores = classifier(text, truncation=True)
    # For a single string the pipeline may wrap the per-label scores in an
    # outer list; unwrap it so both result shapes are handled.
    if scores and isinstance(scores[0], list):
        scores = scores[0]
    # Choose the highest-scoring label
    best = max(scores, key=lambda entry: entry["score"])
    label_map = {"LABEL_0": "Non-IBD", "LABEL_1": "IBD"}
    label = label_map.get(best["label"], best["label"])
    return label, round(best["score"], 3)
# Example reports for testing.
# Each report is written as a tuple of lines joined with newlines.

# Colonoscopy report whose impression is ulcerative colitis (IBD).
positive_example = "\n".join(
    (
        "Patient: 45-year-old female",
        "Procedure: Colonoscopy",
        "Clinical History: 6-month history of intermittent bloody diarrhea, abdominal cramping, and tenesmus.",
        "Findings:",
        "• Diffuse mucosal erythema and friability extending continuously from rectum through sigmoid colon.",
        "• Multiple superficial ulcerations (3-5 mm) with easily induced bleeding.",
        "• Loss of normal vascular pattern and pseudopolyps in the descending colon.",
        "Biopsies taken from ulcer margin reveal crypt abscesses.",
        "Impression: Findings are consistent with moderate ulcerative colitis (IBD).",
    )
)

# Colonoscopy report followed by a histology report, both negative for IBD.
negative_example = "\n".join(
    (
        "",  # the text starts with a newline
        "Procedure: Screening Colonoscopy",
        "Clinical History: Diarrhoea, routine colorectal cancer screening. FIT 34",
        "Findings:",
        "• Normal colonic mucosa without erythema, ulceration, or friability.",
        "• Preserved vascular pattern and intact crypt architecture.",
        "• No crypt abscesses, granulomas, or inflammatory infiltrates.",
        "Diagnosis: No evidence of inflammatory bowel disease (Non-IBD).",
        "Mapping biopsies taken in light of underlying diarrhoea.",
        "Specimen: Multiple colonic mucosal biopsies",
        "Clinical History: Surveillance colonoscopy in patient with diverticulosis.",
        "Microscopic Description:",
        "• Colonic mucosa with intact crypt architecture.",
        "• No basal plasmacytosis, crypt branching, or mucosal erosion.",
        "• Scattered lymphocytes and plasma cells evenly distributed in lamina propria.",
        "• No granulomas or dysplasia identified.",
        "Diagnosis: Negative for inflammatory bowel disease.",
    )
)
# Build Gradio interface.
# The widgets, examples and description are created first and then passed to
# gr.Interface by name.
_report_input = gr.Textbox(
    lines=10,
    placeholder="Enter clinical note or patient letter…",
)
_result_outputs = [
    gr.Label(num_top_classes=2, label="Prediction"),
    gr.Textbox(label="Confidence"),
]
# One row per example; each row holds the value for the single input box.
_demo_examples = [[positive_example], [negative_example]]
_demo_description = (
    "Classify free-text clinical documents as IBD vs non-IBD using "
    "[Distil_IBD_BERT](https://huggingface.co/MattStammers/Distil_IBD_BERT). "
    "Please note these models are very likely over-fitted to the data on "
    "which they were trained and should be re-trained locally before "
    "attempting inference or results may not be amazing. "
    "However, this demo gives an idea of the models capabilities which far "
    "exceed that of any other open weight models for IBD detection in "
    "free-text currently available. "
    "Please read the paper for full information. "
    "Reference: Stammers M, Gwiggner M, Nouraei R, Metcalf C, Batchelor J. "
    "From Rule-Based to DeepSeek R1: A Robust Comparative Evaluation of "
    "Fifty Years of Natural Language Processing (NLP) Models To Identify "
    "Inflammatory Bowel Disease Cohorts. medRxiv. 2025:2025-07."
)

demo = gr.Interface(
    fn=classify_doc,
    inputs=_report_input,
    outputs=_result_outputs,
    examples=_demo_examples,
    title="IBD Cohort Identifier",
    description=_demo_description,
)
# Start the app only when this file is run as a script, so importing the
# module does not start a server.
# share=True is dropped: Gradio does not support share links on Hugging Face
# Spaces (it warns and ignores the flag), and when run locally it would expose
# the app, and any clinical text entered, through a public tunnel.
if __name__ == "__main__":
    demo.launch()