| """
|
| MedClear - HuggingFace Space
|
| Medical text simplification with FLAN-T5 + MedlinePlus RAG.
|
| """
|
|
|
| import os
|
| import re
|
| import urllib.parse
|
| import urllib.request
|
| import xml.etree.ElementTree as ET
|
|
|
| import gradio as gr
|
| import torch
|
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
# Hugging Face model identifier; passed to from_pretrained() for both the
# tokenizer and the seq2seq model below.
MODEL_ID = "DTanzillo/medclear-v2-base"

# Base URL that search_medlineplus() appends its query string to.
MEDLINEPLUS_API = "https://wsearch.nlm.nih.gov/ws/query"

# Text prepended to every clinical note before tokenization in simplify().
SIMPLIFY_PREFIX = "simplify: "
|
|
|
|
|
# Glossary: medical term or abbreviation -> plain-language definition.
#
# find_terms() scans the clinical note for every key in this dict and
# simplify() prints the value next to the matched text. Keys that are all
# upper-case and at most three characters long are matched with the stricter
# abbreviation boundaries in _build_term_pattern(); every other key is matched
# case-insensitively on word boundaries.
TERM_PATTERNS = {
    # --- Hand-picked terms (not alphabetized) ---
    "afebrile": "no fever",
    "augmentin": "an antibiotic (amoxicillin/clavulanate)",
    "bilateral": "on both sides",
    "cataract": "clouding of the lens in the eye",
    "distension": "swelling or bloating",
    "dorsal": "the back side (of the hand, foot, etc.)",
    "fracture": "a broken bone",
    "ibuprofen": "an over-the-counter anti-inflammatory pain reliever",
    "intraoperative": "during surgery",
    "IOL": "intraocular lens (artificial lens implant for the eye)",
    "irrigation": "flushing a wound with fluid to clean it",
    "laparoscopic": "minimally invasive surgery using small incisions and a camera",
    "moxifloxacin": "an antibiotic eye drop",
    "omentum": "a fatty tissue layer that covers organs in the abdomen",
    "oxycodone": "a prescription opioid pain medication",
    "perforation": "a hole or tear in an organ wall",
    "phacoemulsification": "cataract removal surgery using ultrasound",
    "prednisolone": "a steroid medication to reduce inflammation",
    "RLQ": "right lower quadrant (lower right area of the abdomen)",
    "tendon": "a strong cord connecting muscle to bone",
    "tetanus": "a serious bacterial infection; a booster shot prevents it",
    "topical anesthesia": "numbing medication applied to the skin surface",
    "unilateral": "on one side only",
    "visual acuity": "sharpness of vision (e.g., 20/20 is normal)",
    "abscess": "a pocket of pus from an infection",
    "acetaminophen": "Tylenol (over-the-counter pain and fever reliever)",
    "absorbable sutures": "stitches that dissolve on their own",
    "acute": "sudden and severe",
    "incision": "a surgical cut",
    "local anesthesia": "numbing medication injected at the procedure site",
    "aponeurotomy": "a procedure to cut tight tissue bands",
    "chalazion": "a painless bump on the eyelid from a blocked oil gland",
    "contracture": "permanent tightening of tissue that limits movement",
    "curettage": "scraping out tissue from inside a body cavity",
    "Dupuytren": "a hand condition where fingers curl inward from tight tissue",
    "erythema": "redness of the skin",
    "granulomatous": "containing a clump of immune cells (granuloma)",
    "hemostasis": "stopping of bleeding",
    "laceration": "a cut or tear in the skin",
    "mupirocin": "an antibiotic ointment for skin infections",
    "paronychia": "an infection around the fingernail or toenail",
    "fluctuant": "soft and fluid-filled (when pressed)",
    "I&D": "incision and drainage (cutting open and draining an infection)",
    "iodoform": "an antiseptic gauze used to pack wounds",
    "purulent": "containing pus",
    "TMP-SMX": "a combination antibiotic (Bactrim/Septra)",
    "LLQ": "left lower quadrant (lower left area of the abdomen)",
    "LUQ": "left upper quadrant (upper left area of the abdomen)",
    "RUQ": "right upper quadrant (upper right area of the abdomen)",
    "ventral": "the front side (of the body)",
    "ambulating": "walking",

    # --- General glossary, roughly alphabetical (case-insensitive) ---
    "A1C": "3-month blood sugar average",
    "ABG": "arterial blood gas test",
    "ACL": "anterior cruciate ligament (knee)",
    "ADHD": "attention deficit hyperactivity disorder",
    "adhesions": "internal scars connecting body structures",
    "ADL": "activities of daily living",
    "AIDS": "acquired immunodeficiency syndrome",
    "AKA": "above-knee amputation",
    "AKI": "sudden kidney injury",
    "ALS": "amyotrophic lateral sclerosis (Lou Gehrig's disease)",
    "AMA": "against medical advice",
    "AMI": "heart attack",
    "amniocentesis": "test of fluid around baby in the womb",
    "anaemia": "low red blood cell count causing tiredness",
    "anaesthesia": "medication to prevent pain during procedures",
    "anemia": "low red blood cells",
    "aneurysm": "weakened, ballooning blood vessel",
    "angioplasty": "opening a blocked artery",
    "antibiotics": "medicines to fight bacterial infections",
    "anticoagulant": "blood-thinning medication",
    "appendectomy": "appendix removal surgery",
    "appendicitis": "appendix inflammation",
    "ARDS": "severe lung failure",
    "arrhythmia": "irregular heart rhythm",
    "arthroplasty": "joint replacement surgery",
    "ASAP": "as soon as possible",
    "ascites": "abdominal fluid buildup",
    "ASD": "autism spectrum disorder",
    "atrial fibrillation": "irregular heart rhythm",
    "BID": "twice daily",
    "biopsy": "tissue sample for testing",
    "BKA": "below-knee amputation",
    "BMI": "body mass index",
    "BMP": "basic blood chemistry panel",
    "BP": "blood pressure",
    "BPH": "enlarged prostate",
    "BR": "bed rest",
    "bradycardia": "slow heart rate",
    "breech": "baby positioned bottom-first in the womb",
    "bronchoscopy": "lung airway camera exam",
    "BUN": "blood urea nitrogen (kidney test)",
    "CABG": "heart bypass surgery",
    "CAD": "coronary artery disease",
    "caesarean": "surgical delivery of a baby through the abdomen",
    "CAT": "CT scan (a type of X-ray)",
    "catheter": "a small tube passed through the body",
    "catheterization": "threading a tube into the heart",
    "CBC": "complete blood count",
    "CC": "chief complaint",
    "CCU": "coronary care unit",
    "cervix": "the entrance or neck of the womb",
    "CHD": "congenital heart disease",
    "CHF": "heart failure",
    "CHI": "closed head injury",
    "cholecystectomy": "gallbladder removal surgery",
    "cholecystitis": "gallbladder inflammation",
    "cirrhosis": "liver scarring",
    "CKD": "chronic kidney disease",
    "CMV": "cytomegalovirus (a common virus)",
    "CNS": "central nervous system (brain and spinal cord)",
    "colectomy": "colon removal surgery",
    "colonoscopy": "colon camera exam",
    "colposcopy": "examination of the cervix using a microscope",
    "conception": "when an egg is fertilized by sperm",
    "contraception": "birth control",
    "COPD": "chronic lung disease",
    "corticosteroids": "anti-inflammatory hormonal medications",
    "CP": "cerebral palsy",
    "CPAP": "continuous positive airway pressure",
    "CPR": "cardiopulmonary resuscitation",
    "craniotomy": "skull opening surgery",
    "CRF": "chronic kidney failure",
    "CRP": "inflammation marker",
    "CSF": "cerebrospinal fluid (fluid around brain and spine)",
    "CT": "computerized tomography scan",
    "CVA": "stroke",
    "CXR": "chest X-ray",
    "D&C": "surgical procedure on the uterus",
    "DC": "discharge",
    "debridement": "removal of dead tissue",
    "DES": "drug-coated stent",
    "diabetes": "condition causing high blood sugar",
    "dilatation": "the cervix opening during labor",
    "discectomy": "disc removal surgery",
    "DKA": "diabetic emergency (ketoacidosis)",
    "DM": "diabetes",
    "DNR": "do not resuscitate",
    "DOA": "dead on arrival",
    "DOE": "shortness of breath with exertion",
    "DVT": "deep vein blood clot",
    "dysphagia": "difficulty swallowing",
    "dyspnea": "shortness of breath",
    "EBL": "estimated blood loss",
    "ECG": "heart tracing test",
    "ECHO": "heart ultrasound",
    "eclampsia": "seizures as a complication of pre-eclampsia",
    "ectopic pregnancy": "pregnancy growing outside the womb",
    "ED": "emergency department",
    "edema": "swelling from fluid",
    "EEG": "brain wave test",
    "EF": "heart pumping percentage",
    "effusion": "fluid buildup",
    "EKG": "heart tracing test",
    "embolism": "blood clot blocking a vessel",
    "EMG": "muscle electrical test",
    "encephalopathy": "brain dysfunction",
    "endometriosis": "womb lining tissue growing in other places",
    "endoscopy": "internal camera exam",
    "ENT": "ear, nose and throat",
    "epidural": "pain relief injection into the lower back",
    "episiotomy": "a cut to widen the birth opening during delivery",
    "ER": "emergency room",
    "ERCP": "procedure to check liver, gallbladder, bile ducts and pancreas",
    "ESR": "inflammation marker",
    "ESRD": "kidney failure",
    "ETOH": "alcohol",
    "extubation": "removing a breathing tube",
    "fasciotomy": "emergency muscle compartment release",
    "fibroids": "non-cancerous growths in the womb wall",
    "fibrosis": "scarring",
    "forceps": "instruments used to help deliver a baby",
    "FWB": "full weight bearing",
    "Fx": "fracture",
    "GCS": "consciousness score",
    "GERD": "acid reflux disease",
    "gestational diabetes": "diabetes triggered during pregnancy",
    "GFR": "kidney function test",
    "GI": "gastrointestinal (digestive system)",
    "GSW": "gunshot wound",
    "H/A": "headache",
    "HAV": "hepatitis A virus",
    "HBV": "hepatitis B virus",
    "HCV": "hepatitis C virus",
    "HDL": "good cholesterol",
    "HEENT": "head, eyes, ears, nose, throat",
    "hematemesis": "vomiting blood",
    "hematuria": "blood in urine",
    "hemoptysis": "coughing up blood",
    "hemorrhage": "severe bleeding",
    "HEP": "home exercise program",
    "heparin": "blood-thinning injection",
    "hepatitis": "liver inflammation",
    "HIV": "human immunodeficiency virus",
    "HPV": "human papillomavirus",
    "HR": "heart rate",
    "HRT": "hormone replacement therapy",
    "HTN": "high blood pressure",
    "Hx": "history",
    "hyperglycemia": "high blood sugar",
    "hyperkalemia": "high potassium",
    "hypertension": "high blood pressure",
    "hypoglycemia": "low blood sugar",
    "hyponatremia": "low sodium",
    "hypotension": "low blood pressure",
    "hysterectomy": "uterus removal surgery",
    "IBD": "inflammatory bowel disease",
    "IBS": "irritable bowel syndrome",
    "ICD": "implantable heart defibrillator",
    "ICU": "intensive care unit",
    "IM": "into the muscle",
    "incontinence": "loss of bladder or bowel control",
    "induction": "starting labor artificially",
    "infusion": "medication given slowly through an IV",
    "INR": "blood clotting measure",
    "intubation": "placing a breathing tube",
    "ischemia": "reduced blood flow",
    "IUD": "intrauterine device (birth control)",
    "IV": "into the vein",
    "IVF": "in vitro fertilization (test tube baby procedure)",
    "jaundice": "yellowing of the skin and eyes",
    "laminectomy": "spine decompression surgery",
    "laparoscopy": "keyhole surgery using small cuts",
    "laparotomy": "abdominal opening surgery",
    "LDL": "bad cholesterol",
    "leukocytosis": "elevated white blood cells",
    "LFT": "liver function tests",
    "LOS": "length of stay",
    "LP": "lumbar puncture (spinal tap)",
    "LUE": "left upper extremity",
    "mastectomy": "breast removal surgery",
    "meconium": "baby's first bowel movement (black and sticky)",
    "meningitis": "inflammation of the brain lining",
    "menopause": "when periods stop, usually around age 50",
    "MI": "heart attack",
    "miscarriage": "loss of pregnancy before 23 weeks",
    "MRI": "magnetic resonance imaging",
    "MRSA": "antibiotic-resistant staph infection",
    "MS": "multiple sclerosis",
    "MVA": "motor vehicle accident",
    "myelopathy": "spinal cord compression",
    "myocardial infarction": "heart attack",
    "necrosis": "tissue death",
    "nephrectomy": "kidney removal surgery",
    "neuropathy": "nerve damage",
    "NG": "nasogastric (tube through nose to stomach)",
    "NIHSS": "stroke severity score",
    "NKA": "no known allergies",
    "NPO": "nothing by mouth",
    "NSAID": "anti-inflammatory drug (like ibuprofen)",
    "NSTEMI": "heart attack (non-ST elevation type)",
    "NWB": "non-weight bearing",
    "OA": "osteoarthritis",
    "OCD": "obsessive-compulsive disorder",
    "oedema": "swelling from fluid buildup",
    "OR": "operating room",
    "OT": "occupational therapy",
    "ovulation": "when an egg is released from the ovary",
    "PACU": "post-anesthesia care unit (recovery room)",
    "PAD": "peripheral artery disease",
    "pancreatitis": "pancreas inflammation",
    "PCI": "opening blocked artery with catheter/stent",
    "PD": "Parkinson's disease",
    "PE": "blood clot in lung",
    "PEG": "feeding tube through the stomach wall",
    "peritonitis": "abdominal lining infection",
    "PET": "PET scan (imaging test)",
    "PFT": "lung function test",
    "PID": "pelvic inflammatory disease",
    "placenta": "organ connecting mother and baby during pregnancy",
    "placenta praevia": "placenta covering the cervix",
    "platelets": "blood cells needed for clotting",
    "PMH": "past medical history",
    "PNA": "pneumonia",
    "pneumonia": "lung infection",
    "PO": "by mouth",
    "POD": "post-operative day",
    "polyp": "a growth of tissue on the lining of an organ",
    "pre-eclampsia": "high blood pressure and protein in urine during pregnancy",
    "premature birth": "baby born before 37 weeks",
    "PRN": "as needed",
    "prolapse": "organ pushing through the vaginal wall",
    "prostaglandin": "hormone that causes womb contractions",
    "prostatectomy": "prostate removal surgery",
    "PSA": "prostate specific antigen (prostate test)",
    "PT": "physical therapy",
    "PTSD": "post-traumatic stress disorder",
    "PWB": "partial weight bearing",
    "QHS": "at bedtime",
    "QID": "four times daily",
    "RA": "rheumatoid arthritis",
    "radiculopathy": "pinched nerve pain",
    "RBC": "red blood cell",
    "rehab": "rehabilitation",
    "RLE": "right lower extremity",
    "ROM": "range of motion",
    "RSV": "respiratory syncytial virus",
    "RUE": "right upper extremity",
    "Rx": "prescription",
    "SCI": "spinal cord injury",
    "sepsis": "life-threatening blood infection",
    "SIDS": "sudden infant death syndrome",
    "SLE": "lupus",
    "SNF": "skilled nursing facility",
    "SOB": "shortness of breath",
    "speculum": "instrument to open the vagina for examination",
    "SQ": "under the skin",
    "STAT": "immediately",
    "STD": "sexually transmitted disease",
    "STEMI": "heart attack (ST elevation type)",
    "stenosis": "abnormal narrowing",
    "stillbirth": "baby born dead after 23 weeks of pregnancy",
    "sutures": "stitches",
    "syncope": "fainting",
    "tachycardia": "fast heart rate",
    "TB": "tuberculosis",
    "TBI": "traumatic brain injury",
    "thoracotomy": "chest opening surgery",
    "THR": "total hip replacement",
    "thrombocytopenia": "low platelets",
    "thrombosis": "blood clot formation",
    "thyroidectomy": "thyroid removal surgery",
    "TIA": "mini-stroke",
    "TID": "three times daily",
    "TKR": "total knee replacement",
    "tocolysis": "treatment to delay or prevent early labor",
    "TPN": "total parenteral nutrition (IV feeding)",
    "trach": "tracheostomy",
    "tracheostomy": "breathing tube in neck",
    "TSH": "thyroid stimulating hormone",
    "Tx": "treatment",
    "UA": "urinalysis (urine test)",
    "ultrasound": "imaging using sound waves",
    "urethra": "tube for passing urine",
    "URI": "upper respiratory infection (common cold)",
    "UTI": "urinary tract infection",
    "ventouse": "suction cup used to help deliver a baby",
    "VS": "vital signs",
    "WBC": "white blood cell",
    "WNL": "within normal limits",
}
|
|
|
|
|
# Load the tokenizer and seq2seq model once, at import time; simplify() reuses
# these module-level objects on every call.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
# Put the model in evaluation (inference) mode before serving requests.
model.eval()
print("Model loaded!")
|
|
|
|
|
def _parse_medlineplus_response(xml_text):
    """Pull title, URL and a short summary out of a MedlinePlus search reply.

    Args:
        xml_text: The XML body returned by the search endpoint.

    Returns:
        A dict with keys "title", "url" and "summary", or None when the
        reply has no <document> element or the document has no title.

    Raises:
        xml.etree.ElementTree.ParseError: If xml_text is not well-formed XML.
    """
    doc = ET.fromstring(xml_text).find(".//document")
    if doc is None:
        return None

    title = ""
    title_elem = doc.find('.//content[@name="title"]')
    if title_elem is not None and title_elem.text:
        # The text can carry inline markup; drop any tags.
        title = re.sub(r"<[^>]+>", "", title_elem.text).strip()
    if not title:
        return None

    summary = ""
    summary_elem = doc.find('.//content[@name="FullSummary"]')
    if summary_elem is not None and summary_elem.text:
        summary = re.sub(r"<[^>]+>", " ", summary_elem.text)
        summary = re.sub(r"\s+", " ", summary).strip()
        # Keep at most the first two sentences.
        summary = ". ".join(summary.split(". ")[:2])
        # Only add the closing period when the kept text does not already end
        # a sentence; appending unconditionally produced ".." for short
        # summaries and a lone "." for empty ones.
        if summary and not summary.endswith((".", "!", "?")):
            summary += "."

    return {"title": title, "url": doc.get("url", ""), "summary": summary}


def search_medlineplus(term):
    """Look up *term* in the MedlinePlus health-topics search service.

    Sends one GET request (5 second timeout) asking for a single result.

    Args:
        term: The search string; it is URL-quoted before being sent.

    Returns:
        A dict with keys "title", "url" and "summary" for the first hit, or
        None when there is no usable hit or anything goes wrong (network
        failure, timeout, malformed XML, ...).
    """
    try:
        encoded = urllib.parse.quote(term)
        url = f"{MEDLINEPLUS_API}?db=healthTopics&term={encoded}&retmax=1"
        req = urllib.request.Request(url, headers={"User-Agent": "MedClear/1.0"})
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = resp.read().decode()
        return _parse_medlineplus_response(data)
    except Exception:
        # Deliberately best-effort: the lookup is optional enrichment, so any
        # failure is reported to the caller as "no result" instead of raising.
        return None
|
|
|
|
|
|
|
# Inflected or alternative spellings -> the canonical glossary term they
# should be reported under. find_terms() only uses an entry when its value is
# a TERM_PATTERNS key and the variant is not itself a TERM_PATTERNS key.
LEMMA_MAP = {
    "edematous": "edema",
    "oedema": "edema",
    "oedematous": "edema",
    "cataracts": "cataract",
    "adhesions": "adhesion",
    "hemorrhaging": "hemorrhage",
    "hemorrhagic": "hemorrhage",
    "haemorrhage": "hemorrhage",
    "anaemia": "anemia",
    "anemic": "anemia",
    "tachycardic": "tachycardia",
    "bradycardic": "bradycardia",
    "hypotensive": "hypotension",
    "hypertensive": "hypertension",
    "stenotic": "stenosis",
    "thrombotic": "thrombosis",
    "distended": "distension",
    "sutured": "sutures",
    "suturing": "sutures",
    "intubated": "intubation",
    "extubated": "extubation",
    "perforation": "perforation",
    "perforated": "perforation",
    "irrigated": "irrigation",
    "unilateral": "unilateral",
    "bilateral": "bilateral",
    "ambulatory": "ambulating",
    "ambulation": "ambulating",
    "erythematous": "erythema",
    "ischemic": "ischemia",
    "necrotic": "necrosis",
    "syncopal": "syncope",
    "embolic": "embolism",
    "dyspneic": "dyspnea",
}
|
|
|
|
|
# Short upper-case abbreviations that _build_term_pattern() matches
# case-sensitively (no re.IGNORECASE); these spellings are also ordinary
# English words in lower case ("or", "im", "do", ...).
CASE_SENSITIVE_ABBREVS = {"OR", "PT", "IM", "DO", "ER", "BP", "HR", "CC", "DC"}
|
|
|
|
|
# Curated MedlinePlus link for a glossary term; "" means "no link".
#
# find_terms() looks a term up here by its upper-case form, then its
# lower-case form, then its exact spelling, so each key must equal one of
# those three forms of the matching TERM_PATTERNS key.
#
# Each key appears exactly once: a duplicate key in a dict literal is silently
# overwritten by the later entry ("ROM" used to be listed twice).
TERM_URLS = {
    # --- Abbreviations ---
    "PO": "https://medlineplus.gov/ency/article/002023.htm",
    "PRN": "https://medlineplus.gov/ency/article/002023.htm",
    "NPO": "https://medlineplus.gov/ency/article/002023.htm",
    "IV": "",
    "IM": "https://medlineplus.gov/ency/article/003423.htm",
    "SQ": "https://medlineplus.gov/ency/article/003423.htm",
    "BID": "https://medlineplus.gov/ency/article/002023.htm",
    "TID": "https://medlineplus.gov/ency/article/002023.htm",
    "QID": "https://medlineplus.gov/ency/article/002023.htm",
    "QHS": "https://medlineplus.gov/ency/article/002023.htm",
    "DVT": "https://medlineplus.gov/deepveinthrombosis.html",
    "PE": "https://medlineplus.gov/pulmonaryembolism.html",
    "COPD": "https://medlineplus.gov/copd.html",
    "CHF": "https://medlineplus.gov/heartfailure.html",
    "CKD": "https://medlineplus.gov/chronickidneydisease.html",
    "CVA": "https://medlineplus.gov/stroke.html",
    "TIA": "https://medlineplus.gov/transientischemicattack.html",
    "AKI": "https://medlineplus.gov/ency/article/000501.htm",
    "DKA": "https://medlineplus.gov/ency/article/000320.htm",
    "NSTEMI": "https://medlineplus.gov/heartattack.html",
    "STEMI": "https://medlineplus.gov/heartattack.html",
    "CABG": "https://medlineplus.gov/coronaryarterybypasssurgery.html",
    "PCI": "https://medlineplus.gov/angioplasty.html",
    "EF": "https://medlineplus.gov/ency/article/003757.htm",
    "ICU": "https://medlineplus.gov/criticalcare.html",
    "PT": "https://medlineplus.gov/ency/article/001942.htm",
    "OT": "https://medlineplus.gov/ency/article/007455.htm",
    "CBC": "https://medlineplus.gov/lab-tests/complete-blood-count-cbc/",
    "BMI": "https://medlineplus.gov/ency/article/007196.htm",
    "INR": "https://medlineplus.gov/lab-tests/prothrombin-time-test-and-inr-ptinr/",
    "A1C": "https://medlineplus.gov/a1c.html",
    "EBL": "https://medlineplus.gov/bleeding.html",
    "POD": "https://medlineplus.gov/surgery.html",
    "ARDS": "https://medlineplus.gov/ency/article/000103.htm",
    "ESRD": "https://medlineplus.gov/kidneyfailure.html",
    "GCS": "https://medlineplus.gov/coma.html",
    "BMP": "https://medlineplus.gov/lab-tests/basic-metabolic-panel-bmp/",
    "CRP": "https://medlineplus.gov/lab-tests/c-reactive-protein-crp-test/",
    "ESR": "https://medlineplus.gov/lab-tests/erythrocyte-sedimentation-rate-esr/",
    "DES": "https://medlineplus.gov/angioplasty.html",
    "NIHSS": "https://medlineplus.gov/stroke.html",

    # --- Terms with a curated link ---
    "laparoscopic": "https://medlineplus.gov/ency/article/007016.htm",
    "ibuprofen": "https://medlineplus.gov/druginfo/meds/a682159.html",
    "oxycodone": "https://medlineplus.gov/druginfo/meds/a682132.html",
    "prednisolone": "https://medlineplus.gov/druginfo/meds/a615042.html",
    "tetanus": "https://medlineplus.gov/tetanus.html",
    "abscess": "https://medlineplus.gov/abscess.html",
    "I&D": "https://medlineplus.gov/abscess.html",
    "acetaminophen": "https://medlineplus.gov/druginfo/meds/a681004.html",
    "chalazion": "https://medlineplus.gov/ency/article/001006.htm",
    "contracture": "https://medlineplus.gov/ency/article/003185.htm",
    "Dupuytren": "https://medlineplus.gov/ency/article/007692.htm",
    "erythema": "https://medlineplus.gov/skininfections.html",
    "hemostasis": "https://medlineplus.gov/bleeding.html",
    "laceration": "https://medlineplus.gov/cuts.html",
    "mupirocin": "https://medlineplus.gov/druginfo/meds/a688004.html",
    "paronychia": "https://medlineplus.gov/ency/article/001444.htm",
    "sutures": "https://medlineplus.gov/ency/article/000043.htm",
    "local anesthesia": "https://medlineplus.gov/anesthesia.html",
    "ROM": "https://medlineplus.gov/ency/article/003165.htm",

    # --- Terms that are explicitly left without a link ---
    "acute": "",
    "afebrile": "",
    "ambulating": "",
    "aponeurotomy": "",
    "bilateral": "",
    "curettage": "",
    "distension": "",
    "dorsal": "",
    "fluctuant": "",
    "granulomatous": "",
    "intraoperative": "",
    "iodoform": "",
    "irrigation": "",
    "omentum": "",
    "purulent": "",
    "tendon": "",
    "topical anesthesia": "",
    "unilateral": "",
    "ventral": "",
    "visual acuity": "",
    "augmentin": "https://medlineplus.gov/druginfo/meds/a685024.html",
    "TMP-SMX": "https://medlineplus.gov/druginfo/meds/a684025.html",
    "moxifloxacin": "https://medlineplus.gov/druginfo/meds/a604003.html",
    "cataract": "https://medlineplus.gov/cataract.html",
    "fracture": "https://medlineplus.gov/fractures.html",
    "IOL": "https://medlineplus.gov/cataract.html",
    "RLQ": "https://medlineplus.gov/abdominalpain.html",
    "LLQ": "https://medlineplus.gov/abdominalpain.html",
    "RUQ": "https://medlineplus.gov/abdominalpain.html",
    "LUQ": "https://medlineplus.gov/abdominalpain.html",
    "perforation": "https://medlineplus.gov/ency/article/000235.htm",
    "phacoemulsification": "https://medlineplus.gov/cataract.html",

    # --- Common procedures and conditions ---
    "cholecystectomy": "https://medlineplus.gov/gallbladderdiseases.html",
    "appendectomy": "https://medlineplus.gov/appendicitis.html",
    "hysterectomy": "https://medlineplus.gov/hysterectomy.html",
    "arthroplasty": "https://medlineplus.gov/jointreplacement.html",
    "colonoscopy": "https://medlineplus.gov/colonoscopy.html",
    "pneumonia": "https://medlineplus.gov/pneumonia.html",
    "sepsis": "https://medlineplus.gov/sepsis.html",
    "hypertension": "https://medlineplus.gov/highbloodpressure.html",
    "atrial fibrillation": "https://medlineplus.gov/atrialfibrillation.html",
    "anemia": "https://medlineplus.gov/anemia.html",
    "edema": "https://medlineplus.gov/edema.html",
    "syncope": "https://medlineplus.gov/fainting.html",
    "biopsy": "https://medlineplus.gov/biopsy.html",
    "catheterization": "https://medlineplus.gov/cardiaccatheterization.html",
}
|
|
|
|
|
def _build_term_pattern(term):
    """Compile the regex used to locate *term* in a clinical note.

    Three shapes are produced:

    * "POD" is special-cased (case-sensitive) so an attached day number
      such as "POD1" or "POD 3" is part of the match.
    * Upper-case terms of at most three characters are guarded by
      "no ASCII letter on either side" look-arounds; they are
      case-sensitive when listed in CASE_SENSITIVE_ABBREVS and
      case-insensitive otherwise.
    * Everything else is matched case-insensitively on word boundaries.

    Returns:
        A compiled ``re.Pattern``.
    """
    if term == "POD":
        return re.compile(r'(?<![A-Za-z])POD\s*\d+(?![A-Za-z])|(?<![A-Za-z])POD(?![A-Za-z0-9])')

    literal = re.escape(term)
    looks_like_abbreviation = term.isupper() and len(term) <= 3
    if not looks_like_abbreviation:
        return re.compile(r'\b' + literal + r'\b', re.IGNORECASE)

    guarded = r'(?<![A-Za-z])' + literal + r'(?![A-Za-z])'
    if term in CASE_SENSITIVE_ABBREVS:
        return re.compile(guarded)
    return re.compile(guarded, re.IGNORECASE)
|
|
|
|
|
def find_terms(text):
    """Find glossary terms in *text*, letting longer terms win over shorter ones.

    Candidates are every TERM_PATTERNS key plus every LEMMA_MAP variant whose
    canonical form is a TERM_PATTERNS key (variants that are themselves
    glossary keys are skipped, so they keep their own definition). Candidates
    are tried longest first. Every non-overlapping occurrence of a candidate
    claims its characters, so a shorter term is never reported from inside a
    longer one (e.g. "diabetes" inside any "gestational diabetes"). Each
    canonical term is reported once, using its first accepted occurrence.

    Args:
        text: The clinical note to scan.

    Returns:
        A list of ``(matched_text, plain_definition, url)`` tuples in
        longest-candidate-first order. ``url`` is "" when TERM_URLS has no
        entry (or an empty entry) for the term.
    """
    # Built once per call; the original rebuilt this for every lemma variant.
    glossary_keys_lower = {term.lower() for term in TERM_PATTERNS}

    candidates = [(term, term) for term in TERM_PATTERNS]
    candidates.extend(
        (variant, canonical)
        for variant, canonical in LEMMA_MAP.items()
        if canonical in TERM_PATTERNS and variant.lower() not in glossary_keys_lower
    )
    # sort() is stable, so equal-length candidates keep their dict order.
    candidates.sort(key=lambda pair: len(pair[0]), reverse=True)

    found = []
    reported = set()   # lower-cased canonical terms already in `found`
    covered = set()    # character offsets claimed by accepted matches

    for surface, canonical in candidates:
        pattern = _build_term_pattern(surface)
        for match in pattern.finditer(text):
            span = range(match.start(), match.end())
            if not covered.isdisjoint(span):
                continue
            # Claim EVERY accepted occurrence, not only the first one;
            # otherwise a repeat of a long term leaves its inner words free
            # to be reported as separate shorter terms.
            covered.update(span)

            key = canonical.lower()
            if key in reported:
                continue
            reported.add(key)

            # TERM_URLS keys may be stored upper-case, lower-case or verbatim.
            url = ""
            for url_key in (canonical.upper(), canonical.lower(), canonical):
                if url_key in TERM_URLS:
                    url = TERM_URLS[url_key]
                    break

            found.append((match.group(), TERM_PATTERNS[canonical], url))
    return found
|
|
|
|
|
def simplify(clinical_text):
    """Translate a clinical note to plain language and build a term glossary.

    The note is prefixed with SIMPLIFY_PREFIX, tokenized (truncated to 512
    tokens) and passed to the seq2seq model with beam search. The glossary is
    built separately from the untruncated note via find_terms().

    Args:
        clinical_text: The note to simplify. ``None``, non-string values and
            blank strings are treated as "nothing to do".

    Returns:
        A ``(plain_language, glossary_markdown)`` tuple of strings; both are
        "" when there is no input text.
    """
    # Guard against None / non-string input as well as blank text, so callers
    # get the empty result instead of an AttributeError on .strip().
    if not isinstance(clinical_text, str) or not clinical_text.strip():
        return "", ""

    input_text = SIMPLIFY_PREFIX + clinical_text
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_ids = model.generate(
            **inputs, max_new_tokens=256, num_beams=4,
            early_stopping=True, no_repeat_ngram_size=3,
        )
    plain_language = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    glossary_lines = []
    for term_text, simple_def, curated_url in find_terms(clinical_text):
        entry = f"**{term_text}** -- {simple_def}"
        if curated_url:
            # Terms with a curated link get a second line pointing at it.
            entry = (
                f"**{term_text}** -- {simple_def} \n"
                f"[Learn more on MedlinePlus]({curated_url})"
            )
        glossary_lines.append(entry)
        # Blank line between entries so Markdown renders separate paragraphs.
        glossary_lines.append("")

    glossary = "\n".join(glossary_lines)
    return plain_language, glossary
|
|
|
|
|
# Sample clinical notes offered as one-click examples in the Gradio UI
# (passed to gr.Interface via examples=). Each inner list holds the value for
# the single "Clinical Note" input.
EXAMPLES = [
    # Post-operative / discharge note.
    [
        "Patient underwent laparoscopic cholecystectomy for acute cholecystitis. "
        "Intraoperative findings revealed a distended, edematous gallbladder with "
        "adhesions to the omentum. Critical view of safety was achieved. EBL minimal. "
        "Patient tolerated the procedure well. POD1: afebrile, tolerating PO diet, "
        "ambulating independently. Discharged on ibuprofen and oxycodone PRN. "
        "Follow-up in 2 weeks."
    ],
    # Cardiology procedure note with discharge medications.
    [
        "68-year-old male with NSTEMI. Left heart catheterization with PCI to LAD. "
        "Angiography revealed 95% stenosis of proximal LAD. Successful DES placement "
        "with TIMI 3 flow. Echo showed EF 45% with anterior wall hypokinesis. "
        "Discharge medications: Aspirin 81mg daily, Ticagrelor 90mg BID x12 months, "
        "Metoprolol 50mg daily, Atorvastatin 80mg daily."
    ],
    # Abbreviation-heavy emergency department note.
    [
        "72y/o M. CC: SOB, DOE, R/O Acute MI. PMHx: HTN, DMII, CAD, HFpEF. "
        "Presented to ED via EMS with progressive SOB and 3-pillow orthopnea x24h. "
        "Noncompliant with PO meds (ASA, Lisinopril) d/t financial constraints. "
        "Tachycardic HR 115, hypotensive BP 90/50. CXR: pulmonary edema. "
        "ECG: sinus tach with PVCs, no STEMI. Labs: Cr 2.1 from 0.9 baseline, "
        "K+ 5.5, BNP 2000. Pre-renal AKI. Troponin mildly elevated, likely demand ischemia."
    ],
]
|
|
|
# Gradio UI: one text input wired to simplify(), whose two return values feed
# the two outputs in order (plain-language text, then the Markdown glossary).
demo = gr.Interface(
    fn=simplify,
    inputs=gr.Textbox(
        label="Clinical Note",
        placeholder="Paste a clinical note, discharge summary, or post-op description...",
        lines=8,
    ),
    outputs=[
        gr.Textbox(label="Plain Language Version", lines=6),
        gr.Markdown(label="Medical Term Glossary (with MedlinePlus links)"),
    ],
    title="MedClear: Doctor-Speak to Human-Speak",
    description=(
        "Paste a clinical note and MedClear will translate it into plain language "
        "that patients and families can understand. Every medical term is defined "
        "and linked to [MedlinePlus](https://medlineplus.gov) (NIH) for verification.\n\n"
        "**This is an AI assistant, not medical advice.** Always talk to your doctor."
    ),
    examples=EXAMPLES,
    # Examples are not pre-computed; the model runs only when one is submitted.
    cache_examples=False,
    theme=gr.themes.Soft(),
)
|
|
|
|
|
| import json as json_module
|
| from fastapi import FastAPI, Request
|
| from fastapi.responses import JSONResponse
|
| from fastapi.middleware.cors import CORSMiddleware
|
| import uvicorn
|
|
|
# FastAPI application that hosts the JSON API routes defined below.
app = FastAPI()
# NOTE(review): CORS is fully open (any origin, method and header). Presumably
# intentional so external front-ends can call the API -- confirm, and narrow
# allow_origins if the set of callers is known.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
|
|
|
|
@app.post("/api/simplify")
async def api_simplify(request: Request):
    """Simplify a clinical note and return source-text term annotations.

    Expects a JSON object body with a string field "text".

    Returns:
        200 with a JSON object containing:
          - "input": the submitted text,
          - "plain_language": the model's plain-language rewrite,
          - "source_annotations": a list of {"term", "simple", "start",
            "end", "url", "medlineplus_summary"} dicts sorted by "start",
            where start/end are character offsets into the submitted text,
          - "output_annotations": always an empty list.
        400 with {"error": ...} when the body is not valid JSON, is not an
        object with a "text" field, or "text" is not a string.
    """
    # NOTE(review): simplify() runs model inference synchronously inside this
    # coroutine, so the event loop is blocked for the duration of a request.
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; report a client error instead of a 500.
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)

    if not isinstance(data, dict) or "text" not in data:
        return JSONResponse({"error": "Missing 'text' field"}, status_code=400)

    clinical_text = data["text"]
    if not isinstance(clinical_text, str):
        return JSONResponse({"error": "'text' must be a string"}, status_code=400)

    plain_language, _ = simplify(clinical_text)

    annotations = []
    claimed = []  # (start, end) spans already given to an annotation
    for term_text, simple_def, curated_url in find_terms(clinical_text):
        pattern = _build_term_pattern(term_text)
        # Take the first occurrence that does not overlap an earlier
        # annotation. find_terms() yields longer terms first, so this stops a
        # short term from being pinned inside a longer, already-annotated one
        # (a plain pattern.search() would pick that overlapping position).
        for match in pattern.finditer(clinical_text):
            start, end = match.span()
            if any(start < c_end and c_start < end for c_start, c_end in claimed):
                continue
            claimed.append((start, end))
            annotations.append({
                "term": match.group(),
                "simple": simple_def,
                "start": start,
                "end": end,
                "url": curated_url,
                "medlineplus_summary": "",
            })
            break

    annotations.sort(key=lambda item: item["start"])
    return JSONResponse({
        "input": clinical_text,
        "plain_language": plain_language,
        "source_annotations": annotations,
        "output_annotations": [],
    })
|
|
|
|
|
@app.get("/api/health")
async def api_health():
    """Health probe: always answers with a fixed "ok" JSON payload.

    "model_loaded" is a constant True; the handler does not inspect the model.
    """
    payload = {"status": "ok", "model_loaded": True}
    return JSONResponse(payload)
|
|
|
|
|
|
|
# Serve the Gradio UI from the same FastAPI app at the root path. The /api/*
# routes were registered on `app` above, before this mount.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # When run as a script, listen on all interfaces on port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|