"""
Shared configuration for the ICD-10 Coding Streamlit app.
"""
import os

# ---------------------------------------------------------------------------
# Directory layout
# ---------------------------------------------------------------------------
# BASE_DIR is two directory levels above this file, i.e. the parent of the
# directory that contains this module. Everything else hangs off PROJECT_DIR.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
PROJECT_DIR = os.path.join(BASE_DIR, "ICD10_Project")
MODELS_DIR = os.path.join(PROJECT_DIR, "models")
CACHE_DIR = os.path.join(PROJECT_DIR, "cache")

# ---------------------------------------------------------------------------
# Artifact locations
# ---------------------------------------------------------------------------
# The ``*.joblib`` entries name single files. The ``model_*_best`` entries carry
# no extension -- presumably saved-model directories; confirm against the loader.
BILINGUAL_LOOKUP_PATH = os.path.join(MODELS_DIR, "bilingual_lookup.joblib")
MLB_PATH = os.path.join(MODELS_DIR, "mlb.joblib")
SVM_PATH = os.path.join(MODELS_DIR, "model_a_svm.joblib")
MODEL_B2_PATH = os.path.join(MODELS_DIR, "model_b2_best")
MODEL_C_PATH = os.path.join(MODELS_DIR, "model_c_best")
MODEL_D_PATH = os.path.join(MODELS_DIR, "model_d_best")
# Unlike the model artifacts, the results CSV sits directly under PROJECT_DIR.
RESULTS_CSV_PATH = os.path.join(PROJECT_DIR, "comparative_results.csv")

# ---------------------------------------------------------------------------
# Pretrained model identifiers
# ---------------------------------------------------------------------------
# "<owner>/<model>" strings -- presumably Hugging Face Hub ids for the base
# checkpoints; confirm against the code that loads them.
CLINICALBERT_NAME = "emilyalsentzer/Bio_ClinicalBERT"
LONGFORMER_NAME = "allenai/longformer-base-4096"

# ---------------------------------------------------------------------------
# Model dimensions
# ---------------------------------------------------------------------------
# Same value as DATASET_STATS["unique_codes"] in this file (2863).
NUM_LABELS_FULL = 2863
# Single output for the re-ranker (described in TRAINING_HISTORY as
# "Binary pairwise").
NUM_LABELS_RERANKER = 1
# Maximum sequence lengths; the TRAINING_HISTORY note "1024 vs 384 tokens"
# indicates these are token counts.
MAX_LENGTH_BERT = 384
MAX_LENGTH_LONG = 1024

# ---------------------------------------------------------------------------
# ICD-10 chapter lookup
# ---------------------------------------------------------------------------
# Maps a single capital letter (A-Z) to a chapter label of the form
# "<code range>: <name>". Several letters share one label (A/B, C/D, S/T,
# V/W/X/Y), so the mapping is many-to-one.
ICD10_CHAPTERS = {
    "A": "A00-B99: Infectious & Parasitic Diseases",
    "B": "A00-B99: Infectious & Parasitic Diseases",
    "C": "C00-D49: Neoplasms",
    "D": "C00-D49: Neoplasms",
    "E": "E00-E89: Endocrine, Nutritional & Metabolic",
    "F": "F01-F99: Mental & Behavioral Disorders",
    "G": "G00-G99: Nervous System Diseases",
    "H": "H00-H95: Eye & Ear Diseases",
    "I": "I00-I99: Circulatory System Diseases",
    "J": "J00-J99: Respiratory System Diseases",
    "K": "K00-K95: Digestive System Diseases",
    "L": "L00-L99: Skin & Subcutaneous Tissue",
    "M": "M00-M99: Musculoskeletal System",
    "N": "N00-N99: Genitourinary System Diseases",
    "O": "O00-O9A: Pregnancy & Childbirth",
    "P": "P00-P96: Perinatal Conditions",
    "Q": "Q00-Q99: Congenital Malformations",
    "R": "R00-R99: Symptoms & Abnormal Findings",
    "S": "S00-T88: Injury & Poisoning",
    "T": "S00-T88: Injury & Poisoning",
    "U": "U00-U85: Special Purpose Codes",
    "V": "V00-Y99: External Causes",
    "W": "V00-Y99: External Causes",
    "X": "V00-Y99: External Causes",
    "Y": "V00-Y99: External Causes",
    "Z": "Z00-Z99: Health Status Factors",
}

# Distinct chapter labels in ascending string order. Because every label starts
# with its code range, this is also code-range order.
CHAPTER_LABELS = sorted(set(ICD10_CHAPTERS.values()))

# ---------------------------------------------------------------------------
# Sample clinical notes
# ---------------------------------------------------------------------------
# Each entry is a dict with the keys:
#   title          display title (emoji prefix + condition name)
#   dept           department code as a string ("02", "GM", ...)
#   age, sex       patient demographics
#   text           lower-cased note text
#   expected_code  ICD-10 code associated with the note
#   expected_desc  English description followed by the Chinese term in parentheses
#
# NOTE(review): the non-ASCII text in this list had been corrupted -- it looked
# like UTF-8 decoded through a Thai single-byte code page, with some bytes
# dropped and some turned into line breaks that split the string literals.
# The Chinese terms below were reconstructed from the surviving bytes and
# cross-checked against the English description; the emoji were restored where
# the surviving bytes leave one sensible candidate. Two title prefixes
# ("๐ซ" and "๐ฅ") lost bytes that cannot be determined from what remains, so
# they are kept exactly as found -- restore them from the original source.
SAMPLE_NOTES = [
    {
        "title": "๐ซ Colon Cancer (Metastatic)",
        "dept": "02", "age": 65, "sex": "male",
        "text": "1. metastatic colon cancer to liver: stage iv, ramucirumab, oxaliplatin, fluorouracil (5-fu) on 11/29. the patient was admitted for chemotherapy. past medical history includes hypertension and type 2 diabetes mellitus. laboratory results showed elevated cea (45.2 ng/ml) and ca 19-9 (89.3 u/ml). ct scan revealed multiple liver metastases. the patient tolerated the chemotherapy well with mild nausea managed with ondansetron.",
        "expected_code": "C18.9",
        "expected_desc": "Malignant neoplasm of colon, unspecified (結腸惡性腫瘤)",
    },
    {
        "title": "🩺 Uterovaginal Prolapse",
        "dept": "GM", "age": 66, "sex": "female",
        "text": "1. uterovaginal prolapse, grade 4. 2. stress urinary incontinence, recurrent status post transobturator tape in 2021. underlying: breast cancer, on tamoxifen since 2019. the patient presented with worsening pelvic organ prolapse causing difficulty in ambulation and recurrent urinary tract infections. physical examination revealed stage iv uterovaginal prolapse with cystocele and rectocele.",
        "expected_code": "N81.2",
        "expected_desc": "Incomplete uterovaginal prolapse (子宮陰道不完全脫垂)",
    },
    {
        "title": "๐ฅ Urinary Tract Infection",
        "dept": "08", "age": 51, "sex": "male",
        "text": "active: 1. urinary tract infection. underlying: 1. left upper ureter stone status post left ureterorenoscopic lithotripsy + ureteral double j catheter indwelling on 2024/01/15. the patient presented with fever (39.2c), dysuria, and left flank pain. urinalysis showed pyuria and bacteriuria. urine culture grew escherichia coli sensitive to ceftriaxone. blood cultures were negative.",
        "expected_code": "R50.9",
        "expected_desc": "Fever, unspecified (發燒)",
    },
    {
        "title": "🧠 Cerebral Infarction",
        "dept": "12", "age": 64, "sex": "female",
        "text": "1. cerebral infarction at left hemisphere. the patient was found to have sudden onset right-sided hemiparesis and aphasia. ct brain showed hypodense lesion in the left middle cerebral artery territory. mri confirmed acute ischemic stroke. echocardiogram revealed atrial fibrillation. the patient was started on dual antiplatelet therapy and referred to rehabilitation.",
        "expected_code": "I63.9",
        "expected_desc": "Cerebral infarction, unspecified (腦梗塞)",
    },
    {
        "title": "❤️ Coronary Artery Disease",
        "dept": "02", "age": 73, "sex": "female",
        "text": "1. coronary artery disease, suspected stable angina. the patient presented with exertional chest pain for 2 months duration. stress test showed st-segment depression in leads v4-v6. coronary angiography revealed 80% stenosis of the left anterior descending artery. percutaneous coronary intervention with drug-eluting stent was performed successfully.",
        "expected_code": "I25.10",
        "expected_desc": "Atherosclerotic heart disease of native coronary artery (自體的冠狀動脈粥樣硬化心臟病)",
    },
]

# ---------------------------------------------------------------------------
# Training history
# ---------------------------------------------------------------------------
# Keyed by display name. Each value holds:
#   epochs  1-based epoch numbers
#   val_f1  one value per epoch, in the same order as ``epochs``
#   note    one-line remark about the run
#
# NOTE(review): the "โ" inside three of the notes looks like a mis-decoded
# punctuation mark (an em dash or an arrow would both fit). The original
# character cannot be determined from this file, so the strings are kept
# exactly as found -- restore them from the original source.
TRAINING_HISTORY = {
    "Model B (ClinicalBERT Multi-Label)": {
        "epochs": list(range(1, 3)),
        "val_f1": [0.0000, 0.0000],
        "note": "Multi-label BCE formulation โ predicted all zeros (label sparsity issue)",
    },
    "Model B2 (ClinicalBERT Single-Label)": {
        "epochs": list(range(1, 21)),
        "val_f1": [
            0.1430, 0.1987, 0.2234, 0.2567, 0.2789, 0.2945, 0.3101, 0.3234,
            0.3356, 0.3467, 0.3545, 0.3612, 0.3689, 0.3745, 0.3812, 0.3878,
            0.3923, 0.3967, 0.4012, 0.4055,
        ],
        "note": "Single-label CrossEntropyLoss โ correct formulation",
    },
    "Model C (Longformer 1024-token)": {
        "epochs": list(range(1, 7)),
        "val_f1": [0.2527, 0.3112, 0.3456, 0.3689, 0.3825, 0.3941],
        "note": "Longer context (1024 vs 384 tokens), marginal improvement",
    },
    "Model D Re-Ranker (Pairwise)": {
        "epochs": list(range(1, 11)),
        "val_f1": [0.8956, 0.9100, 0.9145, 0.9189, 0.9210, 0.9225, 0.9238, 0.9248, 0.9255, 0.9259],
        "note": "Binary pairwise F1 โ strong discrimination ability",
    },
}

# ---------------------------------------------------------------------------
# Dataset statistics
# ---------------------------------------------------------------------------
# Hard-coded summary figures. Two relationships hold within this file:
#   * train_size + val_size + test_size == unique_encounters
#     (18045 + 3867 + 3867 == 25779)
#   * unique_codes has the same value as NUM_LABELS_FULL (2863)
# The unit of the text_length_* figures is not stated here.
DATASET_STATS = {
    "raw_records": 194161,
    "after_cleaning": 106943,
    "unique_encounters": 25779,
    "unique_codes": 2863,
    "avg_codes_per_encounter": 1.0,
    "text_length_mean": 720,
    "text_length_median": 491,
    "text_length_max": 10864,
    "departments": 20,
    "train_size": 18045,
    "val_size": 3867,
    "test_size": 3867,
}

# ---------------------------------------------------------------------------
# Most frequent codes
# ---------------------------------------------------------------------------
# Twenty (code, short description, count) tuples, listed in descending order
# of count.
TOP_20_CODES = [
    ("R50.9", "Fever, unspecified", 1842),
    ("J18.9", "Pneumonia, unspecified organism", 987),
    ("C50.912", "Malignant neoplasm of breast", 876),
    ("K80.20", "Calculus of gallbladder w/o obstruction", 654),
    ("I63.9", "Cerebral infarction, unspecified", 623),
    ("N39.0", "Urinary tract infection", 589),
    ("K35.80", "Acute appendicitis", 534),
    ("S72.001A", "Fracture of femur", 498),
    ("I25.10", "Atherosclerotic heart disease", 467),
    ("E11.65", "Type 2 diabetes mellitus", 445),
    ("J44.1", "COPD with acute exacerbation", 412),
    ("C34.90", "Malignant neoplasm of lung", 398),
    ("I50.9", "Heart failure, unspecified", 376),
    ("N20.0", "Calculus of kidney", 354),
    ("K92.0", "Hematemesis", 332),
    ("I48.91", "Atrial fibrillation", 318),
    ("J96.00", "Acute respiratory failure", 298),
    ("C18.9", "Malignant neoplasm of colon", 287),
    ("S82.001A", "Fracture of tibia", 265),
    ("I21.3", "ST elevation myocardial infarction", 254),
]

# ---------------------------------------------------------------------------
# Department distribution
# ---------------------------------------------------------------------------
# Count per department name: twenty entries, matching
# DATASET_STATS["departments"]. "Other" is listed last, out of count order.
# NOTE(review): the counts sum to 28,879, which is not equal to
# DATASET_STATS["unique_encounters"] (25,779). What is being counted is not
# stated here -- confirm the source of these figures.
DEPT_DISTRIBUTION = {
    "General Medicine": 4231,
    "General Surgery": 3876,
    "Urology": 2543,
    "Orthopedics": 2398,
    "Oncology": 2187,
    "Cardiology": 1965,
    "Neurology": 1876,
    "Pulmonology": 1654,
    "Gastroenterology": 1432,
    "OB/GYN": 1298,
    "Pediatrics": 987,
    "ENT": 876,
    "Ophthalmology": 654,
    "Dermatology": 432,
    "Nephrology": 398,
    "Neurosurgery": 367,
    "Plastic Surgery": 298,
    "Psychiatry": 187,
    "Emergency": 156,
    "Other": 1064,
}