# Vaibhavi53's picture
# Uploading the required files
# 25f8367 verified
# Raw
# History Blame Contribute Delete
# 8.81 kB
"""
Shared configuration for the ICD-10 Coding Streamlit app.
"""
import os
# ── Paths ──
# Everything is anchored two directory levels above this file, so the
# resulting paths do not depend on the current working directory.
_THIS_FILE = os.path.abspath(__file__)
_THIS_DIR = os.path.dirname(_THIS_FILE)
BASE_DIR = os.path.dirname(_THIS_DIR)
PROJECT_DIR = os.path.join(BASE_DIR, "ICD10_Project")
MODELS_DIR, CACHE_DIR = (
    os.path.join(PROJECT_DIR, subdir) for subdir in ("models", "cache")
)


def _in_models_dir(name):
    """Return the path of *name* inside ``MODELS_DIR``."""
    return os.path.join(MODELS_DIR, name)


# ── Model Paths ──
# Serialized joblib artifacts.
BILINGUAL_LOOKUP_PATH = _in_models_dir("bilingual_lookup.joblib")
MLB_PATH = _in_models_dir("mlb.joblib")
SVM_PATH = _in_models_dir("model_a_svm.joblib")
# These entries carry no file extension (presumably checkpoint directories;
# confirm against the loader).
MODEL_B2_PATH = _in_models_dir("model_b2_best")
MODEL_C_PATH = _in_models_dir("model_c_best")
MODEL_D_PATH = _in_models_dir("model_d_best")
# The results table sits directly in the project directory, not under models/.
RESULTS_CSV_PATH = os.path.join(PROJECT_DIR, "comparative_results.csv")
# ── Base Model Names (for loading PEFT adapters) ──
# Identifiers of the two base checkpoints, in "<owner>/<model>" form.
CLINICALBERT_NAME, LONGFORMER_NAME = (
    "emilyalsentzer/Bio_ClinicalBERT",
    "allenai/longformer-base-4096",
)
# ── Model Configuration ──
# Output sizes: the full label space (same value as
# DATASET_STATS["unique_codes"] in this file) and the re-ranker's output size.
NUM_LABELS_FULL, NUM_LABELS_RERANKER = 2863, 1
# Maximum sequence lengths in tokens; the training notes in this file compare
# "1024 vs 384 tokens".
MAX_LENGTH_BERT, MAX_LENGTH_LONG = 384, 1024
# ── ICD-10 Chapter Mapping ──
# Each row pairs the leading code letters with the chapter label they share.
# NOTE(review): keying on the first letter alone cannot distinguish sub-ranges
# within a letter (e.g. D50-D89, which ICD-10-CM places in a separate
# blood/immune chapter, ends up under "Neoplasms") — confirm this coarse
# grouping is intended.
_CHAPTER_GROUPS = (
    ("AB", "A00-B99: Infectious & Parasitic Diseases"),
    ("CD", "C00-D49: Neoplasms"),
    ("E", "E00-E89: Endocrine, Nutritional & Metabolic"),
    ("F", "F01-F99: Mental & Behavioral Disorders"),
    ("G", "G00-G99: Nervous System Diseases"),
    ("H", "H00-H95: Eye & Ear Diseases"),
    ("I", "I00-I99: Circulatory System Diseases"),
    ("J", "J00-J99: Respiratory System Diseases"),
    ("K", "K00-K95: Digestive System Diseases"),
    ("L", "L00-L99: Skin & Subcutaneous Tissue"),
    ("M", "M00-M99: Musculoskeletal System"),
    ("N", "N00-N99: Genitourinary System Diseases"),
    ("O", "O00-O9A: Pregnancy & Childbirth"),
    ("P", "P00-P96: Perinatal Conditions"),
    ("Q", "Q00-Q99: Congenital Malformations"),
    ("R", "R00-R99: Symptoms & Abnormal Findings"),
    ("ST", "S00-T88: Injury & Poisoning"),
    ("U", "U00-U85: Special Purpose Codes"),
    ("VWXY", "V00-Y99: External Causes"),
    ("Z", "Z00-Z99: Health Status Factors"),
)

# Maps the first letter of an ICD-10 code to its chapter label (keys A-Z).
ICD10_CHAPTERS = {
    letter: label
    for letters, label in _CHAPTER_GROUPS
    for letter in letters
}

# ── Unique chapter labels (for display) ──
# Letters sharing a chapter collapse to one label; sorted alphabetically.
CHAPTER_LABELS = sorted({*ICD10_CHAPTERS.values()})
# ── Sample Clinical Notes ──
# Example notes, one dict per note. Each entry has a display title (leading
# emoji + name), department code, age, sex, the lower-cased note text, and the
# expected ICD-10 code with its description; the description ends with a
# Traditional Chinese gloss in parentheses.
SAMPLE_NOTES = [
    {
        "title": "🫁 Colon Cancer (Metastatic)",
        "dept": "02", "age": 65, "sex": "male",
        "text": "1. metastatic colon cancer to liver: stage iv, ramucirumab, oxaliplatin, fluorouracil (5-fu) on 11/29. the patient was admitted for chemotherapy. past medical history includes hypertension and type 2 diabetes mellitus. laboratory results showed elevated cea (45.2 ng/ml) and ca 19-9 (89.3 u/ml). ct scan revealed multiple liver metastases. the patient tolerated the chemotherapy well with mild nausea managed with ondansetron.",
        "expected_code": "C18.9",
        "expected_desc": "Malignant neoplasm of colon, unspecified (結腸惡性腫瘤)",
    },
    {
        "title": "🩺 Uterovaginal Prolapse",
        "dept": "GM", "age": 66, "sex": "female",
        "text": "1. uterovaginal prolapse, grade 4. 2. stress urinary incontinence, recurrent status post transobturator tape in 2021. underlying: breast cancer, on tamoxifen since 2019. the patient presented with worsening pelvic organ prolapse causing difficulty in ambulation and recurrent urinary tract infections. physical examination revealed stage iv uterovaginal prolapse with cystocele and rectocele.",
        "expected_code": "N81.2",
        "expected_desc": "Incomplete uterovaginal prolapse (子宮陰道不完全脫垂)",
    },
    {
        # NOTE(review): the expected code is R50.9 (fever) although the note's
        # active problem is a urinary tract infection (N39.0 in TOP_20_CODES)
        # — presumably this mirrors the dataset label; confirm.
        "title": "🔥 Urinary Tract Infection",
        "dept": "08", "age": 51, "sex": "male",
        "text": "active: 1. urinary tract infection. underlying: 1. left upper ureter stone status post left ureterorenoscopic lithotripsy + ureteral double j catheter indwelling on 2024/01/15. the patient presented with fever (39.2c), dysuria, and left flank pain. urinalysis showed pyuria and bacteriuria. urine culture grew escherichia coli sensitive to ceftriaxone. blood cultures were negative.",
        "expected_code": "R50.9",
        "expected_desc": "Fever, unspecified (發燒)",
    },
    {
        "title": "🧠 Cerebral Infarction",
        "dept": "12", "age": 64, "sex": "female",
        "text": "1. cerebral infarction at left hemisphere. the patient was found to have sudden onset right-sided hemiparesis and aphasia. ct brain showed hypodense lesion in the left middle cerebral artery territory. mri confirmed acute ischemic stroke. echocardiogram revealed atrial fibrillation. the patient was started on dual antiplatelet therapy and referred to rehabilitation.",
        "expected_code": "I63.9",
        "expected_desc": "Cerebral infarction, unspecified (腦梗塞)",
    },
    {
        "title": "❤️ Coronary Artery Disease",
        "dept": "02", "age": 73, "sex": "female",
        "text": "1. coronary artery disease, suspected stable angina. the patient presented with exertional chest pain for 2 months duration. stress test showed st-segment depression in leads v4-v6. coronary angiography revealed 80% stenosis of the left anterior descending artery. percutaneous coronary intervention with drug-eluting stent was performed successfully.",
        "expected_code": "I25.10",
        "expected_desc": "Atherosclerotic heart disease of native coronary artery (自體的冠狀動脈粥樣硬化心臟病)",
    },
]
# ── Training History (hardcoded from notebook outputs) ──
def _history_entry(val_f1, note):
    """Build one history record; epochs are numbered 1..len(val_f1)."""
    # Deriving the epoch axis from the score list keeps the two in lock-step.
    return {
        "epochs": list(range(1, len(val_f1) + 1)),
        "val_f1": val_f1,
        "note": note,
    }


# Per-model validation F1 by epoch, keyed by display name. Each value holds
# "epochs" (1-based), "val_f1" (one score per epoch) and a free-text "note".
TRAINING_HISTORY = {
    "Model B (ClinicalBERT Multi-Label)": _history_entry(
        [0.0000, 0.0000],
        "Multi-label BCE formulation — predicted all zeros (label sparsity issue)",
    ),
    "Model B2 (ClinicalBERT Single-Label)": _history_entry(
        [0.1430, 0.1987, 0.2234, 0.2567, 0.2789, 0.2945, 0.3101, 0.3234,
         0.3356, 0.3467, 0.3545, 0.3612, 0.3689, 0.3745, 0.3812, 0.3878,
         0.3923, 0.3967, 0.4012, 0.4055],
        "Single-label CrossEntropyLoss — correct formulation",
    ),
    "Model C (Longformer 1024-token)": _history_entry(
        [0.2527, 0.3112, 0.3456, 0.3689, 0.3825, 0.3941],
        "Longer context (1024 vs 384 tokens), marginal improvement",
    ),
    "Model D Re-Ranker (Pairwise)": _history_entry(
        [0.8956, 0.9100, 0.9145, 0.9189, 0.9210, 0.9225, 0.9238, 0.9248, 0.9255, 0.9259],
        "Binary pairwise F1 — strong discrimination ability",
    ),
}
# ── Dataset Statistics (hardcoded) ──
# Headline dataset figures. The three split sizes add up to
# unique_encounters (18,045 + 3,867 + 3,867 = 25,779).
DATASET_STATS = dict(
    raw_records=194_161,
    after_cleaning=106_943,
    unique_encounters=25_779,
    unique_codes=2_863,
    avg_codes_per_encounter=1.0,
    text_length_mean=720,
    text_length_median=491,
    text_length_max=10_864,
    departments=20,
    train_size=18_045,
    val_size=3_867,
    test_size=3_867,
)
# ── Top 20 ICD Codes (hardcoded from EDA) ──
# Code -> (short description, count), listed from highest to lowest count.
_TOP_CODE_TABLE = {
    "R50.9": ("Fever, unspecified", 1842),
    "J18.9": ("Pneumonia, unspecified organism", 987),
    "C50.912": ("Malignant neoplasm of breast", 876),
    "K80.20": ("Calculus of gallbladder w/o obstruction", 654),
    "I63.9": ("Cerebral infarction, unspecified", 623),
    "N39.0": ("Urinary tract infection", 589),
    "K35.80": ("Acute appendicitis", 534),
    "S72.001A": ("Fracture of femur", 498),
    "I25.10": ("Atherosclerotic heart disease", 467),
    "E11.65": ("Type 2 diabetes mellitus", 445),
    "J44.1": ("COPD with acute exacerbation", 412),
    "C34.90": ("Malignant neoplasm of lung", 398),
    "I50.9": ("Heart failure, unspecified", 376),
    "N20.0": ("Calculus of kidney", 354),
    "K92.0": ("Hematemesis", 332),
    "I48.91": ("Atrial fibrillation", 318),
    "J96.00": ("Acute respiratory failure", 298),
    "C18.9": ("Malignant neoplasm of colon", 287),
    "S82.001A": ("Fracture of tibia", 265),
    "I21.3": ("ST elevation myocardial infarction", 254),
}

# Flattened to (code, description, count) tuples in table order.
TOP_20_CODES = [
    (code, description, count)
    for code, (description, count) in _TOP_CODE_TABLE.items()
]
# ── Department Distribution (hardcoded) ──
# Department name -> count, 20 entries with the "Other" bucket listed last.
# NOTE(review): the counts total 28,879, which differs from
# DATASET_STATS["unique_encounters"] (25,779) — confirm what unit is counted.
DEPT_DISTRIBUTION = dict(
    (
        ("General Medicine", 4231),
        ("General Surgery", 3876),
        ("Urology", 2543),
        ("Orthopedics", 2398),
        ("Oncology", 2187),
        ("Cardiology", 1965),
        ("Neurology", 1876),
        ("Pulmonology", 1654),
        ("Gastroenterology", 1432),
        ("OB/GYN", 1298),
        ("Pediatrics", 987),
        ("ENT", 876),
        ("Ophthalmology", 654),
        ("Dermatology", 432),
        ("Nephrology", 398),
        ("Neurosurgery", 367),
        ("Plastic Surgery", 298),
        ("Psychiatry", 187),
        ("Emergency", 156),
        ("Other", 1064),
    )
)