File size: 4,781 Bytes
39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 39a2c30 9af99d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
"""
NLP OHCA Classifier v3.0 - Improved Methodology

A BERT-based classifier for detecting Out-of-Hospital Cardiac Arrest (OHCA)
cases in medical discharge notes using improved machine learning methodology.

Key Improvements in v3.0:
- Patient-level data splits to prevent data leakage
- Proper train/validation/test methodology
- Optimal threshold finding and usage
- Larger annotation samples for better performance
- Unbiased evaluation framework

This package contains two main modules:
1. ohca_training_pipeline: Complete training pipeline with improved methodology
2. ohca_inference: Apply pre-trained models with optimal threshold support
"""
# Training pipeline imports - v3.0 with improvements
from .ohca_training_pipeline import (
# Improved functions
create_patient_level_splits,
complete_improved_training_pipeline,
complete_annotation_and_train_v3,
find_optimal_threshold,
evaluate_on_test_set,
save_model_with_metadata,
# Legacy functions (backward compatible)
create_training_sample,
prepare_training_data,
train_ohca_model,
evaluate_model,
complete_training_pipeline,
complete_annotation_and_train,
# Dataset class
OHCATrainingDataset
)
# Inference imports - v3.0 with optimal threshold support
from .ohca_inference import (
# New v3.0 functions with optimal threshold support
load_ohca_model_with_metadata,
run_inference_with_optimal_threshold,
quick_inference_with_optimal_threshold,
process_large_dataset_with_optimal_threshold,
analyze_predictions_enhanced,
# Legacy functions (backward compatible)
load_ohca_model,
run_inference,
quick_inference,
process_large_dataset,
test_model_on_sample,
get_high_confidence_cases,
analyze_predictions,
# Dataset class
OHCAInferenceDataset
)
# ---------------------------------------------------------------------------
# Package metadata
# ---------------------------------------------------------------------------
__version__ = "3.0.0"
__methodology_version__ = "3.0"
__author__ = "Mona Moukaddem"
__email__ = "your.email@example.com"

# Human-readable summary of what changed in the v3.0 methodology.
__improvements__ = [
    "Patient-level data splits prevent data leakage",
    "Proper train/validation/test methodology",
    "Optimal threshold finding and consistent usage",
    "Larger annotation samples (800 train + 200 val)",
    "Unbiased evaluation on independent test set",
    "Enhanced clinical decision support",
    "Backward compatibility with legacy models",
]

# ---------------------------------------------------------------------------
# Export groups: names of the objects imported at the top of this module
# ---------------------------------------------------------------------------

# v3.0 improved functions (recommended)
__improved_training_functions__ = [
    "create_patient_level_splits",
    "complete_improved_training_pipeline",
    "complete_annotation_and_train_v3",
    "find_optimal_threshold",
    "evaluate_on_test_set",
    "save_model_with_metadata",
]

__improved_inference_functions__ = [
    "load_ohca_model_with_metadata",
    "run_inference_with_optimal_threshold",
    "quick_inference_with_optimal_threshold",
    "process_large_dataset_with_optimal_threshold",
    "analyze_predictions_enhanced",
]

# Legacy functions (maintained for backward compatibility)
__legacy_training_functions__ = [
    "create_training_sample",
    "prepare_training_data",
    "train_ohca_model",
    "evaluate_model",
    "complete_training_pipeline",
    "complete_annotation_and_train",
    "OHCATrainingDataset",
]

__legacy_inference_functions__ = [
    "load_ohca_model",
    "run_inference",
    "quick_inference",
    "process_large_dataset",
    "test_model_on_sample",
    "get_high_confidence_cases",
    "analyze_predictions",
    "OHCAInferenceDataset",
]

# Public API: the four groups above, in the same order, as one new list.
__all__ = [
    *__improved_training_functions__,
    *__improved_inference_functions__,
    *__legacy_training_functions__,
    *__legacy_inference_functions__,
]
def get_version_info():
    """Return detailed version and methodology information.

    Returns:
        dict: A freshly built dictionary with these keys:

        * ``'version'`` - the package ``__version__`` string.
        * ``'methodology_version'`` - the ``__methodology_version__`` string.
        * ``'improvements'`` - a list of improvement descriptions. This is
          a copy of the module-level ``__improvements__`` list.
        * ``'author'`` - the ``__author__`` string.
        * ``'recommended_functions'`` - a dict mapping ``'training'`` and
          ``'inference'`` to the name of the recommended entry point.
    """
    return {
        'version': __version__,
        'methodology_version': __methodology_version__,
        # Hand out a copy: the module-level list is also read by
        # print_welcome_message(), so a caller editing the returned value
        # must not be able to change the package's own state.
        'improvements': list(__improvements__),
        'author': __author__,
        'recommended_functions': {
            'training': 'complete_improved_training_pipeline',
            'inference': 'quick_inference_with_optimal_threshold',
        },
    }
def print_welcome_message():
    """Print welcome message with key improvements.

    Writes a banner to standard output: a 60-character rule, the package
    title, one line per entry of the module-level ``__improvements__`` list,
    and the recommended training and inference entry points.

    Some lines contain non-ASCII characters (a check-mark emoji and a bullet).
    This function is called at module level when the package is imported, so
    a ``UnicodeEncodeError`` from a stdout that cannot encode them would
    abort the import. Such a line is therefore re-emitted with the
    unencodable characters replaced instead of raising. On a stdout that can
    encode the text, the output is unchanged.

    Returns:
        None
    """
    import sys

    def emit(text=""):
        # Print one line, degrading gracefully on a non-Unicode stdout.
        try:
            print(text)
        except UnicodeEncodeError:
            # Encoding fails before anything is written for this line, so
            # re-printing a sanitized version does not duplicate output.
            encoding = getattr(sys.stdout, "encoding", None) or "ascii"
            print(text.encode(encoding, errors="replace").decode(encoding))

    rule = "=" * 60

    emit(rule)
    emit("NLP OHCA Classifier v3.0 - Improved Methodology")
    emit(rule)
    emit("Key improvements addressing data scientist feedback:")
    for improvement in __improvements__:
        emit(f"✅ {improvement}")
    emit()
    emit("Recommended functions:")
    emit("• Training: complete_improved_training_pipeline()")
    emit("• Inference: quick_inference_with_optimal_threshold()")
    emit()
    emit("Legacy functions maintained for backward compatibility.")
    emit(rule)
# Print welcome message when package is imported.
# NOTE(review): this is an import-time side effect - the banner is written to
# stdout the first time the package is imported in a process, including from
# scripts and batch jobs. Confirm this is intended for library use.
print_welcome_message()
|