"""
NLP OHCA Classifier v3.0 - Improved Methodology

A BERT-based classifier for detecting Out-of-Hospital Cardiac Arrest (OHCA)
cases in medical discharge notes using improved machine learning methodology.

Key Improvements in v3.0:
- Patient-level data splits to prevent data leakage
- Proper train/validation/test methodology
- Optimal threshold finding and usage
- Larger annotation samples for better performance
- Unbiased evaluation framework

This package contains two main modules:
1. ohca_training_pipeline: Complete training pipeline with improved methodology
2. ohca_inference: Apply pre-trained models with optimal threshold support
"""

import sys

# Training pipeline imports - v3.0 with improvements
from .ohca_training_pipeline import (
    # Improved functions
    create_patient_level_splits,
    complete_improved_training_pipeline,
    complete_annotation_and_train_v3,
    find_optimal_threshold,
    evaluate_on_test_set,
    save_model_with_metadata,
    # Legacy functions (backward compatible)
    create_training_sample,
    prepare_training_data,
    train_ohca_model,
    evaluate_model,
    complete_training_pipeline,
    complete_annotation_and_train,
    # Dataset class
    OHCATrainingDataset
)

# Inference imports - v3.0 with optimal threshold support
from .ohca_inference import (
    # New v3.0 functions with optimal threshold support
    load_ohca_model_with_metadata,
    run_inference_with_optimal_threshold,
    quick_inference_with_optimal_threshold,
    process_large_dataset_with_optimal_threshold,
    analyze_predictions_enhanced,
    # Legacy functions (backward compatible)
    load_ohca_model,
    run_inference,
    quick_inference,
    process_large_dataset,
    test_model_on_sample,
    get_high_confidence_cases,
    analyze_predictions,
    # Dataset class
    OHCAInferenceDataset
)

__version__ = "3.0.0"
__author__ = "Mona Moukaddem"
__email__ = "your.email@example.com"

# v3.0 improved functions (recommended)
__improved_training_functions__ = [
    "create_patient_level_splits",
    "complete_improved_training_pipeline",
    "complete_annotation_and_train_v3",
    "find_optimal_threshold",
    "evaluate_on_test_set",
    "save_model_with_metadata"
]

__improved_inference_functions__ = [
    "load_ohca_model_with_metadata",
    "run_inference_with_optimal_threshold",
    "quick_inference_with_optimal_threshold",
    "process_large_dataset_with_optimal_threshold",
    "analyze_predictions_enhanced"
]

# Legacy functions (maintained for backward compatibility)
__legacy_training_functions__ = [
    "create_training_sample",
    "prepare_training_data",
    "train_ohca_model",
    "evaluate_model",
    "complete_training_pipeline",
    "complete_annotation_and_train",
    "OHCATrainingDataset"
]

__legacy_inference_functions__ = [
    "load_ohca_model",
    "run_inference",
    "quick_inference",
    "process_large_dataset",
    "test_model_on_sample",
    "get_high_confidence_cases",
    "analyze_predictions",
    "OHCAInferenceDataset"
]

# All available functions
__all__ = (
    __improved_training_functions__ +
    __improved_inference_functions__ +
    __legacy_training_functions__ +
    __legacy_inference_functions__
)

# Methodology information
__methodology_version__ = "3.0"
__improvements__ = [
    "Patient-level data splits prevent data leakage",
    "Proper train/validation/test methodology",
    "Optimal threshold finding and consistent usage",
    "Larger annotation samples (800 train + 200 val)",
    "Unbiased evaluation on independent test set",
    "Enhanced clinical decision support",
    "Backward compatibility with legacy models"
]


def get_version_info():
    """Return detailed version and methodology information.

    Returns:
        dict: A freshly built dictionary with the keys ``'version'``,
        ``'methodology_version'``, ``'improvements'``, ``'author'`` and
        ``'recommended_functions'`` (itself a dict with ``'training'`` and
        ``'inference'`` entries naming the recommended entry points).
    """
    return {
        'version': __version__,
        'methodology_version': __methodology_version__,
        # Hand out a copy: callers that edit the returned list must not be
        # able to alter the package metadata (or the welcome banner, which
        # iterates over the same module-level list).
        'improvements': list(__improvements__),
        'author': __author__,
        'recommended_functions': {
            'training': 'complete_improved_training_pipeline',
            'inference': 'quick_inference_with_optimal_threshold'
        }
    }


def _print_safely(text):
    """Print *text*, degrading unencodable characters instead of raising.

    The banner contains non-ASCII symbols. When stdout uses an encoding that
    cannot represent them, ``print`` raises ``UnicodeEncodeError``; because
    the banner is printed at import time, that would make the whole package
    fail to import. In that case the text is re-encoded for the stream with
    ``errors="replace"`` so a readable approximation is printed instead.
    """
    try:
        print(text)
    except UnicodeEncodeError:
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(text.encode(encoding, errors="replace").decode(encoding))


def print_welcome_message():
    """Print welcome message with key improvements.

    Writes a banner to stdout: a header, one line per entry of
    ``__improvements__``, the recommended training/inference functions and a
    note about legacy functions. Lines containing non-ASCII symbols go
    through ``_print_safely`` so the banner never raises on consoles with a
    limited encoding.
    """
    print("="*60)
    print("NLP OHCA Classifier v3.0 - Improved Methodology")
    print("="*60)
    print("Key improvements addressing data scientist feedback:")
    for improvement in __improvements__:
        _print_safely(f"✅ {improvement}")
    print()
    print("Recommended functions:")
    _print_safely("• Training: complete_improved_training_pipeline()")
    _print_safely("• Inference: quick_inference_with_optimal_threshold()")
    print()
    print("Legacy functions maintained for backward compatibility.")
    print("="*60)


# Print welcome message when package is imported
print_welcome_message()