monajm36
/

ohca-classifier-v3

Model card Files Files and versions

xet

Community

monajm36 committed on Sep 12, 2025

Commit

9af99d7

unverified ·

1 Parent(s): a5114c2

Update __init__.py

Browse files

Files changed (1) hide show

src/__init__.py +107 -14

src/__init__.py CHANGED Viewed

@@ -1,28 +1,52 @@
 """
-NLP OHCA Classifier
 A BERT-based classifier for detecting Out-of-Hospital Cardiac Arrest (OHCA)
-cases in medical discharge notes.
-This package contains two main modules:
-1. ohca_training_pipeline: Complete training pipeline from annotation to model training
-2. ohca_inference: Apply pre-trained models to new datasets
 """
-# Training pipeline imports
 from .ohca_training_pipeline import (
     create_training_sample,
     prepare_training_data,
     train_ohca_model,
     evaluate_model,
     complete_training_pipeline,
     complete_annotation_and_train,
     OHCATrainingDataset
 )
-# Inference imports
 from .ohca_inference import (
     load_ohca_model,
     run_inference,
     quick_inference,
@@ -30,15 +54,35 @@ from .ohca_inference import (
     test_model_on_sample,
     get_high_confidence_cases,
     analyze_predictions,
     OHCAInferenceDataset
 )
-__version__ = "1.0.0"
 __author__ = "Mona Moukaddem"
 __email__ = "your.email@example.com"
-# Training pipeline functions
-__training_functions__ = [
     "create_training_sample",
     "prepare_training_data",
     "train_ohca_model",
@@ -48,8 +92,7 @@ __training_functions__ = [
     "OHCATrainingDataset"
 ]
-# Inference functions
-__inference_functions__ = [
     "load_ohca_model",
     "run_inference",
     "quick_inference",
@@ -60,4 +103,54 @@ __inference_functions__ = [
     "OHCAInferenceDataset"
 ]
-__all__ = __training_functions__ + __inference_functions__

 """
+NLP OHCA Classifier v3.0 - Improved Methodology
 A BERT-based classifier for detecting Out-of-Hospital Cardiac Arrest (OHCA)
+cases in medical discharge notes using improved machine learning methodology.
+Key Improvements in v3.0:
+- Patient-level data splits to prevent data leakage
+- Proper train/validation/test methodology
+- Optimal threshold finding and usage
+- Larger annotation samples for better performance
+- Unbiased evaluation framework
+This package contains two main modules:
+1. ohca_training_pipeline: Complete training pipeline with improved methodology
+2. ohca_inference: Apply pre-trained models with optimal threshold support
 """
+# Training pipeline imports - v3.0 with improvements
 from .ohca_training_pipeline import (
+    # Improved functions
+    create_patient_level_splits,
+    complete_improved_training_pipeline,
+    complete_annotation_and_train_v3,
+    find_optimal_threshold,
+    evaluate_on_test_set,
+    save_model_with_metadata,
+    # Legacy functions (backward compatible)
     create_training_sample,
     prepare_training_data,
     train_ohca_model,
     evaluate_model,
     complete_training_pipeline,
     complete_annotation_and_train,
+    # Dataset class
     OHCATrainingDataset
 )
+# Inference imports - v3.0 with optimal threshold support
 from .ohca_inference import (
+    # New v3.0 functions with optimal threshold support
+    load_ohca_model_with_metadata,
+    run_inference_with_optimal_threshold,
+    quick_inference_with_optimal_threshold,
+    process_large_dataset_with_optimal_threshold,
+    analyze_predictions_enhanced,
+    # Legacy functions (backward compatible)
     load_ohca_model,
     run_inference,
     quick_inference,
     test_model_on_sample,
     get_high_confidence_cases,
     analyze_predictions,
+    # Dataset class
     OHCAInferenceDataset
 )
+__version__ = "3.0.0"
 __author__ = "Mona Moukaddem"
 __email__ = "your.email@example.com"
+# v3.0 improved functions (recommended)
+__improved_training_functions__ = [
+    "create_patient_level_splits",
+    "complete_improved_training_pipeline",
+    "complete_annotation_and_train_v3",
+    "find_optimal_threshold",
+    "evaluate_on_test_set",
+    "save_model_with_metadata"
+]
+__improved_inference_functions__ = [
+    "load_ohca_model_with_metadata",
+    "run_inference_with_optimal_threshold",
+    "quick_inference_with_optimal_threshold",
+    "process_large_dataset_with_optimal_threshold",
+    "analyze_predictions_enhanced"
+]
+# Legacy functions (maintained for backward compatibility)
+__legacy_training_functions__ = [
     "create_training_sample",
     "prepare_training_data",
     "train_ohca_model",
     "OHCATrainingDataset"
 ]
+__legacy_inference_functions__ = [
     "load_ohca_model",
     "run_inference",
     "quick_inference",
     "OHCAInferenceDataset"
 ]
+# All available functions
+__all__ = (
+    __improved_training_functions__ +
+    __improved_inference_functions__ +
+    __legacy_training_functions__ +
+    __legacy_inference_functions__
+)
+# Methodology information
+__methodology_version__ = "3.0"
+__improvements__ = [
+    "Patient-level data splits prevent data leakage",
+    "Proper train/validation/test methodology",
+    "Optimal threshold finding and consistent usage",
+    "Larger annotation samples (800 train + 200 val)",
+    "Unbiased evaluation on independent test set",
+    "Enhanced clinical decision support",
+    "Backward compatibility with legacy models"
+]
+def get_version_info():
+    """Build a dictionary summarising the package version and methodology.
+
+    The result carries the keys 'version', 'methodology_version',
+    'improvements', 'author' and 'recommended_functions'; the last one is a
+    nested dict naming the suggested 'training' and 'inference' entry points.
+    """
+    # Entry points the package advertises as the preferred way in.
+    recommended = {
+        'training': 'complete_improved_training_pipeline',
+        'inference': 'quick_inference_with_optimal_threshold'
+    }
+    # Assemble the summary from the module-level metadata constants.
+    info = {}
+    info['version'] = __version__
+    info['methodology_version'] = __methodology_version__
+    info['improvements'] = __improvements__
+    info['author'] = __author__
+    info['recommended_functions'] = recommended
+    return info
+def print_welcome_message():
+    """Write the welcome banner to stdout.
+
+    Prints a title framed by 60-character rules, one check-marked line per
+    entry of the module-level ``__improvements__`` list, and the names of the
+    recommended training and inference functions.
+    """
+    rule = "=" * 60
+    heading = (
+        rule,
+        "NLP OHCA Classifier v3.0 - Improved Methodology",
+        rule,
+        "Key improvements addressing data scientist feedback:",
+    )
+    closing = (
+        "",  # blank separator line
+        "Recommended functions:",
+        "• Training: complete_improved_training_pipeline()",
+        "• Inference: quick_inference_with_optimal_threshold()",
+        "",  # blank separator line
+        "Legacy functions maintained for backward compatibility.",
+        rule,
+    )
+    for text in heading:
+        print(text)
+    for entry in __improvements__:
+        print(f"✅ {entry}")
+    for text in closing:
+        print(text)
+# Print welcome message when package is imported
+print_welcome_message()

Update __init__.py

Update __init__.py