gvhd-intel-pro

Running

App Files Files Community

mfarnas committed on Jul 8, 2025

Commit

4e913ea

1 Parent(s): c5ba16c

default params yaml

Browse files

Files changed (4) hide show

src/model_utils.py +15 -5
src/params/model_params_acute.yaml +35 -0
src/params/model_params_chronic.yaml +35 -0
src/params/{model_params.yaml → model_params_gvhd.yaml} +7 -7

src/model_utils.py CHANGED Viewed

@@ -10,7 +10,10 @@ MODEL_DIR = Path("src/params")
 import yaml
-def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
     if mode not in ["ensemble", "single_model"]:
         raise ValueError("mode must be either 'ensemble' or 'single_model'")
@@ -26,8 +29,15 @@ def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_param
     return params
-def get_model(model_type, mode="ensemble", best_iter=None):
-    params = load_model_params(model_type, mode)
     # iter is set for single_model mode, where
     if best_iter is not None:
@@ -60,7 +70,7 @@ def save_model(model, user_model_name, metrics_result_single=None):
         login(token=os.environ["HF_TOKEN"])
     timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
-    filename = f"{user_model_name}_single"
     # Prepare model dict (same as before)
     model_data = {
@@ -132,7 +142,7 @@ def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scor
         login(token=os.environ["HF_TOKEN"])
     timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
-    filename = f"{user_model_name}_ensemble"
     ensemble_data = {
         "timestamp": timestamp,

 import yaml
+def load_model_params(model_type, target="GVHD", mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
+    if target not in ["GVHD", "Acute GVHD(<100 days)", "Chronic GVHD>100 days"]:
+        raise ValueError("target must be one of 'GVHD', 'Acute GVHD(<100 days)', or 'Chronic GVHD>100 days'")
     if mode not in ["ensemble", "single_model"]:
         raise ValueError("mode must be either 'ensemble' or 'single_model'")
     return params
+def get_model(model_type, mode="ensemble", target="GVHD", best_iter=None):
+    if target == "GVHD":
+        path = MODEL_DIR / "model_params_gvhd.yaml"
+    elif target == "Acute GVHD(<100 days)":
+        path = MODEL_DIR / "model_params_acute.yaml"
+    elif target == "Chronic GVHD>100 days":
+        path = MODEL_DIR / "model_params_chronic.yaml"
+    params = load_model_params(model_type, target, mode, path)
     # iter is set for single_model mode, where
     if best_iter is not None:
         login(token=os.environ["HF_TOKEN"])
     timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
+    filename = f"{timestamp}_{user_model_name}_single"
     # Prepare model dict (same as before)
     model_data = {
         login(token=os.environ["HF_TOKEN"])
     timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
+    filename = f"{timestamp}_{user_model_name}_ensemble"
     ensemble_data = {
         "timestamp": timestamp,

src/params/model_params_acute.yaml ADDED Viewed

	@@ -0,0 +1,35 @@

+CatBoost:
+  ensemble:
+    learning_rate: 0.03
+    depth: 8
+    loss_function: Logloss
+    random_seed: 0
+    l2_leaf_reg: 10
+    subsample: 1
+    grow_policy: Lossguide  # SymmetricTree or Depthwise or Lossguide
+    bagging_temperature: .5
+    random_strength: 0
+    min_data_in_leaf: 20
+    iterations: 10000
+    early_stopping_rounds: 50
+    custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
+    logging_level: 'Silent'  # or 'Verbose', 'Info', 'Debug'
+    train_dir: '/tmp'         # avoid write permission issues
+    auto_class_weights: Balanced  # or None, or SqrtBalanced
+  single_model:
+    # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
+    learning_rate: 0.03
+    depth: 8
+    loss_function: Logloss
+    random_seed: 0
+    l2_leaf_reg: 10
+    subsample: 1
+    grow_policy: Lossguide  # SymmetricTree or Depthwise or Lossguide
+    bagging_temperature: .5
+    random_strength: 0
+    min_data_in_leaf: 20
+    custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
+    logging_level: 'Silent'  # or 'Verbose', 'Info', 'Debug'
+    train_dir: '/tmp'         # avoid write permission issues
+    auto_class_weights: Balanced  # or None, or SqrtBalanced

src/params/model_params_chronic.yaml ADDED Viewed

	@@ -0,0 +1,35 @@

+CatBoost:
+  ensemble:
+    learning_rate: 0.03
+    depth: 6
+    loss_function: Logloss
+    random_seed: 0
+    l2_leaf_reg: 3
+    subsample: 0.9
+    grow_policy: Lossguide  # SymmetricTree or Depthwise or Lossguide
+    bagging_temperature: 1
+    random_strength: 3
+    min_data_in_leaf: 25
+    iterations: 10000
+    early_stopping_rounds: 50
+    custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
+    logging_level: 'Silent'  # or 'Verbose', 'Info', 'Debug'
+    train_dir: '/tmp'         # avoid write permission issues
+    auto_class_weights: Balanced  # or None, or SqrtBalanced
+  single_model:
+    # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
+    learning_rate: 0.03
+    depth: 6
+    loss_function: Logloss
+    random_seed: 0
+    l2_leaf_reg: 3
+    subsample: .9
+    grow_policy: Lossguide  # SymmetricTree or Depthwise or Lossguide
+    bagging_temperature: 1
+    random_strength: 3
+    min_data_in_leaf: 25
+    custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
+    logging_level: 'Silent'  # or 'Verbose', 'Info', 'Debug'
+    train_dir: '/tmp'         # avoid write permission issues
+    auto_class_weights: Balanced  # or None, or SqrtBalanced

src/params/{model_params.yaml → model_params_gvhd.yaml} RENAMED Viewed

@@ -7,9 +7,9 @@ CatBoost:
     l2_leaf_reg: 3
     subsample: 1
     grow_policy: SymmetricTree  # SymmetricTree or Depthwise or Lossguide
-    bagging_temperature: 1
     random_strength: 2
-    min_data_in_leaf: 20
     iterations: 10000
     early_stopping_rounds: 50
     custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
@@ -17,8 +17,6 @@ CatBoost:
     train_dir: '/tmp'         # avoid write permission issues
     auto_class_weights: Balanced  # or None, or SqrtBalanced
-# lr1e1_d12_l27_ss07_gpLg_bag1_rs5_m5
   single_model:
     # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
     learning_rate: 0.1
@@ -28,10 +26,12 @@ CatBoost:
     l2_leaf_reg: 3
     subsample: 1
     grow_policy: SymmetricTree  # SymmetricTree or Depthwise or Lossguide
-    bagging_temperature: 1
     random_strength: 2
-    min_data_in_leaf: 20
     custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
     logging_level: 'Silent'  # or 'Verbose', 'Info', 'Debug'
     train_dir: '/tmp'         # avoid write permission issues
-    auto_class_weights: Balanced  # or None, or SqrtBalanced

     l2_leaf_reg: 3
     subsample: 1
     grow_policy: SymmetricTree  # SymmetricTree or Depthwise or Lossguide
+    bagging_temperature: .5
     random_strength: 2
+    min_data_in_leaf: 15
     iterations: 10000
     early_stopping_rounds: 50
     custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
     train_dir: '/tmp'         # avoid write permission issues
     auto_class_weights: Balanced  # or None, or SqrtBalanced
   single_model:
     # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
     learning_rate: 0.1
     l2_leaf_reg: 3
     subsample: 1
     grow_policy: SymmetricTree  # SymmetricTree or Depthwise or Lossguide
+    bagging_temperature: .5
     random_strength: 2
+    min_data_in_leaf: 15
     custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
     logging_level: 'Silent'  # or 'Verbose', 'Info', 'Debug'
     train_dir: '/tmp'         # avoid write permission issues
+    auto_class_weights: Balanced  # or None, or SqrtBalanced
+# lr1e1_d12_l27_ss07_gpLg_bag1_rs5_m5