mfarnas committed on
Commit
4e913ea
·
1 Parent(s): c5ba16c

default params yaml

Browse files
src/model_utils.py CHANGED
@@ -10,7 +10,10 @@ MODEL_DIR = Path("src/params")
10
 
11
  import yaml
12
 
13
- def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
 
 
 
14
  if mode not in ["ensemble", "single_model"]:
15
  raise ValueError("mode must be either 'ensemble' or 'single_model'")
16
 
@@ -26,8 +29,15 @@ def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_param
26
 
27
  return params
28
 
29
- def get_model(model_type, mode="ensemble", best_iter=None):
30
- params = load_model_params(model_type, mode)
 
 
 
 
 
 
 
31
 
32
  # iter is set for single_model mode, where
33
  if best_iter is not None:
@@ -60,7 +70,7 @@ def save_model(model, user_model_name, metrics_result_single=None):
60
  login(token=os.environ["HF_TOKEN"])
61
 
62
  timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
63
- filename = f"{user_model_name}_single"
64
 
65
  # Prepare model dict (same as before)
66
  model_data = {
@@ -132,7 +142,7 @@ def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scor
132
  login(token=os.environ["HF_TOKEN"])
133
 
134
  timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
135
- filename = f"{user_model_name}_ensemble"
136
 
137
  ensemble_data = {
138
  "timestamp": timestamp,
 
10
 
11
  import yaml
12
 
13
+ def load_model_params(model_type, target="GVHD", mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
14
+ if target not in ["GVHD", "Acute GVHD(<100 days)", "Chronic GVHD>100 days"]:
15
+ raise ValueError("target must be one of 'GVHD', 'Acute GVHD(<100 days)', or 'Chronic GVHD>100 days'")
16
+
17
  if mode not in ["ensemble", "single_model"]:
18
  raise ValueError("mode must be either 'ensemble' or 'single_model'")
19
 
 
29
 
30
  return params
31
 
32
+ def get_model(model_type, mode="ensemble", target="GVHD", best_iter=None):
33
+ if target == "GVHD":
34
+ path = MODEL_DIR / "model_params_gvhd.yaml"
35
+ elif target == "Acute GVHD(<100 days)":
36
+ path = MODEL_DIR / "model_params_acute.yaml"
37
+ elif target == "Chronic GVHD>100 days":
38
+ path = MODEL_DIR / "model_params_chronic.yaml"
39
+
40
+ params = load_model_params(model_type, target, mode, path)
41
 
42
  # iter is set for single_model mode, where
43
  if best_iter is not None:
 
70
  login(token=os.environ["HF_TOKEN"])
71
 
72
  timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
73
+ filename = f"{timestamp}_{user_model_name}_single"
74
 
75
  # Prepare model dict (same as before)
76
  model_data = {
 
142
  login(token=os.environ["HF_TOKEN"])
143
 
144
  timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
145
+ filename = f"{timestamp}_{user_model_name}_ensemble"
146
 
147
  ensemble_data = {
148
  "timestamp": timestamp,
src/params/model_params_acute.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CatBoost:
2
+ ensemble:
3
+ learning_rate: 0.03
4
+ depth: 8
5
+ loss_function: Logloss
6
+ random_seed: 0
7
+ l2_leaf_reg: 10
8
+ subsample: 1
9
+ grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
10
+ bagging_temperature: .5
11
+ random_strength: 0
12
+ min_data_in_leaf: 20
13
+ iterations: 10000
14
+ early_stopping_rounds: 50
15
+ custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
16
+ logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
17
+ train_dir: '/tmp' # avoid write permission issues
18
+ auto_class_weights: Balanced # or None, or SqrtBalanced
19
+
20
+ single_model:
21
+ # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
22
+ learning_rate: 0.03
23
+ depth: 8
24
+ loss_function: Logloss
25
+ random_seed: 0
26
+ l2_leaf_reg: 10
27
+ subsample: 1
28
+ grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
29
+ bagging_temperature: .5
30
+ random_strength: 0
31
+ min_data_in_leaf: 20
32
+ custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
33
+ logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
34
+ train_dir: '/tmp' # avoid write permission issues
35
+ auto_class_weights: Balanced # or None, or SqrtBalanced
src/params/model_params_chronic.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CatBoost:
2
+ ensemble:
3
+ learning_rate: 0.03
4
+ depth: 6
5
+ loss_function: Logloss
6
+ random_seed: 0
7
+ l2_leaf_reg: 3
8
+ subsample: 0.9
9
+ grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
10
+ bagging_temperature: 1
11
+ random_strength: 3
12
+ min_data_in_leaf: 25
13
+ iterations: 10000
14
+ early_stopping_rounds: 50
15
+ custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
16
+ logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
17
+ train_dir: '/tmp' # avoid write permission issues
18
+ auto_class_weights: Balanced # or None, or SqrtBalanced
19
+
20
+ single_model:
21
+ # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
22
+ learning_rate: 0.03
23
+ depth: 6
24
+ loss_function: Logloss
25
+ random_seed: 0
26
+ l2_leaf_reg: 3
27
+ subsample: .9
28
+ grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
29
+ bagging_temperature: 1
30
+ random_strength: 3
31
+ min_data_in_leaf: 25
32
+ custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
33
+ logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
34
+ train_dir: '/tmp' # avoid write permission issues
35
+ auto_class_weights: Balanced # or None, or SqrtBalanced
src/params/{model_params.yaml → model_params_gvhd.yaml} RENAMED
@@ -7,9 +7,9 @@ CatBoost:
7
  l2_leaf_reg: 3
8
  subsample: 1
9
  grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
10
- bagging_temperature: 1
11
  random_strength: 2
12
- min_data_in_leaf: 20
13
  iterations: 10000
14
  early_stopping_rounds: 50
15
  custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
@@ -17,8 +17,6 @@ CatBoost:
17
  train_dir: '/tmp' # avoid write permission issues
18
  auto_class_weights: Balanced # or None, or SqrtBalanced
19
 
20
- # lr1e1_d12_l27_ss07_gpLg_bag1_rs5_m5
21
-
22
  single_model:
23
  # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
24
  learning_rate: 0.1
@@ -28,10 +26,12 @@ CatBoost:
28
  l2_leaf_reg: 3
29
  subsample: 1
30
  grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
31
- bagging_temperature: 1
32
  random_strength: 2
33
- min_data_in_leaf: 20
34
  custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
35
  logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
36
  train_dir: '/tmp' # avoid write permission issues
37
- auto_class_weights: Balanced # or None, or SqrtBalanced
 
 
 
7
  l2_leaf_reg: 3
8
  subsample: 1
9
  grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
10
+ bagging_temperature: .5
11
  random_strength: 2
12
+ min_data_in_leaf: 15
13
  iterations: 10000
14
  early_stopping_rounds: 50
15
  custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
 
17
  train_dir: '/tmp' # avoid write permission issues
18
  auto_class_weights: Balanced # or None, or SqrtBalanced
19
 
 
 
20
  single_model:
21
  # in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
22
  learning_rate: 0.1
 
26
  l2_leaf_reg: 3
27
  subsample: 1
28
  grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
29
+ bagging_temperature: .5
30
  random_strength: 2
31
+ min_data_in_leaf: 15
32
  custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
33
  logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
34
  train_dir: '/tmp' # avoid write permission issues
35
+ auto_class_weights: Balanced # or None, or SqrtBalanced
36
+
37
+ # lr1e1_d12_l27_ss07_gpLg_bag1_rs5_m5