Spaces:
Running
Running
mfarnas committed on
Commit ·
4e913ea
1
Parent(s): c5ba16c
default params yaml
Browse files
src/model_utils.py
CHANGED
|
@@ -10,7 +10,10 @@ MODEL_DIR = Path("src/params")
|
|
| 10 |
|
| 11 |
import yaml
|
| 12 |
|
| 13 |
-
def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
|
|
|
|
|
|
|
|
|
|
| 14 |
if mode not in ["ensemble", "single_model"]:
|
| 15 |
raise ValueError("mode must be either 'ensemble' or 'single_model'")
|
| 16 |
|
|
@@ -26,8 +29,15 @@ def load_model_params(model_type, mode="ensemble", path=MODEL_DIR / "model_param
|
|
| 26 |
|
| 27 |
return params
|
| 28 |
|
| 29 |
-
def get_model(model_type, mode="ensemble", best_iter=None):
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# iter is set for single_model mode, where
|
| 33 |
if best_iter is not None:
|
|
@@ -60,7 +70,7 @@ def save_model(model, user_model_name, metrics_result_single=None):
|
|
| 60 |
login(token=os.environ["HF_TOKEN"])
|
| 61 |
|
| 62 |
timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
|
| 63 |
-
filename = f"{user_model_name}_single"
|
| 64 |
|
| 65 |
# Prepare model dict (same as before)
|
| 66 |
model_data = {
|
|
@@ -132,7 +142,7 @@ def save_model_ensemble(models, user_model_name, best_iterations=None, fold_scor
|
|
| 132 |
login(token=os.environ["HF_TOKEN"])
|
| 133 |
|
| 134 |
timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
|
| 135 |
-
filename = f"{user_model_name}_ensemble"
|
| 136 |
|
| 137 |
ensemble_data = {
|
| 138 |
"timestamp": timestamp,
|
|
|
|
| 10 |
|
| 11 |
import yaml
|
| 12 |
|
| 13 |
+
def load_model_params(model_type, target="GVHD", mode="ensemble", path=MODEL_DIR / "model_params.yaml"):
|
| 14 |
+
if target not in ["GVHD", "Acute GVHD(<100 days)", "Chronic GVHD>100 days"]:
|
| 15 |
+
raise ValueError("target must be one of 'GVHD', 'Acute GVHD(<100 days)', or 'Chronic GVHD>100 days'")
|
| 16 |
+
|
| 17 |
if mode not in ["ensemble", "single_model"]:
|
| 18 |
raise ValueError("mode must be either 'ensemble' or 'single_model'")
|
| 19 |
|
|
|
|
| 29 |
|
| 30 |
return params
|
| 31 |
|
| 32 |
+
def get_model(model_type, mode="ensemble", target="GVHD", best_iter=None):
|
| 33 |
+
if target == "GVHD":
|
| 34 |
+
path = MODEL_DIR / "model_params_gvhd.yaml"
|
| 35 |
+
elif target == "Acute GVHD(<100 days)":
|
| 36 |
+
path = MODEL_DIR / "model_params_acute.yaml"
|
| 37 |
+
elif target == "Chronic GVHD>100 days":
|
| 38 |
+
path = MODEL_DIR / "model_params_chronic.yaml"
|
| 39 |
+
|
| 40 |
+
params = load_model_params(model_type, target, mode, path)
|
| 41 |
|
| 42 |
# iter is set for single_model mode, where
|
| 43 |
if best_iter is not None:
|
|
|
|
| 70 |
login(token=os.environ["HF_TOKEN"])
|
| 71 |
|
| 72 |
timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
|
| 73 |
+
filename = f"{timestamp}_{user_model_name}_single"
|
| 74 |
|
| 75 |
# Prepare model dict (same as before)
|
| 76 |
model_data = {
|
|
|
|
| 142 |
login(token=os.environ["HF_TOKEN"])
|
| 143 |
|
| 144 |
timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
|
| 145 |
+
filename = f"{timestamp}_{user_model_name}_ensemble"
|
| 146 |
|
| 147 |
ensemble_data = {
|
| 148 |
"timestamp": timestamp,
|
src/params/model_params_acute.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CatBoost:
|
| 2 |
+
ensemble:
|
| 3 |
+
learning_rate: 0.03
|
| 4 |
+
depth: 8
|
| 5 |
+
loss_function: Logloss
|
| 6 |
+
random_seed: 0
|
| 7 |
+
l2_leaf_reg: 10
|
| 8 |
+
subsample: 1
|
| 9 |
+
grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
|
| 10 |
+
bagging_temperature: .5
|
| 11 |
+
random_strength: 0
|
| 12 |
+
min_data_in_leaf: 20
|
| 13 |
+
iterations: 10000
|
| 14 |
+
early_stopping_rounds: 50
|
| 15 |
+
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
| 16 |
+
logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
|
| 17 |
+
train_dir: '/tmp' # avoid write permission issues
|
| 18 |
+
auto_class_weights: Balanced # or None, or SqrtBalanced
|
| 19 |
+
|
| 20 |
+
single_model:
|
| 21 |
+
# in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
|
| 22 |
+
learning_rate: 0.03
|
| 23 |
+
depth: 8
|
| 24 |
+
loss_function: Logloss
|
| 25 |
+
random_seed: 0
|
| 26 |
+
l2_leaf_reg: 10
|
| 27 |
+
subsample: 1
|
| 28 |
+
grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
|
| 29 |
+
bagging_temperature: .5
|
| 30 |
+
random_strength: 0
|
| 31 |
+
min_data_in_leaf: 20
|
| 32 |
+
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
| 33 |
+
logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
|
| 34 |
+
train_dir: '/tmp' # avoid write permission issues
|
| 35 |
+
auto_class_weights: Balanced # or None, or SqrtBalanced
|
src/params/model_params_chronic.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CatBoost:
|
| 2 |
+
ensemble:
|
| 3 |
+
learning_rate: 0.03
|
| 4 |
+
depth: 6
|
| 5 |
+
loss_function: Logloss
|
| 6 |
+
random_seed: 0
|
| 7 |
+
l2_leaf_reg: 3
|
| 8 |
+
subsample: 0.9
|
| 9 |
+
grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
|
| 10 |
+
bagging_temperature: 1
|
| 11 |
+
random_strength: 3
|
| 12 |
+
min_data_in_leaf: 25
|
| 13 |
+
iterations: 10000
|
| 14 |
+
early_stopping_rounds: 50
|
| 15 |
+
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
| 16 |
+
logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
|
| 17 |
+
train_dir: '/tmp' # avoid write permission issues
|
| 18 |
+
auto_class_weights: Balanced # or None, or SqrtBalanced
|
| 19 |
+
|
| 20 |
+
single_model:
|
| 21 |
+
# in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
|
| 22 |
+
learning_rate: 0.03
|
| 23 |
+
depth: 6
|
| 24 |
+
loss_function: Logloss
|
| 25 |
+
random_seed: 0
|
| 26 |
+
l2_leaf_reg: 3
|
| 27 |
+
subsample: .9
|
| 28 |
+
grow_policy: Lossguide # SymmetricTree or Depthwise or Lossguide
|
| 29 |
+
bagging_temperature: 1
|
| 30 |
+
random_strength: 3
|
| 31 |
+
min_data_in_leaf: 25
|
| 32 |
+
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
| 33 |
+
logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
|
| 34 |
+
train_dir: '/tmp' # avoid write permission issues
|
| 35 |
+
auto_class_weights: Balanced # or None, or SqrtBalanced
|
src/params/{model_params.yaml → model_params_gvhd.yaml}
RENAMED
|
@@ -7,9 +7,9 @@ CatBoost:
|
|
| 7 |
l2_leaf_reg: 3
|
| 8 |
subsample: 1
|
| 9 |
grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
|
| 10 |
-
bagging_temperature:
|
| 11 |
random_strength: 2
|
| 12 |
-
min_data_in_leaf:
|
| 13 |
iterations: 10000
|
| 14 |
early_stopping_rounds: 50
|
| 15 |
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
|
@@ -17,8 +17,6 @@ CatBoost:
|
|
| 17 |
train_dir: '/tmp' # avoid write permission issues
|
| 18 |
auto_class_weights: Balanced # or None, or SqrtBalanced
|
| 19 |
|
| 20 |
-
# lr1e1_d12_l27_ss07_gpLg_bag1_rs5_m5
|
| 21 |
-
|
| 22 |
single_model:
|
| 23 |
# in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
|
| 24 |
learning_rate: 0.1
|
|
@@ -28,10 +26,12 @@ CatBoost:
|
|
| 28 |
l2_leaf_reg: 3
|
| 29 |
subsample: 1
|
| 30 |
grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
|
| 31 |
-
bagging_temperature:
|
| 32 |
random_strength: 2
|
| 33 |
-
min_data_in_leaf:
|
| 34 |
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
| 35 |
logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
|
| 36 |
train_dir: '/tmp' # avoid write permission issues
|
| 37 |
-
auto_class_weights: Balanced # or None, or SqrtBalanced
|
|
|
|
|
|
|
|
|
| 7 |
l2_leaf_reg: 3
|
| 8 |
subsample: 1
|
| 9 |
grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
|
| 10 |
+
bagging_temperature: .5
|
| 11 |
random_strength: 2
|
| 12 |
+
min_data_in_leaf: 15
|
| 13 |
iterations: 10000
|
| 14 |
early_stopping_rounds: 50
|
| 15 |
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
|
|
|
| 17 |
train_dir: '/tmp' # avoid write permission issues
|
| 18 |
auto_class_weights: Balanced # or None, or SqrtBalanced
|
| 19 |
|
|
|
|
|
|
|
| 20 |
single_model:
|
| 21 |
# in this mode, the model is trained on the entire dataset using the best_iter obtained from cross-validation
|
| 22 |
learning_rate: 0.1
|
|
|
|
| 26 |
l2_leaf_reg: 3
|
| 27 |
subsample: 1
|
| 28 |
grow_policy: SymmetricTree # SymmetricTree or Depthwise or Lossguide
|
| 29 |
+
bagging_temperature: .5
|
| 30 |
random_strength: 2
|
| 31 |
+
min_data_in_leaf: 15
|
| 32 |
custom_loss: ['AUC', "F1", "Accuracy", "Precision", "Recall", "BrierScore", "Logloss"]
|
| 33 |
logging_level: 'Silent' # or 'Verbose', 'Info', 'Debug'
|
| 34 |
train_dir: '/tmp' # avoid write permission issues
|
| 35 |
+
auto_class_weights: Balanced # or None, or SqrtBalanced
|
| 36 |
+
|
| 37 |
+
# lr1e1_d12_l27_ss07_gpLg_bag1_rs5_m5
|