# Identifies this run. How these keys are consumed is decided by the program
# that loads this file and is not visible from here.
[experiment]
# Run label. NOTE(review): presumably used to name outputs/logs — confirm.
name = "binary-56"
# NOTE(review): presumably selects a binary-classification setup — confirm
# which values the loader accepts.
type = "binary"


# Training-data source and resampling options.
[dataset]
# NOTE(review): looks like a Hugging Face Hub dataset id (owner/name) — confirm.
path = "thejosango/nuha-dataset"
# NOTE(review): "main" is a moving reference, whereas `revision` under [model]
# is pinned to a commit hash. Pin a commit here too if runs must be
# reproducible.
dataset_revision = "main"
# NOTE(review): the unit is not visible here (fraction of samples augmented vs.
# augmented-to-original ratio) — confirm against the loader.
augment_ratio = 0.75
# Holds a boolean although the name suggests a strategy selector; `false`
# presumably disables undersampling — confirm.
undersampling_strategy = false


# Base checkpoint and architecture overrides.
[model]
# NOTE(review): looks like a Hugging Face Hub model id (owner/name) — confirm.
pretrained_model_name_or_path = "thejosango/nuha-mlm"
# Full 40-character hex id; presumably a commit hash that pins the checkpoint
# so it cannot change between runs — confirm.
revision = "ce20f497544665775129f9ff5b3cd2a3e350dce8"
# NOTE(review): presumably overrides the checkpoint's layer count; whether the
# extra layers are dropped or the model is rebuilt depends on the loader —
# confirm.
num_hidden_layers = 4
# NOTE(review): presumably the dropout probability of the classification head —
# confirm.
classifier_dropout = 0.50

# Optimisation hyperparameters.
# NOTE(review): most key names match Hugging Face `TrainingArguments` fields and
# the `early_stopping_*` keys match `EarlyStoppingCallback` arguments; presumably
# they are forwarded there — confirm against the loader.
[training]
num_train_epochs = 5
# No warmup configured: with the "linear" scheduler below, the schedule
# presumably starts at `learning_rate` — confirm.
warmup_steps = 0
lr_scheduler_type = "linear"
learning_rate = 5e-5
per_device_train_batch_size = 64
per_device_eval_batch_size = 64
# With accumulation of 1, each optimizer step presumably sees 64 samples per
# device — confirm.
gradient_accumulation_steps = 1
weight_decay = 1e-3
label_smoothing_factor = 0.1
# NOTE(review): presumably enables class-weighted loss; how it combines with
# `label_smoothing_factor` depends on the training code — confirm.
weighted_loss = true
# NOTE(review): patience equals `num_train_epochs`. If evaluation runs once per
# epoch, early stopping can never trigger within 5 epochs — confirm the
# evaluation cadence or lower the patience.
early_stopping_patience = 5
# NOTE(review): presumably the minimum metric improvement that counts as
# progress — confirm.
early_stopping_threshold = 0.005