# Experiment configuration for the "binary-56" run: dataset source, base
# model, and training hyperparameters.

[experiment]
name = "binary-56"
type = "binary"

[dataset]
path = "thejosango/nuha-dataset"
# NOTE(review): "main" reads like a moving branch reference, whereas the model
# revision below is a commit hash. Confirm whether the dataset should be
# pinned the same way for reproducibility.
dataset_revision = "main"
augment_ratio = 0.75
undersampling_strategy = false

[model]
pretrained_model_name_or_path = "thejosango/nuha-mlm"
revision = "ce20f497544665775129f9ff5b3cd2a3e350dce8"
num_hidden_layers = 4
classifier_dropout = 0.50

[training]
num_train_epochs = 5
# NOTE(review): zero warmup steps, so the scheduler presumably starts at the
# full learning rate; confirm this is intended.
warmup_steps = 0
lr_scheduler_type = "linear"
learning_rate = 5e-5
per_device_train_batch_size = 64
per_device_eval_batch_size = 64
gradient_accumulation_steps = 1
weight_decay = 1e-3
label_smoothing_factor = 0.1
weighted_loss = true
# NOTE(review): patience equals num_train_epochs. If evaluation happens once
# per epoch, early stopping can never trigger before training ends; verify
# the evaluation cadence in the consuming code.
early_stopping_patience = 5
early_stopping_threshold = 0.005