File size: 671 Bytes
3c6e625
b13ab70
3c6e625
 
 
 
 
 
aa7b772
152141c
3c6e625
 
 
cc3ed62
f54519f
4a30404
b13ab70
3c6e625
 
4c790e4
 
2dfdcee
bf5de58
4c790e4
6aaa15d
 
f54519f
aca5e41
2dfdcee
ee0cc92
cc3ed62
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Identity of this experiment run.
[experiment]
# Label for the run; how it is used (output directory, tracker run name, ...)
# is decided by the consuming script — TODO confirm.
name = "binary-56"
# Task variant. Presumably selects a binary-classification setup — verify
# against the code that reads this key.
type = "binary"


# Training data source and resampling options.
[dataset]
# Identifier in "owner/name" form; looks like a hosted dataset repo id —
# confirm against the loader.
path = "thejosango/nuha-dataset"
# NOTE(review): "main" is presumably a branch name and therefore a moving
# ref, whereas [model].revision is a full 40-character hash. Pin this to a
# commit as well if runs must be reproducible.
dataset_revision = "main"
# Resampling knobs. Exact semantics (what the ratio is relative to, which
# values the strategy accepts besides false) live in the data pipeline —
# TODO confirm.
undersampling_strategy = false
augment_ratio = 0.75


# Pretrained checkpoint to start from, plus architecture overrides.
[model]
# Checkpoint identifier in "owner/name" form.
pretrained_model_name_or_path = "thejosango/nuha-mlm"
# 40-character hex ref; presumably a commit hash that pins the exact
# checkpoint version.
revision = "ce20f497544665775129f9ff5b3cd2a3e350dce8"
# Presumably applied as model-config overrides at load time — verify against
# the loader (e.g. whether a smaller layer count truncates the checkpoint).
classifier_dropout = 0.5
num_hidden_layers = 4

# Fine-tuning hyperparameters.
# NOTE(review): most key names here match Hugging Face `TrainingArguments`
# fields, so they are presumably forwarded to it as-is; `weighted_loss` and the
# early-stopping keys look like project-level additions — confirm in the
# training script.
[training]
# Optimisation schedule.
num_train_epochs = 5
learning_rate = 5e-5
lr_scheduler_type = "linear"
warmup_steps = 0
weight_decay = 0.001

# Batching: 64 samples per device, no gradient accumulation.
per_device_train_batch_size = 64
per_device_eval_batch_size = 64
gradient_accumulation_steps = 1

# Loss shaping.
label_smoothing_factor = 0.1
weighted_loss = true

# Early stopping.
# NOTE(review): patience (5) equals num_train_epochs (5). If evaluation runs
# once per epoch, the patience counter can never reach 5 before training ends,
# so early stopping would be a no-op — confirm the evaluation cadence.
early_stopping_patience = 5
early_stopping_threshold = 0.005