File size: 969 Bytes
dc6f64d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
{
    "dtype": "torch.float16",
    "train_dataset": "EYLSFTStaticDataset",
    "train_dataset_len": 9223372036854775807,
    "test_dataset": "EYLSFTStaticDataset",
    "test_dataset_len": 9223372036854775807,
    "n_layers": "24",
    "n_heads": "16",
    "embedding_dim": "1024",
    "dropout_rate": "0.2",
    "use_bias": "True",
    "block_size": "1024",
    "vocab_size": "50257",
    "model_name": "gpt2-medium/dropout",
    "hf_model": "gpt2-medium",
    "grad_clip": "1.0",
    "exp_name": "experiment_name",
    "batch_size": "4",
    "lr": "0.0001",
    "lora_rank": "0",
    "pretrain": "huggingface",
    "activation_checkpointing": "False",
    "finetune_method": "",
    "total_epochs": "1",
    "max_steps": "50000",
    "actor_weights": "",
    "critic_weights": "",
    "reward_model_weights": "",
    "sft_model_weights": "",
    "actor_lr": "5e-06",
    "critic_lr": "9e-06",
    "kl_beta": "0.02",
    "adam_beta1": "0.9",
    "adam_beta2": "0.95"
}