{
"dtype": "torch.float16",
"train_dataset": "EYLSFTStaticDataset",
"train_dataset_len": 9223372036854775807,
"test_dataset": "EYLSFTStaticDataset",
"test_dataset_len": 9223372036854775807,
"n_layers": "24",
"n_heads": "16",
"embedding_dim": "1024",
"dropout_rate": "0.2",
"use_bias": "True",
"block_size": "1024",
"vocab_size": "50257",
"model_name": "gpt2-medium/dropout",
"hf_model": "gpt2-medium",
"grad_clip": "1.0",
"exp_name": "experiment_name",
"batch_size": "4",
"lr": "0.0001",
"lora_rank": "0",
"pretrain": "huggingface",
"activation_checkpointing": "False",
"finetune_method": "",
"total_epochs": "1",
"max_steps": "50000",
"actor_weights": "",
"critic_weights": "",
"reward_model_weights": "",
"sft_model_weights": "",
"actor_lr": "5e-06",
"critic_lr": "9e-06",
"kl_beta": "0.02",
"adam_beta1": "0.9",
"adam_beta2": "0.95"
}