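# NOTE(review): the next three lines look like web-page header text captured
# along with the file rather than configuration data; they are kept here as
# comments so the file parses as TOML.
# eoinf's picture
# Upload folder using huggingface_hub
# 4f1d4a2 verified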
# Run identity: model name and dataset identifier.
model_name = "pile_llama_replace_17367_new"
dataset_name = "eoinf/PL_Replace17367_L2_alldataset"

# Model dimensions and initialisation.
# n_heads * d_head = 8 * 64 = 512, which equals d_model;
# d_mlp = 2048 is 4 * d_model.
n_layers = 8
d_model = 512
d_mlp = 2048
d_head = 64
n_heads = 8
attn_only = false
layer_norm_eps = 1e-5
init_range = 0.02
n_ctx = 1024
d_vocab = 32_000

# Seed and hardware.
seed = 10
device = "cuda"
use_bfloat16_matmul = false

# Batching.
# NOTE(review): presumably the effective batch per optimizer step is
# batch_size_per_device * n_devices * batches_per_step = 32 -- confirm
# against the code that consumes this file.
batch_size_per_device = 32
n_devices = 1
batches_per_step = 1

# Token budget: warmup_tokens is 15% of max_tokens (30M of 200M).
max_tokens = 200_000_000
warmup_tokens = 30_000_000

# Optimisation.
# NOTE(review): two learning rates are configured; which parameter groups
# "hidden" and "vector" refer to is decided by the consumer -- confirm there.
lr_hidden = 0.002
lr_vector = 0.001
lr_schedule = "constant_with_warmup"
weight_decay = 0.05
grad_norm_clip = 1.0
train_loss_moving_average_beta = 0.99

# Logging and checkpointing.
# NOTE(review): the unit of the two intervals (steps vs. tokens) and the exact
# role of checkpoint_interval_ratio are not visible in this file -- verify
# against the consumer before changing them.
log_interval = 25
save_checkpoints = true
checkpoint_interval = 500
checkpoint_interval_ratio = 1.1
save_log_checkpoints = true