# model/config.toml
# Hugging Face Hub page header (not part of the configuration itself):
# uploader "eoinf"; commit 64bc99d (verified), message "Upload folder using huggingface_hub".
# Run identity. dataset_name is the model name prefixed with the "eoinf/" namespace.
model_name = "pile_llama_mix_within_rows_pile_all_random_tokens_uniform_frac0d2"
dataset_name = "eoinf/pile_llama_mix_within_rows_pile_all_random_tokens_uniform_frac0d2"

# Model dimensions. n_heads * d_head = 8 * 64 = 512 = d_model, and d_mlp = 4 * d_model.
n_layers = 2
d_model = 512
d_mlp = 2048
d_head = 64
n_heads = 8
attn_only = false
layer_norm_eps = 1e-5
init_range = 0.02
n_ctx = 1024
d_vocab = 32_000

# Reproducibility and hardware.
seed = 10
device = "cuda"
use_bfloat16_matmul = false

# Batching. NOTE(review): the effective batch is presumably
# batch_size_per_device * n_devices * batches_per_step = 32 sequences;
# confirm against the training script.
batch_size_per_device = 32
n_devices = 1
batches_per_step = 1

# Optimisation. warmup_tokens is 15% of max_tokens (30M of 200M).
# NOTE(review): lr_hidden / lr_vector look like separate learning rates for
# different parameter groups; which parameters fall in each is not visible here.
max_tokens = 200_000_000
lr_hidden = 0.001
lr_vector = 0.0005
lr_schedule = "constant_with_warmup"
warmup_tokens = 30_000_000
weight_decay = 0.05
grad_norm_clip = 1.0

# Logging. The unit of log_interval (steps vs. batches) is not stated in this file.
train_loss_moving_average_beta = 0.99
log_interval = 25

# Checkpointing. NOTE(review): checkpoint_interval_ratio together with
# save_log_checkpoints suggests geometrically spaced checkpoints in addition
# to the fixed interval; verify in the consumer before changing either.
save_checkpoints = true
checkpoint_interval = 500
checkpoint_interval_ratio = 1.1
save_log_checkpoints = true