---
# Pretraining configuration: Pythia-1B on the FineWeb sample-100BT subset.
# Consumed by litgpt's jsonargparse-based CLI (class_path/init_args instantiation).

model_name: pythia-1b
out_dir: /data/users/zichunyu/out/pythia-1b/fineweb/sample-100BT
resume: false

# Data module: streams the FineWeb sample-100BT shards.
data:
  class_path: litgpt.data.FineWeb
  init_args:
    data_path: /data/users/zichunyu/data/fineweb/sample-100BT
    val_split_fraction: 0.0005
    seed: 42
    num_workers: 8

# Training schedule and batching.
train:
  save_interval: 5000
  log_interval: 50
  # 512 global / 16 micro => gradient accumulation over 32 micro-batches
  # (divided further across devices at runtime).
  global_batch_size: 512
  micro_batch_size: 16
  lr_warmup_steps: 2000
  # 50B tokens total training budget.
  max_tokens: 50000000000
  tie_embeddings: false
  # Gradient-clipping threshold (global L2 norm).
  max_norm: 1.0
  # Cosine-decay floor; peak lr is set under optimizer.init_args.
  min_lr: 4.0e-05

# Periodic evaluation settings.
eval:
  interval: 200000
  max_iters: 100
  initial_validation: false

# Optimizer, instantiated from class_path with init_args.
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 0.0004
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

# "auto" lets the launcher pick all visible accelerators.
devices: auto
tokenizer_dir: checkpoints/EleutherAI/pythia-1b
logger_name: wandb
exp_name: pythia-1b_fineweb_sample-100BT
seed: 42