{ "final_step": 20000, "best_val_loss": 2.3653315782546995, "dataset": "HuggingFaceFW/fineweb-edu (sample-10BT)", "tokenizer": "Pretrained LLaMA 2 (NousResearch/Llama-2-7b-hf, 32K vocab)", "training_config": { "learning_rate": 0.0003, "min_learning_rate": 3e-05, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "adam_eps": 1e-08, "grad_clip": 1.0, "warmup_steps": 2000, "total_steps": 20000, "micro_batch_size": 4, "gradient_accumulation_steps": 32, "dtype": "bfloat16", "checkpoint_dir": "/content/drive/MyDrive/llm-1b-lab/checkpoints", "checkpoint_interval": 500, "max_checkpoints": 3, "log_interval": 10, "eval_interval": 500, "eval_steps": 20, "wandb_project": "llm-1b-lab", "wandb_run_name": null, "wandb_dir": "/content/drive/MyDrive/wandb_logs", "use_wandb": true, "seed": 42 } }