{
  "final_step": 20000,
  "best_val_loss": 2.3653315782546995,
  "dataset": "HuggingFaceFW/fineweb-edu (sample-10BT)",
  "tokenizer": "Pretrained LLaMA 2 (NousResearch/Llama-2-7b-hf, 32K vocab)",
  "training_config": {
    "learning_rate": 0.0003,
    "min_learning_rate": 3e-05,
    "weight_decay": 0.1,
    "beta1": 0.9,
    "beta2": 0.95,
    "adam_eps": 1e-08,
    "grad_clip": 1.0,
    "warmup_steps": 2000,
    "total_steps": 20000,
    "micro_batch_size": 4,
    "gradient_accumulation_steps": 32,
    "dtype": "bfloat16",
    "checkpoint_dir": "/content/drive/MyDrive/llm-1b-lab/checkpoints",
    "checkpoint_interval": 500,
    "max_checkpoints": 3,
    "log_interval": 10,
    "eval_interval": 500,
    "eval_steps": 20,
    "wandb_project": "llm-1b-lab",
    "wandb_run_name": null,
    "wandb_dir": "/content/drive/MyDrive/wandb_logs",
    "use_wandb": true,
    "seed": 42
  }
}