{
  "name": "Beeper",
  "dataset_name": "roneneldan/TinyStories",
  "dataset_split": "train[:10%]",
  "context": 512,
  "vocab_size": 8192,
  "add_bos_eos": true,
  "val_ratio": 0.01,
  "test_ratio": 0.01,
  "dim": 512,
  "n_layers": 6,
  "n_heads": 8,
  "mlp_ratio": 4.0,
  "dropout": 0.0,
  "resid_dropout": 0.1,
  "grad_checkpoint": false,
  "compile_model": true,
  "batch_size": 32,
  "grad_accum_steps": 1,
  "epochs": 3,
  "lr": 0.0003,
  "betas": [
    0.9,
    0.95
  ],
  "weight_decay": 0.1,
  "warmup_steps": 500,
  "max_steps": null,
  "clip_grad": 1.0,
  "min_lr": 1e-06,
  "label_smoothing": 0.0,
  "mixed_precision": "bf16",
  "log_dir": "./runs/beeper",
  "log_interval": 50,
  "ckpt_dir": "./beeper_checkpoints",
  "export_dir": "./beeper_export",
  "temperature": 0.9,
  "top_k": 40,
  "top_p": 0.9,
  "repetition_penalty": 1.1,
  "presence_penalty": 0.6,
  "frequency_penalty": 0.0,
  "upload_to_hub": true,
  "hf_repo": "AbstractPhil/beeper-tinystories-6l-512d-ctx512",
  "tokenizer_path": "beeper.tokenizer.json",
  "seed": 1337
}