{
  "out_dir": "/workspace/runs/tiny-mm-clean",
  "tokenizer_dir": "/workspace/tokenizer_8k_clean",
  "resume_from": "/workspace/runs/tiny-mm-clean/ckpt_last.pt",
  "train_tokenizer": false,
  "tokenizer_samples": 50000,
  "preview": 0,
  "vocab_size": 8192,
  "d_model": 304,
  "n_layers": 8,
  "n_heads": 8,
  "max_seq_len": 512,
  "image_size": 128,
  "patch_size": 16,
  "dropout": 0.0,
  "batch_size": 64,
  "grad_accum": 1,
  "max_steps": 100000,
  "lr": 0.0003,
  "min_lr": 3e-05,
  "warmup_steps": 1000,
  "weight_decay": 0.1,
  "grad_clip": 1.0,
  "num_workers": 1,
  "save_every": 2000,
  "log_every": 10,
  "seed": 4242,
  "device": "cuda",
  "dtype": "bfloat16",
  "mix_tinystories": 0.5,
  "mix_cosmopedia": 0.15,
  "mix_coco": 0.25,
  "mix_flickr": 0.1,
  "coco_dataset": "jxie/coco_captions",
  "coco_split": "train",
  "flickr_dataset": "Mozilla/flickr30k-transformed-captions",
  "flickr_split": "test",
  "text_shuffle_buffer": 10000,
  "image_shuffle_buffer": 2000,
  "use_tinystories": true,
  "use_cosmopedia": true
}