{ "out_dir": "/workspace/runs/tiny-mm-clean", "tokenizer_dir": "/workspace/tokenizer_8k_clean", "resume_from": "/workspace/runs/tiny-mm-clean/ckpt_last.pt", "train_tokenizer": false, "tokenizer_samples": 50000, "preview": 0, "vocab_size": 8192, "d_model": 304, "n_layers": 8, "n_heads": 8, "max_seq_len": 512, "image_size": 128, "patch_size": 16, "dropout": 0.0, "batch_size": 64, "grad_accum": 1, "max_steps": 100000, "lr": 0.0003, "min_lr": 3e-05, "warmup_steps": 1000, "weight_decay": 0.1, "grad_clip": 1.0, "num_workers": 1, "save_every": 2000, "log_every": 10, "seed": 4242, "device": "cuda", "dtype": "bfloat16", "mix_tinystories": 0.5, "mix_cosmopedia": 0.15, "mix_coco": 0.25, "mix_flickr": 0.1, "coco_dataset": "jxie/coco_captions", "coco_split": "train", "flickr_dataset": "Mozilla/flickr30k-transformed-captions", "flickr_split": "test", "text_shuffle_buffer": 10000, "image_shuffle_buffer": 2000, "use_tinystories": true, "use_cosmopedia": true }