dtype=uint16 vocab=8192 eot=0 train_tokens=477521740 val_tokens=9456433 mix=tinystories:60,tinystories_instruct:15,simple_wiki:15,tiny_textbooks:10