tiny-lm-125m / training_config.json
moos124's picture
Upload Kaggle TPU pretrained tiny causal LM
2ca0428 verified
{
"dataset_name": "HuggingFaceFW/fineweb-edu-score-2",
"dataset_config": null,
"split": "train",
"text_column": "text",
"streaming": true,
"download_rows": null,
"shuffle_buffer": 50000,
"preprocessing_batch_size": 128,
"iterable_shards_when_downloaded": 1024,
"tokenizer_name": "gpt2",
"block_size": 2048,
"model_preset": "tiny_125m",
"n_layer": null,
"n_embd": null,
"n_head": null,
"resid_pdrop": 0.0,
"embd_pdrop": 0.0,
"attn_pdrop": 0.0,
"gradient_checkpointing": false,
"max_parameters": 600000000,
"num_tpu_processes": 1,
"per_device_batch_size": 8,
"gradient_accumulation_steps": 4,
"max_steps": 10000,
"learning_rate": 0.0003,
"weight_decay": 0.1,
"beta1": 0.9,
"beta2": 0.95,
"warmup_steps": 100,
"max_grad_norm": 1.0,
"num_workers": 0,
"seed": 42,
"log_every": 20,
"save_every": 100,
"output_dir": "/kaggle/working/tiny-lm-tpu",
"resume_from": null,
"push_to_hub": true,
"hub_model_id": "moos124/tiny-lm-125m"
}