{ "dataset_name": "HuggingFaceFW/fineweb-edu-score-2", "dataset_config": null, "split": "train", "text_column": "text", "streaming": true, "download_rows": null, "shuffle_buffer": 50000, "preprocessing_batch_size": 128, "iterable_shards_when_downloaded": 1024, "tokenizer_name": "gpt2", "block_size": 2048, "model_preset": "tiny_125m", "n_layer": null, "n_embd": null, "n_head": null, "resid_pdrop": 0.0, "embd_pdrop": 0.0, "attn_pdrop": 0.0, "gradient_checkpointing": false, "max_parameters": 600000000, "num_tpu_processes": 1, "per_device_batch_size": 8, "gradient_accumulation_steps": 4, "max_steps": 10000, "learning_rate": 0.0003, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "warmup_steps": 100, "max_grad_norm": 1.0, "num_workers": 0, "seed": 42, "log_every": 20, "save_every": 100, "output_dir": "/kaggle/working/tiny-lm-tpu", "resume_from": null, "push_to_hub": true, "hub_model_id": "moos124/tiny-lm-125m" }