moos124
/

tiny-lm-125m

Model card Files Files and versions

tiny-lm-125m / training_config.json

moos124's picture

Upload Kaggle TPU pretrained tiny causal LM

2ca0428 verified 11 days ago

history blame contribute delete

998 Bytes

	{
	"dataset_name": "HuggingFaceFW/fineweb-edu-score-2",
	"dataset_config": null,
	"split": "train",
	"text_column": "text",
	"streaming": true,
	"download_rows": null,
	"shuffle_buffer": 50000,
	"preprocessing_batch_size": 128,
	"iterable_shards_when_downloaded": 1024,
	"tokenizer_name": "gpt2",
	"block_size": 2048,
	"model_preset": "tiny_125m",
	"n_layer": null,
	"n_embd": null,
	"n_head": null,
	"resid_pdrop": 0.0,
	"embd_pdrop": 0.0,
	"attn_pdrop": 0.0,
	"gradient_checkpointing": false,
	"max_parameters": 600000000,
	"num_tpu_processes": 1,
	"per_device_batch_size": 8,
	"gradient_accumulation_steps": 4,
	"max_steps": 10000,
	"learning_rate": 0.0003,
	"weight_decay": 0.1,
	"beta1": 0.9,
	"beta2": 0.95,
	"warmup_steps": 100,
	"max_grad_norm": 1.0,
	"num_workers": 0,
	"seed": 42,
	"log_every": 20,
	"save_every": 100,
	"output_dir": "/kaggle/working/tiny-lm-tpu",
	"resume_from": null,
	"push_to_hub": true,
	"hub_model_id": "moos124/tiny-lm-125m"
	}