AAA_half_trained / matformer_config.json
mrinaldi's picture
Upload folder using huggingface_hub
0c40f3f verified
{
"model_class": "BERTModel",
"model_config": {
"name": "Albertone",
"hidden_size": 1024,
"ffn_factor": 3.0,
"vocab_size": 32768,
"bos_token_id": 5,
"eos_token_id": 6,
"pad_token_id": 0,
"mask_token_id": 4,
"masked_substitution_rate": [
0.1,
0.5
],
"cloze_probability": 1.0,
"random_probability": 0.0,
"same_probability": 0.0,
"num_hidden_layers": 28,
"num_attention_heads": 16,
"tie_word_embeddings": true,
"rms_norm_eps": 1e-06,
"attention_type": [],
"max_position_embeddings": 1024,
"block_size_for_attention": 128,
"compile_flexattn": false,
"bias": false,
"default_layer": {
"attn_impl": "flash",
"sliding_window_size": null,
"positional_encoding": "alibi",
"normalization": "rmsnorm",
"normalization_position": "pre",
"ffn_activation": "swiglu",
"hooks": {}
},
"custom_layers": {}
},
"training": {
"optimizer": "muon",
"lr_scheduling": true,
"lr": 0.0005,
"final_lr": 1e-05,
"hold_steps": 0.01,
"weight_decay": 0.01,
"scheduler": "custom",
"gradient_clip_val": 1.0,
"warmup_steps": 0.007,
"max_epochs": 1,
"accumulate_grad_batches": 64,
"seed": 27,
"save_every_n_steps": 500,
"checkpoint_name": "albertone",
"no_decay_for_embedding": true
},
"tokenizer": {
"type": "huggingface",
"pretrained_name": "mrinaldi/Gettone",
"varlen_strategy": "unpadding"
},
"data": {
"data_root": "/mnt/llmdata/data/Albertone_MDAT",
"batch_size": 32,
"num_workers": 1,
"mdat_strategy": "Gettone1024_",
"mdat_view": null,
"wanted_from_strategy": "chunked_for_recurrence"
},
"save_dir": "./checkpoints",
"wandb_project": "Albertone",
"wandb_run_name": "Albertone"
}