{
  "model_class": "BERTModel",
  "model_config": {
    "name": "Albertone",
    "hidden_size": 1024,
    "ffn_factor": 3.0,
    "vocab_size": 32768,
    "bos_token_id": 5,
    "eos_token_id": 6,
    "pad_token_id": 0,
    "mask_token_id": 4,
    "masked_substitution_rate": [
      0.1,
      0.5
    ],
    "cloze_probability": 1.0,
    "random_probability": 0.0,
    "same_probability": 0.0,
    "num_hidden_layers": 28,
    "num_attention_heads": 16,
    "tie_word_embeddings": true,
    "rms_norm_eps": 1e-06,
    "attention_type": [],
    "max_position_embeddings": 1024,
    "block_size_for_attention": 128,
    "compile_flexattn": false,
    "bias": false,
    "default_layer": {
      "attn_impl": "flash",
      "sliding_window_size": null,
      "positional_encoding": "alibi",
      "normalization": "rmsnorm",
      "normalization_position": "pre",
      "ffn_activation": "swiglu",
      "hooks": {}
    },
    "custom_layers": {}
  },
  "training": {
    "optimizer": "muon",
    "lr_scheduling": true,
    "lr": 0.0005,
    "final_lr": 1e-05,
    "hold_steps": 0.01,
    "weight_decay": 0.01,
    "scheduler": "custom",
    "gradient_clip_val": 1.0,
    "warmup_steps": 0.007,
    "max_epochs": 1,
    "accumulate_grad_batches": 64,
    "seed": 27,
    "save_every_n_steps": 500,
    "checkpoint_name": "albertone",
    "no_decay_for_embedding": true
  },
  "tokenizer": {
    "type": "huggingface",
    "pretrained_name": "mrinaldi/Gettone",
    "varlen_strategy": "unpadding"
  },
  "data": {
    "data_root": "/mnt/llmdata/data/Albertone_MDAT",
    "batch_size": 32,
    "num_workers": 1,
    "mdat_strategy": "Gettone1024_",
    "mdat_view": null,
    "wanted_from_strategy": "chunked_for_recurrence"
  },
  "save_dir": "./checkpoints",
  "wandb_project": "Albertone",
  "wandb_run_name": "Albertone"
}