{
  "num_heads": 8,
  "attention_dim": 768,
  "vocab_size": 16387,
  "num_blocks": 12,
  "ff_dim": 2304,
  "dropout_rate": 0.05,
  "possible_opt_path": "Loaded_model",
  "max_len": 2048,
  "attn_chunks": 1,
  "gqa_repeats": 2,
  "use_fash_attention": false,
  "emb_init_range": 0.02,
  "emb_scaling_factor": 1,
  "res_scale": 1
}