{ "architectures": [ "Transformer" ], "bias": false, "bilinear": true, "d_hidden": 4096, "d_model": 1024, "gate": null, "n_ctx": 512, "n_head": 16, "n_layer": 16, "normalization": true, "repo": null, "tokenizer": "mistral", "dataset": "fineweb", "torch_dtype": "float32", "transformers_version": "4.45.2" }