{ "toolchain_version": "0.2.0", "vendor": "Meta", "family": "Llama-3.2", "name": "Llama-3.2-1B-Instruct", "size": "1B", "quantization": null, "repo": "meta-llama/Llama-3.2-1B-Instruct", "use_cases": [], "model_config": { "embedding_config": { "type": "TiedEmbeddingConfig", "input_scale": null, "logits_soft_cap": null, "precision": "float16" }, "global_rope_config": { "type": "LlamaRoPEConfig", "precision": "float16", "base": 500000.0, "max_sequence_length": 131072, "scaling_factor": 32.0, "original_context_length": 8192, "low_frequency_factor": 1.0, "high_frequency_factor": 4.0 }, "local_rope_config": null, "layer_config": { "pre_attention_norm_config": { "scale_precision": "float16", "accumulation_precision": "float32", "epsilon": 1e-05, "scale_offset": null, "upcast_mode": "only_normalization" }, "attention_config": { "qkv_projection_config": { "type": "FullPrecisionLinearConfig", "precision": "float16" }, "out_projection_config": { "type": "FullPrecisionLinearConfig", "precision": "float16" }, "query_norm_config": null, "key_norm_config": null, "logit_soft_cap": null, "has_qkv_biases": false, "has_out_biases": false }, "post_attention_norm_config": null, "pre_mlp_norm_config": { "scale_precision": "float16", "accumulation_precision": "float32", "epsilon": 1e-05, "scale_offset": null, "upcast_mode": "only_normalization" }, "mlp_config": { "linear_config": { "type": "FullPrecisionLinearConfig", "precision": "float16" }, "activation": "silu" }, "post_mlp_norm_config": null }, "output_norm_config": { "scale_precision": "float16", "accumulation_precision": "float32", "epsilon": 1e-05, "scale_offset": null, "upcast_mode": "only_normalization" }, "vocab_size": 128256, "model_dim": 2048, "hidden_dim": 8192, "num_heads": 32, "num_groups": 8, "head_dim": 64, "attention_scale": null, "num_layers": 16, "sliding_window_sizes": null, "context_length": 131072 }, "tokenizer_file_names": [ "tokenizer.json", "tokenizer_config.json", "generation_config.json" ] }