{
  "toolchain_version": "0.2.0",
  "vendor": "Meta",
  "family": "Llama-3.2",
  "name": "Llama-3.2-1B-Instruct",
  "size": "1B",
  "quantization": null,
  "repo": "meta-llama/Llama-3.2-1B-Instruct",
  "use_cases": [],
  "model_config": {
    "embedding_config": {
      "type": "TiedEmbeddingConfig",
      "input_scale": null,
      "logits_soft_cap": null,
      "precision": "float16"
    },
    "global_rope_config": {
      "type": "LlamaRoPEConfig",
      "precision": "float16",
      "base": 500000.0,
      "max_sequence_length": 131072,
      "scaling_factor": 32.0,
      "original_context_length": 8192,
      "low_frequency_factor": 1.0,
      "high_frequency_factor": 4.0
    },
    "local_rope_config": null,
    "layer_config": {
      "pre_attention_norm_config": {
        "scale_precision": "float16",
        "accumulation_precision": "float32",
        "epsilon": 1e-05,
        "scale_offset": null,
        "upcast_mode": "only_normalization"
      },
      "attention_config": {
        "qkv_projection_config": {
          "type": "FullPrecisionLinearConfig",
          "precision": "float16"
        },
        "out_projection_config": {
          "type": "FullPrecisionLinearConfig",
          "precision": "float16"
        },
        "query_norm_config": null,
        "key_norm_config": null,
        "logit_soft_cap": null,
        "has_qkv_biases": false,
        "has_out_biases": false
      },
      "post_attention_norm_config": null,
      "pre_mlp_norm_config": {
        "scale_precision": "float16",
        "accumulation_precision": "float32",
        "epsilon": 1e-05,
        "scale_offset": null,
        "upcast_mode": "only_normalization"
      },
      "mlp_config": {
        "linear_config": {
          "type": "FullPrecisionLinearConfig",
          "precision": "float16"
        },
        "activation": "silu"
      },
      "post_mlp_norm_config": null
    },
    "output_norm_config": {
      "scale_precision": "float16",
      "accumulation_precision": "float32",
      "epsilon": 1e-05,
      "scale_offset": null,
      "upcast_mode": "only_normalization"
    },
    "vocab_size": 128256,
    "model_dim": 2048,
    "hidden_dim": 8192,
    "num_heads": 32,
    "num_groups": 8,
    "head_dim": 64,
    "attention_scale": null,
    "num_layers": 16,
    "sliding_window_sizes": null,
    "context_length": 131072
  },
  "tokenizer_file_names": [
    "tokenizer.json",
    "tokenizer_config.json",
    "generation_config.json"
  ]
}