{ "dim": 5120, "n_layers": 40, "head_dim": 128, "hidden_dim": 32768, "n_heads": 32, "n_kv_heads": 8, "rope_theta": 1000000000.0, "norm_eps": 1e-05, "vocab_size": 131072, "quantization": { "config_groups": { "group_0": { "input_activations": { "dynamic": true, "num_bits": 8, "observer": null, "strategy": "token", "symmetric": true, "type": "float" }, "targets": [ "Linear" ], "weights": { "dynamic": false, "num_bits": 8, "observer": "minmax", "strategy": "tensor", "symmetric": true, "type": "float" } } }, "format": "float-quantized", "ignore": [ "lm_head", "output" ], "quant_method": "compressed-tensors", "quantization_status": "compressed" } }