| { |
| "_remove_final_layer_norm": false, |
| "activation_dropout": 0.0, |
| "activation_function": "relu", |
| "architectures": [ |
| "OPTForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "bos_token_id": 2, |
| "do_layer_norm_before": true, |
| "dropout": 0.1, |
| "enable_bias": true, |
| "eos_token_id": 2, |
| "ffn_dim": 3072, |
| "hidden_size": 768, |
| "init_std": 0.02, |
| "layer_norm_elementwise_affine": true, |
| "layerdrop": 0.0, |
| "max_position_embeddings": 2048, |
| "model_type": "opt", |
| "num_attention_heads": 12, |
| "num_hidden_layers": 12, |
| "pad_token_id": 1, |
| "prefix": "</s>", |
| "quantization_config": { |
| "include_input_output_embeddings": false, |
| "modules_to_not_convert": null, |
| "quant_method": "torchao", |
| "quant_type": { |
| "default": { |
| "_data": { |
| "module_fqn_to_config": { |
| "model.decoder.layers.0.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.0.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.0.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.0.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.0.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.0.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.0.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.1.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.10.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.11.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.2.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.3.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.4.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.5.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.6.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.7.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.8.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.fc1": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.fc2": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.self_attn.k_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.self_attn.out_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.self_attn.q_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.self_attn.qkv_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| }, |
| "model.decoder.layers.9.self_attn.v_proj": { |
| "_data": { |
| "activation_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| }, |
| "granularity": [ |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| }, |
| { |
| "_data": {}, |
| "_type": "PerRow", |
| "_version": 1 |
| } |
| ], |
| "mm_config": { |
| "_data": { |
| "emulate": false, |
| "pad_inner_dim": false, |
| "use_fast_accum": true |
| }, |
| "_type": "Float8MMConfig", |
| "_version": 1 |
| }, |
| "set_inductor_config": true, |
| "weight_dtype": { |
| "_data": "float8_e4m3fnuz", |
| "_type": "torch.dtype" |
| } |
| }, |
| "_type": "Float8DynamicActivationFloat8WeightConfig", |
| "_version": 1 |
| } |
| } |
| }, |
| "_type": "ModuleFqnToConfig", |
| "_version": 1 |
| } |
| }, |
| "quant_type_kwargs": {}, |
| "untie_embedding_weights": false |
| }, |
| "torch_dtype": "bfloat16", |
| "transformers_version": "4.52.4", |
| "use_cache": true, |
| "vocab_size": 50272, |
| "word_embed_proj_dim": 768 |
| } |
|
|