RedHatAI
/

Voxtral-Mini-3B-2507-FP8-dynamic

Automatic Speech Recognition

text-generation-inference

automatic-speech-translation

audio-text-to-text

video-text-to-text

compressed-tensors

Model card Files Files and versions

alexmarques committed 28 days ago

Commit

aa0effb

·

verified ·

1 Parent(s): 1f6c083

Update convert_voxtral_hf_to_mistral.py

Files changed (1) hide show

convert_voxtral_hf_to_mistral.py +2 -2

convert_voxtral_hf_to_mistral.py CHANGED Viewed

@@ -101,11 +101,11 @@ def convert_state_dict(hf_state_dict, config):
         if "language_model" in hf_key:
             if hf_key.endswith("q_proj.weight"):
                 tensor = permute_for_mistral_rope(tensor, num_attention_heads, query_dim, hidden_size)
-            elif hf_key.endswith("q_proj.weight_scale") and tensor.size(0) == num_attention_heads:
                 tensor = permute_for_mistral_rope(tensor, num_attention_heads, query_dim, 1)
             elif hf_key.endswith("k_proj.weight"):
                 tensor = permute_for_mistral_rope(tensor, num_key_value_heads, key_value_dim, hidden_size)
-            elif hf_key.endswith("k_proj.weight_scale") and tensor.size(0) == num_key_value_heads:
                 tensor = permute_for_mistral_rope(tensor, num_key_value_heads, key_value_dim, 1)
         mistral_dict[mistral_key] = tensor

         if "language_model" in hf_key:
             if hf_key.endswith("q_proj.weight"):
                 tensor = permute_for_mistral_rope(tensor, num_attention_heads, query_dim, hidden_size)
+            elif hf_key.endswith("q_proj.weight_scale") and tensor.size(0) > 1:
                 tensor = permute_for_mistral_rope(tensor, num_attention_heads, query_dim, 1)
             elif hf_key.endswith("k_proj.weight"):
                 tensor = permute_for_mistral_rope(tensor, num_key_value_heads, key_value_dim, hidden_size)
+            elif hf_key.endswith("k_proj.weight_scale") and tensor.size(0) > 1:
                 tensor = permute_for_mistral_rope(tensor, num_key_value_heads, key_value_dim, 1)
         mistral_dict[mistral_key] = tensor