CompressedGemma
/

HPC-Quantize

Model card Files and versions

CompressedGemma committed on May 10

Commit

44e6b86

·

verified ·

1 Parent(s): 099fd3c

Qwen attention tensors

Files changed (1) hide show

hexstate_requantize.py +2 -2

hexstate_requantize.py CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-HexState GGUF Re-Quantizer — GGUF-to-GGUF HPC quantization.
 Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
 and re-quantizes eligible weight tensors using the HExState HPC engine
@@ -603,7 +603,7 @@ def is_attention_tensor(name):
     These are the most sensitive to quantization and get promoted to Q4_0."""
     attn_patterns = [
         'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
-        'attn_qkv.weight',
         'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
         'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
         # Qwen 3.6 DeltaNet SSM projections — treat as attention-class

 #!/usr/bin/env python3
 """
+HExState GGUF Re-Quantizer — GGUF-to-GGUF HPC quantization.
 Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
 and re-quantizes eligible weight tensors using the HExState HPC engine
     These are the most sensitive to quantization and get promoted to Q4_0."""
     attn_patterns = [
         'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
+        'attn_qkv.weight', 'attn_gate.weight',
         'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
         'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
         # Qwen 3.6 DeltaNet SSM projections — treat as attention-class