CompressedGemma committed on
Commit
44e6b86
·
verified ·
1 Parent(s): 099fd3c

Qwen attention tensors

Browse files
Files changed (1) hide show
  1. hexstate_requantize.py +2 -2
hexstate_requantize.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- HexState GGUF Re-Quantizer — GGUF-to-GGUF HPC quantization.
4
 
5
  Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
6
  and re-quantizes eligible weight tensors using the HExState HPC engine
@@ -603,7 +603,7 @@ def is_attention_tensor(name):
603
  These are the most sensitive to quantization and get promoted to Q4_0."""
604
  attn_patterns = [
605
  'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
606
- 'attn_qkv.weight',
607
  'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
608
  'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
609
  # Qwen 3.6 DeltaNet SSM projections — treat as attention-class
 
1
  #!/usr/bin/env python3
2
  """
3
+ HExState GGUF Re-Quantizer — GGUF-to-GGUF HPC quantization.
4
 
5
  Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
6
  and re-quantizes eligible weight tensors using the HExState HPC engine
 
603
  These are the most sensitive to quantization and get promoted to Q4_0."""
604
  attn_patterns = [
605
  'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
606
+ 'attn_qkv.weight', 'attn_gate.weight',
607
  'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
608
  'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
609
  # Qwen 3.6 DeltaNet SSM projections — treat as attention-class