Qwen attention tensors
Browse files
- hexstate_requantize.py +2 -2
hexstate_requantize.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
|
| 4 |
|
| 5 |
Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
|
| 6 |
and re-quantizes eligible weight tensors using the HExState HPC engine
|
|
@@ -603,7 +603,7 @@ def is_attention_tensor(name):
|
|
| 603 |
These are the most sensitive to quantization and get promoted to Q4_0."""
|
| 604 |
attn_patterns = [
|
| 605 |
'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
|
| 606 |
-
'attn_qkv.weight',
|
| 607 |
'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
|
| 608 |
'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
|
| 609 |
# Qwen 3.6 DeltaNet SSM projections — treat as attention-class
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
HExState GGUF Re-Quantizer — GGUF-to-GGUF HPC quantization.
|
| 4 |
|
| 5 |
Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
|
| 6 |
and re-quantizes eligible weight tensors using the HExState HPC engine
|
|
|
|
| 603 |
These are the most sensitive to quantization and get promoted to Q4_0."""
|
| 604 |
attn_patterns = [
|
| 605 |
'attn_q.weight', 'attn_k.weight', 'attn_v.weight', 'attn_output.weight',
|
| 606 |
+
'attn_qkv.weight', 'attn_gate.weight',
|
| 607 |
'self_attn.q_proj.weight', 'self_attn.k_proj.weight',
|
| 608 |
'self_attn.v_proj.weight', 'self_attn.o_proj.weight',
|
| 609 |
# Qwen 3.6 DeltaNet SSM projections — treat as attention-class
|