CompressedGemma committed on
Commit
dc3b370
·
verified ·
1 Parent(s): 5c1c396

Tensor tweak

Browse files
Files changed (1) hide show
  1. hexstate_requantize.py +9 -1
hexstate_requantize.py CHANGED
@@ -1,6 +1,6 @@
1
  #!/usr/bin/env python3
2
  """
3
- HexState GGUF Re-Quantizer — GGUF-to-GGUF Q2_K quantization.
4
 
5
  Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
6
  and re-quantizes eligible weight tensors to Q2_K using numpy.
@@ -636,6 +636,14 @@ def should_quantize(name, n_dims, dims, tied_embeddings=False):
636
  return False
637
  if 'layer_output_scale' in name:
638
  return False
 
 
 
 
 
 
 
 
639
  # DeltaNet state-space parameters — keep at full precision
640
  if 'ssm_a' in name or 'A_log' in name:
641
  return False
 
1
  #!/usr/bin/env python3
2
  """
3
+ HexState GGUF Re-Quantizer — GGUF-to-GGUF Q2_K quantization.
4
 
5
  Reads a source GGUF (F16/BF16/F32), copies all metadata verbatim,
6
  and re-quantizes eligible weight tensors to Q2_K using numpy.
 
636
  return False
637
  if 'layer_output_scale' in name:
638
  return False
639
+ # Embedding table — this is a lookup, not a matmul; Q2_K destroys
640
+ # token distinctions. Keep at source precision (F16/BF16).
641
+ if 'token_embd' in name:
642
+ return False
643
+ # LM head output projection — logit precision is critical for generation.
644
+ # (When tied with embeddings, this is the same tensor and also skipped above.)
645
+ if name == 'output.weight':
646
+ return False
647
  # DeltaNet state-space parameters — keep at full precision
648
  if 'ssm_a' in name or 'A_log' in name:
649
  return False