{
  "model": "DeepSeek-V4-Flash-Base",
  "format": "int4_standalone_v1",
  "format_version": "1.1",
  "format_notes": "Standalone INT4 checkpoint (TP-sharded). Each safetensors file contains the FULL state for its rank: non-quantized weights kept as native FP8/UE8M0/BF16, quantized linears replaced by their packed INT4 form. Per-linear quant kind in `per_linear_quant.json`: 'sym' = packed nibbles + fp16 scale; 'asym' = packed nibbles + fp16 scale + uint8 zero_point; 'bf16' = bf16 weight (with fakequant noise baked in). Loader: see int4_overlay_loader.py:apply_overlay (dispatches on kind from per_linear_quant.json).",
  "recipe_name": "v9-no-attn",
  "fp8_source_release": "deepseek-ai/DeepSeek-V4-Flash-Base",
  "n_quantized_per_rank": 8509,
  "n_sym_per_rank": 0,
  "n_asym_per_rank": 8509,
  "n_skipped_per_rank": 0,
  "bytes_native_per_rank": 70738414720,
  "bytes_packed_per_rank": 42035017728,
  "compression_ratio_quantized_only": 1.6828448884626102,
  "tp_world_size": 4,
  "saved_at": "2026-04-26T18:36:22Z"
}