{
  "model": "DeepSeek-V4-Flash-Base",
  "format": "int4_standalone_v1",
  "format_version": "1.1",
  "format_notes": "Standalone INT4 checkpoint (TP-sharded). Each safetensors file contains the FULL state for its rank: non-quantized weights kept as native FP8/UE8M0/BF16, quantized linears replaced by their packed INT4 form. Per-linear quant kind in `per_linear_quant.json`: 'sym' = packed nibbles + fp16 scale; 'asym' = packed nibbles + fp16 scale + uint8 zero_point; 'bf16' = bf16 weight (with fakequant noise baked in). Loader: see int4_overlay_loader.py:apply_overlay (dispatches on kind from per_linear_quant.json).",
  "recipe_name": "v9-no-attn",
  "fp8_source_release": "deepseek-ai/DeepSeek-V4-Flash-Base",
  "n_quantized_per_rank": 8509,
  "n_sym_per_rank": 0,
  "n_asym_per_rank": 8509,
  "n_skipped_per_rank": 0,
  "bytes_native_per_rank": 70738414720,
  "bytes_packed_per_rank": 42035017728,
  "compression_ratio_quantized_only": 1.6828448884626102,
  "tp_world_size": 4,
  "saved_at": "2026-04-26T18:36:22Z"
}