{ "source_model": "microsoft/Lens", "target_model": "WaveCut/Lens-SDNQ-uint4-static", "method": "SDNQ uint4 static", "corrected_recipe": true, "weights_dtype": "uint4", "quantized_matmul_dtype": "int8", "group_size": 0, "use_quantized_matmul": true, "dequantize_fp32": false, "modules_to_not_convert_user": [ ".final_layer", "pos_embed", ".norm_out", ".y_embedder", ".context_embedder", ".condition_embedder", ".x_embedder", ".vid_out", ".emb_out", ".img_in", "patch_embed", ".time_embed", ".t_embedder", "multi_modal_projector", "patch_emb", "norm", ".img_out", "patch_embedding", "lm_head", ".proj_out", ".vid_in", ".txt_in", "wte", "time_text_embed", ".txt_out", ".emb_in", "*.img_mod.*", "*.txt_mod.*" ], "root_cause_from_turbo_ablation": "Do not quantize transformer modulation linears (*.img_mod.* and *.txt_mod.*); all-linear UINT4 caused periodic grid artifacts and text degradation on Lens-Turbo.", "transformer_load_time_s": 3.677, "transformer_load_peak_allocated_gb": 8.359, "transformer_load_peak_reserved_gb": 8.424, "quantization_time_s": 0.313, "quantization_peak_allocated_gb": 8.425, "quantization_peak_reserved_gb": 8.485, "base_transformer_tensor_storage_gb": 16.417, "quant_transformer_tensor_storage_gb": 4.301, "transformer_storage_reduction_percent": 73.8, "base_transformer_repo_files_gb": 16.417, "quant_transformer_repo_files_gb": 4.302, "base_transformer_dtypes": { "FLOAT32": 16416900608 }, "quant_transformer_dtypes": { "BFLOAT16": 2942501632, "UINT8": 1358954496 }, "base_transformer_tensors": 1264, "quant_transformer_tensors": 2224 }