Instructions to use yitongl/sparse_quant_exp with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use yitongl/sparse_quant_exp with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("yitongl/sparse_quant_exp", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
File size: 2,400 Bytes
1d0c0cc
{
"run_name": "sfp4_v4_sparse09_hpo_on_ours_p_init2050_1n_interactive",
"checkpoint": "checkpoint-700",
"training_method": "legacy_sft_wan_training_pipeline",
"model_path": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
"init_weights_from_safetensors": "checkpoints/init/sfp4_v4_sparse06_hpo_on_ours_p_1n_interactive_v2_ckpt2050/transformer/diffusion_pytorch_model.safetensors",
"environment": {
"FASTVIDEO_ATTENTION_BACKEND": "SPARSE_FP4_OURS_P_ATTN",
"FASTVIDEO_SPARSE_FP4_USE_HIGH_PREC_O": "1",
"FASTVIDEO_VALIDATION_ONE_PROMPT_PER_RANK": "1",
"WANDB_MODE": "online",
"WANDB_RESUME": "allow"
},
"vsa_schedule": {
"VSA_SPARSITY": 0.9,
"VSA_INIT_SPARSITY": 0.9,
"VSA_WARMUP_STEPS": 0,
"VSA_DECAY_RATE": 0.03,
"VSA_DECAY_INTERVAL_STEPS": 50,
"effective_sparsity_from_step_0": 0.9
},
"attention_semantics": {
"selected_backend": "SPARSE_FP4_OURS_P_ATTN",
"self_attention": {
"backend_path": "fastvideo/attention/backends/sparse_fp4_ours_p_attn.py",
"kernel_path": "fastvideo-kernel/python/fastvideo_kernel/triton_kernels/block_sparse_attn_triton_ours_p.py",
"tile_size_video": [4, 4, 4],
"tile_tokens": 64,
"qkv_quantization": "FP4 fake quantization with STE, no q/k mean subtraction in quantization",
"block_selection": "top-k blocks from q_c @ k_c tile-mean scores",
"p_quantization": "group-local exp2(qk - group_max) FP4 fake quantization; compensation multiplies exp2(group_max - running_row_m)",
"dropped_tile_handling": "tile-level q_mean/k_mean score and mean_v compensation"
},
"cross_attention": {
"backend": "dense_sdpa",
"reason": "sparse_fp4_ours_p_attn.py treats query_length != key_length as cross attention and returns _dense_sdpa_blhd",
"quantized": false,
"sparse": false
},
"force_dense": {
"backend": "dense_sdpa",
"used_for": "teacher or explicitly forced dense paths, not the normal SFT student self-attention path"
}
},
"validation_and_checkpointing": {
"save_steps": 50,
"eval_steps": 50,
"validation_sampling_steps": 50,
"validation_guidance_scale": 5.0,
"checkpoints_total_limit": 5,
"flow_shift": 1.0
},
"training_shape": {
"num_latent_t": 20,
"num_frames": 77,
"height": 448,
"width": 832,
"batch_size_per_gpu": 1,
"sp_size": 1,
"tp_size": 1
}
}