Instructions to use yitongl/sparse_quant_exp with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use yitongl/sparse_quant_exp with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("yitongl/sparse_quant_exp", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
{
  "run_name": "sfp4_v4_sparse09_hpo_on_ours_p_init2050_1n_interactive",
  "checkpoint": "checkpoint-700",
  "training_method": "legacy_sft_wan_training_pipeline",
  "model_path": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
  "init_weights_from_safetensors": "checkpoints/init/sfp4_v4_sparse06_hpo_on_ours_p_1n_interactive_v2_ckpt2050/transformer/diffusion_pytorch_model.safetensors",
  "environment": {
    "FASTVIDEO_ATTENTION_BACKEND": "SPARSE_FP4_OURS_P_ATTN",
    "FASTVIDEO_SPARSE_FP4_USE_HIGH_PREC_O": "1",
    "FASTVIDEO_VALIDATION_ONE_PROMPT_PER_RANK": "1",
    "WANDB_MODE": "online",
    "WANDB_RESUME": "allow"
  },
  "vsa_schedule": {
    "VSA_SPARSITY": 0.9,
    "VSA_INIT_SPARSITY": 0.9,
    "VSA_WARMUP_STEPS": 0,
    "VSA_DECAY_RATE": 0.03,
    "VSA_DECAY_INTERVAL_STEPS": 50,
    "effective_sparsity_from_step_0": 0.9
  },
  "attention_semantics": {
    "selected_backend": "SPARSE_FP4_OURS_P_ATTN",
    "self_attention": {
      "backend_path": "fastvideo/attention/backends/sparse_fp4_ours_p_attn.py",
      "kernel_path": "fastvideo-kernel/python/fastvideo_kernel/triton_kernels/block_sparse_attn_triton_ours_p.py",
      "tile_size_video": [4, 4, 4],
      "tile_tokens": 64,
      "qkv_quantization": "FP4 fake quantization with STE, no q/k mean subtraction in quantization",
      "block_selection": "top-k blocks from q_c @ k_c tile-mean scores",
      "p_quantization": "group-local exp2(qk - group_max) FP4 fake quantization; compensation multiplies exp2(group_max - running_row_m)",
      "dropped_tile_handling": "tile-level q_mean/k_mean score and mean_v compensation"
    },
    "cross_attention": {
      "backend": "dense_sdpa",
      "reason": "sparse_fp4_ours_p_attn.py treats query_length != key_length as cross attention and returns _dense_sdpa_blhd",
      "quantized": false,
      "sparse": false
    },
    "force_dense": {
      "backend": "dense_sdpa",
      "used_for": "teacher or explicitly forced dense paths, not the normal SFT student self-attention path"
    }
  },
  "validation_and_checkpointing": {
    "save_steps": 50,
    "eval_steps": 50,
    "validation_sampling_steps": 50,
    "validation_guidance_scale": 5.0,
    "checkpoints_total_limit": 5,
    "flow_shift": 1.0
  },
  "training_shape": {
    "num_latent_t": 20,
    "num_frames": 77,
    "height": 448,
    "width": 832,
    "batch_size_per_gpu": 1,
    "sp_size": 1,
    "tp_size": 1
  }
}