import importlib.util

from .fsdp import shard_model
from .fuser import (get_sequence_parallel_rank,
                    get_sequence_parallel_world_size, get_sp_group,
                    get_world_group, init_distributed_environment,
                    initialize_model_parallel, set_multi_gpus_devices,
                    xFuserLongContextAttention)
from .wan_xfuser import usp_attn_forward

# pai_fuser is an internally developed acceleration package that can be used on PAI (Alibaba Cloud Platform for AI).
if importlib.util.find_spec("pai_fuser") is not None:
    from pai_fuser.core import parallel_magvit_vae
    from pai_fuser.core.attention import wan_usp_sparse_attention_wrapper

    from . import wan_xfuser

    # simple_wrapper works around conflicts between Cython-compiled functions and torch.compile.
    def simple_wrapper(func):
        def inner(*args, **kwargs):
            return func(*args, **kwargs)
        return inner
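
    # Rebind both the wan_xfuser module attribute and the name re-exported here, so that
    # callers pick up the sparse-attention kernel wrapped in a plain Python function.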
    wan_xfuser.usp_attn_forward = simple_wrapper(wan_usp_sparse_attention_wrapper()(wan_xfuser.usp_attn_forward))
    usp_attn_forward = simple_wrapper(wan_xfuser.usp_attn_forward)
    print("Import PAI VAE Turbo and Sparse Attention")

    from pai_fuser.core.rope import ENABLE_KERNEL, usp_fast_rope_apply_qk
    if ENABLE_KERNEL:
        import torch

        from .wan_xfuser import rope_apply
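
        # Dispatch helper: when autograd is enabled (training), fall back to the reference
        # rope_apply; otherwise use the fused usp_fast_rope_apply_qk kernel.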
        def adaptive_fast_usp_rope_apply_qk(q, k, grid_sizes, freqs):
            if torch.is_grad_enabled():
                q = rope_apply(q, grid_sizes, freqs)
                k = rope_apply(k, grid_sizes, freqs)
                return q, k
            else:
                return usp_fast_rope_apply_qk(q, k, grid_sizes, freqs)
        wan_xfuser.rope_apply_qk = adaptive_fast_usp_rope_apply_qk
        rope_apply_qk = adaptive_fast_usp_rope_apply_qk
        print("Import PAI Fast rope")