# File: hmr-dataset/configs/network/diffusion.yaml
# Uploader: zirobtc
# Commit message: Upload folder using huggingface_hub
# Commit: fbb20ff (verified)
# Config for the GENMO diffusion network; `_target_` names the class that is
# built from the keys below (Hydra-style instantiation).
#
# NOTE(review): this file arrived with all indentation stripped, which left
# duplicate top-level keys (`_target_`, `latent_dim`, `music_mask_prob`,
# `speech_mask_prob`) and null `model_cfg` / `denoiser` mappings. The nesting
# below is reconstructed from the interpolation paths used in this file
# (`${.model_cfg.denoiser.*}`, `${.output_dim}`, `${.xt_dim}`) -- confirm
# against the upstream config before relying on it.
_target_: genmo.network.genmo_diffusion.GENMODiffusion
args: ${pipeline.args}
# Leading-dot interpolations are relative to this mapping, so these three keys
# mirror the values declared under model_cfg.denoiser further down.
latent_dim: ${.model_cfg.denoiser.latent_dim}
cond_merge_strategy: "add"
music_mask_prob: ${.model_cfg.denoiser.music_mask_prob}
speech_mask_prob: ${.model_cfg.denoiser.speech_mask_prob}
encoded_music_dim: ${pipeline.args.encoded_music_dim}

model_cfg:
  # Absolute interpolation (no leading dot): resolved from the root of the
  # composed config, not from this file's own `model_cfg`.
  diffusion: ${model_cfg.diffusion}
  denoiser:
    _target_: genmo.network.genmo_denoiser.NetworkEncoderRoPE
    output_dim: 151
    # xt_dim and njoints are chained to output_dim, so all three stay equal
    # unless one of them is overridden explicitly.
    xt_dim: ${.output_dim}
    njoints: ${.xt_dim}
    text_mask_prob: 0.1
    music_mask_prob: 0.1
    speech_mask_prob: 0.1
    use_text_pos_enc: true
    # NOTE(review): `mode` and `cross_attn_type` are assumed to be children of
    # text_encoder_cfg because they directly follow it; SOURCE cannot prove
    # whether `cross_attn_type` instead belongs to the denoiser -- verify.
    text_encoder_cfg:
      mode: all
      cross_attn_type: mha
    # latent_dim must live at denoiser level: the top-level `latent_dim`
    # interpolates `.model_cfg.denoiser.latent_dim`.
    latent_dim: 1024
    # NOTE(review): the three keys below are presumed denoiser arguments since
    # they follow latent_dim -- confirm.
    num_layers: 16
    num_heads: 8
    mlp_ratio: 4