"""Hydra experiment config for the 7B Cosmos diffusion decoder (inference only).

Builds the ``Cosmos_DiffusionDecoder_7B_INFERENCE_ONLY`` lazy config and, as an
import-time side effect, registers it in Hydra's ``ConfigStore`` under the
``experiment`` group, keyed by the config's own ``job.name``.
"""

from hydra.core.config_store import ConfigStore

from .ar_diffusion_decoder_network import DiffusionDecoderGeneralDIT
from .lazy_config_init import LazyCall as L
from .lazy_config_init import LazyDict

# Single source of truth for the frame count: it is reused below for the
# latent shape and for both tokenizers' pixel chunk durations, so they
# cannot drift apart.
num_frames = 57

Cosmos_DiffusionDecoder_7B_INFERENCE_ONLY: LazyDict = LazyDict(
    dict(
        # "_self_" is listed last so that the values defined in this config
        # are applied after the overridden defaults.
        defaults=[
            {"override /net": "faditv2_7b"},
            {"override /tokenizer": "cosmos_video_tokenizer_res720_comp8x8x8_t121_ver092624"},
            {"override /conditioner": "video_latent_diffusion_decoder_cond"},
            {"override /tokenizer_corruptor": "cosmos_video_discrete_tokenizer_res720_comp8x16x16_t49_ver110224"},
            "_self_",
        ],
        job=dict(
            # NOTE(review): "deocder" looks like a misspelling of "decoder", but
            # this string is a runtime identifier, so it is kept exactly as-is.
            group="diffusion_deocder_FT_7Bv1_001",
            # Also used below as the name under which the experiment is stored.
            name="DD_FT_7Bv1_003_002_tokenizer888_spatch2_discrete_cond_on_token",
        ),
        model=dict(
            # NOTE(review): all three corruption knobs are zero; presumably this
            # disables conditioning corruption for inference -- confirm
            # against the model code.
            diffusion_decoder_cond_sigma_low=0.0,
            diffusion_decoder_cond_sigma_high=0.0,
            diffusion_decoder_corrupt_prob=0.0,
            condition_on_tokenizer_corruptor_token=True,
            # NOTE(review): axis meaning is not visible here; presumably
            # [channels, frames, height, width] -- TODO confirm.
            latent_shape=[
                16,
                num_frames,
                88,
                160,
            ],
            tokenizer_corruptor=dict(
                pixel_chunk_duration=num_frames,
                # Evaluates to 8 for num_frames = 57. The divisor 8 presumably
                # matches the temporal factor in the "comp8x16x16" tokenizer
                # name -- TODO confirm.
                latent_chunk_duration=1 + (num_frames - 1) // 8,
            ),
            net=L(DiffusionDecoderGeneralDIT)(
                diffusion_decoder_condition_on_sigma=False,
                max_img_h=240,
                max_img_w=240,
                rope_h_extrapolation_ratio=1.5,
                rope_w_extrapolation_ratio=1.5,
                rope_t_extrapolation_ratio=1,
                block_x_format="THWBD",
                is_diffusion_decoder=True,
                patch_spatial=2,
                diffusion_decoder_condition_on_token=True,
                diffusion_decoder_token_condition_voc_size=64000,
                diffusion_decoder_token_condition_dim=32,
            ),
            tokenizer=dict(
                video_vae=dict(
                    pixel_chunk_duration=num_frames,
                )
            ),
            conditioner=dict(
                latent_condition=dict(
                    dropout_rate=0.2,
                )
            ),
        ),
    )
)

# Register the experiment globally; the store name is read back from the
# config's job name so the two can never disagree.
cs = ConfigStore.instance()
cs.store(
    group="experiment",
    package="_global_",
    name=Cosmos_DiffusionDecoder_7B_INFERENCE_ONLY["job"]["name"],
    node=Cosmos_DiffusionDecoder_7B_INFERENCE_ONLY,
)