{ "bottleneck": { "args": { "bottleneck_dim": 16, "norm": "none", "regularizer": { "args": { "codebook_loss_weight": 1.0, "codebook_size": 8192, "commitment_loss_weight": 0.25, "entropy_loss_temperature": 0.01, "entropy_loss_weight": 0.0, "eval_deterministic": true, "l2_normalized": true, "stochastic": true, "stochastic_temperature": 0.03 }, "name": "vq" } }, "name": "bottleneck" }, "bottleneck_token_num": 2048, "decode_mode": "with_drop", "decoder_attn_type": "full_causal_type2", "decoder_depth": 12, "decoder_hidden_size": 768, "decoder_name": "none", "decoder_num_heads": 12, "decoder_patch_size": 8, "decoder_temporal_patch_size": 4, "encoder_attn_type": "full_causal_type1", "encoder_depth": 12, "encoder_hidden_size": 768, "encoder_name": "none", "encoder_num_heads": 12, "encoder_query_gaussian_init": true, "frame_num": 16, "in_channels": 3, "input_size": 128, "latent_pe_scale_factor": 10000, "learned_decoder_latent_pe": false, "learned_decoder_patch_query_embed": false, "learned_encoder_latent_query_embed": true, "learned_encoder_patch_pe": false, "mask_generator": { "eval_mask_type": "full_ones", "mask_type": "left_masking_by_group_normal", "max_toks": 512, "mean_toks": 256, "min_first_toks": 1, "min_toks": 32, "std_toks": 128, "tot_groups": 4, "total_toks": 512 }, "mode": "train_adap", "patch_size": 8, "prior_model": { "args": { "l2_normalized": true }, "avg_loss_over_rounds": true, "latent_ce_temperature": 1.0, "mix_ss_max_ratio": 0.5, "mix_ss_peak_steps_ratio": 0.3, "n_rounds": 2, "name": "gptc-S", "no_dropout": false, "no_grad_before_last_round": false, "use_mix_ss": true }, "query_init_std": 0.02, "temporal_patch_size": 4, "transformer_name": "transformer_encoder_parallel", "transformers_version": "4.50.0", "use_decoder_latent_token_type_embed": false, "use_decoder_patch_query_token_type_embed": true, "use_encoder_latent_query_token_type_embed": false, "use_encoder_patch_token_type_embed": false }