{
  "bottleneck": {
    "args": {
      "bottleneck_dim": 16,
      "norm": "none",
      "regularizer": {
        "args": {
          "codebook_loss_weight": 1.0,
          "codebook_size": 8192,
          "commitment_loss_weight": 0.25,
          "entropy_loss_temperature": 0.01,
          "entropy_loss_weight": 0.0,
          "l2_normalized": true,
          "stochastic": true,
          "stochastic_temperature": 0.03
        },
        "name": "vq"
      }
    },
    "name": "bottleneck"
  },
  "bottleneck_token_num": 1024,
  "decoder_depth": 12,
  "decoder_hidden_size": 768,
  "decoder_name": "none",
  "decoder_num_heads": 12,
  "decoder_patch_size": 8,
  "decoder_temporal_patch_size": 4,
  "encoder_depth": 12,
  "encoder_hidden_size": 768,
  "encoder_name": "none",
  "encoder_num_heads": 12,
  "encoder_query_gaussian_init": true,
  "frame_num": 16,
  "in_channels": 3,
  "input_size": 128,
  "latent_pe_scale_factor": 10000,
  "learned_decoder_latent_pe": false,
  "learned_decoder_patch_query_embed": false,
  "learned_encoder_latent_query_embed": true,
  "learned_encoder_patch_pe": false,
  "patch_size": 8,
  "prior_model": {
    "args": {
      "l2_normalized": true
    },
    "avg_loss_over_rounds": true,
    "latent_ce_temperature": 1.0,
    "mix_ss_max_ratio": 0.5,
    "mix_ss_peak_steps_ratio": 0.3,
    "n_rounds": 2,
    "name": "gptc-S",
    "no_dropout": false,
    "no_grad_before_last_round": false,
    "use_mix_ss": true
  },
  "query_init_std": 0.02,
  "temporal_patch_size": 4,
  "transformer_name": "transformer_encoder_parallel",
  "use_decoder_latent_token_type_embed": false,
  "use_decoder_patch_query_token_type_embed": true,
  "use_encoder_latent_query_token_type_embed": false,
  "use_encoder_patch_token_type_embed": false
}