{
  "bottleneck": {
    "args": {
      "bottleneck_dim": 16,
      "norm": "none",
      "regularizer": {
        "args": {
          "codebook_loss_weight": 1.0,
          "codebook_size": 8192,
          "commitment_loss_weight": 0.25,
          "entropy_loss_temperature": 0.01,
          "entropy_loss_weight": 0.0,
          "eval_deterministic": true,
          "l2_normalized": true,
          "stochastic": true,
          "stochastic_temperature": 0.03,
          "train_deterministic": true
        },
        "name": "vq"
      }
    },
    "name": "bottleneck"
  },
  "bottleneck_token_num": 2048,
  "decode_mode": "with_drop",
  "decoder_attn_type": "full_causal_type2",
  "decoder_depth": 12,
  "decoder_hidden_size": 768,
  "decoder_name": "none",
  "decoder_num_heads": 12,
  "decoder_patch_size": 8,
  "decoder_temporal_patch_size": 4,
  "encoder_attn_type": "full_causal_type1",
  "encoder_depth": 12,
  "encoder_hidden_size": 768,
  "encoder_name": "none",
  "encoder_num_heads": 12,
  "encoder_query_gaussian_init": true,
  "frame_num": 16,
  "in_channels": 3,
  "input_size": 128,
  "latent_pe_scale_factor": 10000,
  "learned_decoder_latent_pe": false,
  "learned_decoder_patch_query_embed": false,
  "learned_encoder_latent_query_embed": true,
  "learned_encoder_patch_pe": false,
  "mask_generator": {
    "mask_type": "left_masking_by_group_normal",
    "max_toks": 512,
    "mean_toks": 256,
    "min_first_toks": 1,
    "min_toks": 32,
    "std_toks": 128,
    "tot_groups": 4,
    "total_toks": 512
  },
  "mode": "train_online_scorer",
  "patch_size": 8,
  "prior_model": {
    "args": {
      "l2_normalized": true
    },
    "avg_loss_over_rounds": true,
    "latent_ce_temperature": 1.0,
    "mix_ss_max_ratio": 0.5,
    "mix_ss_peak_steps_ratio": 0.3,
    "n_rounds": 2,
    "name": "gptc-S",
    "no_dropout": false,
    "no_grad_before_last_round": false,
    "use_mix_ss": true
  },
  "query_init_std": 0.02,
  "scorer_attn_type": "full_causal_type1",
  "scorer_depth": 12,
  "scorer_hidden_size": 768,
  "scorer_name": "transformer_scorer",
  "scorer_num_heads": 12,
  "scorer_step": 64,
  "temporal_patch_size": 4,
  "token_select_mode": "none",
  "token_select_num": 10000,
  "transformer_name": "transformer_encoder_parallel",
  "transformers_version": "4.50.0",
  "use_decoder_latent_token_type_embed": false,
  "use_decoder_patch_query_token_type_embed": true,
  "use_encoder_latent_query_token_type_embed": false,
  "use_encoder_patch_token_type_embed": false
}
|