{
  "activation_function": "gelu_new",
  "alignment_head": [
    2,
    null,
    null
  ],
  "alignment_layer": [
    68,
    null,
    null
  ],
  "architectures": [
    "JukeboxModel"
  ],
  "attn_dropout": 0.0,
  "attn_init_scale": 1.0,
  "attn_order": [
    12,
    2,
    2
  ],
  "blocks": 16,
  "bos_token_id": 50256,
  "c_res": 1,
  "cond_c_res": [
    0,
    1,
    1
  ],
  "cond_depth": [
    3,
    16,
    16
  ],
  "cond_dilation_cycle": [
    null,
    8,
    8
  ],
  "cond_dilation_growth_rate": [
    1,
    3,
    3
  ],
  "cond_m_conv": 1,
  "cond_res_scale": false,
  "cond_width": [
    128,
    128,
    64
  ],
  "cond_zero_out": false,
  "copy_input": false,
  "depth": [
    2,
    2,
    2
  ],
  "downs_t": [
    3,
    2,
    2
  ],
  "emb_dropout": 0.1,
  "eos_token_id": 50256,
  "fp16_params": true,
  "hop_length": 256,
  "init_scale": [
    0.7,
    1,
    1
  ],
  "initializer_range": 0.02,
  "l_bins": 128,
  "labels": true,
  "layer_norm_epsilon": 1e-05,
  "m_attn": 0.25,
  "max_bow_genre_size": 1,
  "max_duration": 600.0,
  "merged_decoder": [
    true,
    false,
    false
  ],
  "min_duration": 1,
  "mlp_init_scale": 0.02,
  "model_type": "jukebox",
  "multispec_loss_hop_length": [
    240,
    120,
    50
  ],
  "multispec_loss_n_fft": [
    2048,
    1024,
    512
  ],
  "multispec_loss_window_size": [
    1200,
    600,
    240
  ],
  "multispectral": 1.0,
  "n_ctx": [
    256,
    256,
    256
  ],
  "n_embd": 768,
  "n_head": 12,
  "n_heads": [
    2,
    1,
    1
  ],
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "n_tokens": [
    512,
    0,
    0
  ],
  "n_vocab": 79,
  "name": "AudioSamples",
  "nb_priors": 3,
  "pos_init": false,
  "prime_attn_dropout": 0.0,
  "prime_attn_order": [
    2,
    0,
    0
  ],
  "prime_blocks": 32,
  "prime_c_res": 1,
  "prime_cond_c_res": [
    0,
    1,
    1
  ],
  "prime_depth": [
    18,
    3,
    3
  ],
  "prime_emb_dropout": 0.0,
  "prime_heads": 4,
  "prime_init_scale": [
    0.1,
    0.4,
    0.4
  ],
  "prime_loss_fraction": [
    0.4,
    0.0,
    0.0
  ],
  "prime_m_attn": 0.25,
  "prime_m_mlp": 1.0,
  "prime_pos_init": false,
  "prime_res_scale": false,
  "prime_resid_dropout": 0.0,
  "prime_spread": null,
  "prime_width": [
    128,
    128,
    128
  ],
  "prime_zero_out": false,
  "priors_width": [
    128,
    64,
    32
  ],
  "reorder_and_upcast_attn": false,
  "res_scale": false,
  "resid_dropout": 0.0,
  "sample_hop_length": 30000,
  "sample_length": 44032,
  "sample_length_in_seconds": 1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "single_enc_dec": [
    true,
    false,
    false
  ],
  "spectral": 0.0,
  "spread": null,
  "sr": 44100,
  "strides_t": [
    2,
    2,
    2
  ],
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "t_bins": 64,
  "torch_dtype": "float32",
  "transformers_version": "4.19.0.dev0",
  "use_cache": true,
  "use_nonrelative_specloss": true,
  "use_tokens": [
    true,
    false,
    false
  ],
  "vocab_size": 50257,
  "vq_vae_codebook_dimension": 128,
  "vq_vae_commit": 0.02,
  "vq_vae_conv_block_depth": 4,
  "vq_vae_conv_block_width": 64,
  "vq_vae_depth": 4,
  "vq_vae_dilation_cycle": null,
  "vq_vae_dilation_growth_rate": 3,
  "vq_vae_downs_t": [
    3,
    2,
    2
  ],
  "vq_vae_emmbedding_width": 128,
  "vq_vae_levels": 3,
  "vq_vae_lmu": 0.99,
  "vq_vae_m_conv": 1,
  "vq_vae_multipliers": [
    2,
    1,
    1
  ],
  "vq_vae_reverse_decoder_dilation": 1,
  "vq_vae_strides_t": [
    2,
    2,
    2
  ],
  "vq_vae_width": 64,
  "vqvae_z_shapes": [
    [
      344
    ],
    [
      1376
    ],
    [
      5504
    ]
  ],
  "width": [
    128,
    64,
    32
  ],
  "y_bins": [
    [
      120,
      4111
    ],
    [
      120,
      4111
    ],
    [
      120,
      4111
    ]
  ],
  "zero_out": false
}