| { | |
| "_commit_hash": null, | |
| "_name_or_path": "/home/arthur_huggingface_co/transformers/jukebox-1b-lyrics-converted", | |
| "architectures": [ | |
| "JukeboxModel" | |
| ], | |
| "hop_fraction": [ | |
| 0.125, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "init_std": 0.2, | |
| "max_duration": 600.0, | |
| "max_nb_genres": 1, | |
| "metadata_conditioning": true, | |
| "min_duration": 17.84, | |
| "model_type": "jukebox", | |
| "nb_priors": 3, | |
| "prior_0": { | |
| "act_fn": "quick_gelu", | |
| "alignment_head": 2, | |
| "alignment_layer": 68, | |
| "attention_multiplier": 0.25, | |
| "attention_pattern": "enc_dec_with_lyrics", | |
| "attn_dropout": 0, | |
| "attn_res_scale": false, | |
| "blocks": 64, | |
| "conv_res_scale": false, | |
| "depth": 72, | |
| "emb_dropout": 0, | |
| "embed_dim": 2048, | |
| "encoder_attention_multiplier": 0.25, | |
| "encoder_attention_pattern": "RawColumnPreviousRowAttention", | |
| "encoder_attn_dropout": 0.0, | |
| "encoder_attn_res_scale": false, | |
| "encoder_blocks": 32, | |
| "encoder_depth": 18, | |
| "encoder_emb_dropout": 0.0, | |
| "encoder_heads": 4, | |
| "encoder_init_scale": 0.1, | |
| "encoder_loss_fraction": [ | |
| 0.4, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "encoder_mlp_multiplier": 1.0, | |
| "encoder_n_vocab": 79, | |
| "encoder_resid_dropout": 0.0, | |
| "encoder_spread": null, | |
| "encoder_width": 1280, | |
| "encoder_zero_out": false, | |
| "init_scale": 0.2, | |
| "is_encoder_decoder": true, | |
| "lyric_conditioning": true, | |
| "mask": true, | |
| "max_duration": 600.0, | |
| "max_nb_genres": 1, | |
| "merged_decoder": false, | |
| "metadata_conditioning": true, | |
| "metadata_dims": [ | |
| 604, | |
| 7898 | |
| ], | |
| "min_duration": 17.84, | |
| "mlp_multiplier": 1.0, | |
| "model_type": "jukebox", | |
| "n_ctx": 6144, | |
| "n_heads": 2, | |
| "nb_relevant_lyric_tokens": 384, | |
| "res_conv_depth": null, | |
| "res_conv_width": null, | |
| "res_convolution_multiplier": null, | |
| "res_dilation_cycle": null, | |
| "res_dilation_growth_rate": null, | |
| "res_downs_t": [ | |
| 3, | |
| 2, | |
| 2 | |
| ], | |
| "res_strides_t": [ | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "resid_dropout": 0, | |
| "sampling_rate": 44100, | |
| "spread": null, | |
| "timing_dims": 64, | |
| "transformers_version": "4.25.0.dev0", | |
| "width": 2048, | |
| "zero_out": false | |
| }, | |
| "prior_1": { | |
| "act_fn": "quick_gelu", | |
| "alignment_head": null, | |
| "alignment_layer": null, | |
| "attention_multiplier": 0.25, | |
| "attention_pattern": "RawColumnPreviousRowAttention", | |
| "attn_dropout": 0, | |
| "attn_res_scale": false, | |
| "blocks": 64, | |
| "conv_res_scale": true, | |
| "depth": 72, | |
| "emb_dropout": 0, | |
| "embed_dim": 2048, | |
| "encoder_attention_multiplier": null, | |
| "encoder_attention_pattern": null, | |
| "encoder_attn_dropout": null, | |
| "encoder_attn_res_scale": false, | |
| "encoder_blocks": null, | |
| "encoder_depth": null, | |
| "encoder_emb_dropout": null, | |
| "encoder_heads": null, | |
| "encoder_init_scale": null, | |
| "encoder_loss_fraction": [ | |
| 0.4, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "encoder_mlp_multiplier": null, | |
| "encoder_n_vocab": 0, | |
| "encoder_resid_dropout": null, | |
| "encoder_spread": null, | |
| "encoder_width": null, | |
| "encoder_zero_out": null, | |
| "init_scale": 1, | |
| "is_encoder_decoder": false, | |
| "lyric_conditioning": false, | |
| "mask": true, | |
| "max_duration": 600.0, | |
| "max_nb_genres": 1, | |
| "merged_decoder": false, | |
| "metadata_conditioning": true, | |
| "metadata_dims": [ | |
| 120, | |
| 4111 | |
| ], | |
| "min_duration": 17.84, | |
| "mlp_multiplier": 1.0, | |
| "model_type": "jukebox", | |
| "n_ctx": 8192, | |
| "n_heads": 1, | |
| "nb_relevant_lyric_tokens": 0, | |
| "res_conv_depth": 16, | |
| "res_conv_width": 1024, | |
| "res_convolution_multiplier": 1, | |
| "res_dilation_cycle": 8, | |
| "res_dilation_growth_rate": 3, | |
| "res_downs_t": [ | |
| 3, | |
| 2, | |
| 2 | |
| ], | |
| "res_strides_t": [ | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "resid_dropout": 0, | |
| "sampling_rate": 44100, | |
| "spread": null, | |
| "timing_dims": 64, | |
| "transformers_version": "4.25.0.dev0", | |
| "width": 1920, | |
| "zero_out": false | |
| }, | |
| "prior_2": { | |
| "act_fn": "quick_gelu", | |
| "alignment_head": null, | |
| "alignment_layer": null, | |
| "attention_multiplier": 0.25, | |
| "attention_pattern": "RawColumnPreviousRowAttention", | |
| "attn_dropout": 0, | |
| "attn_res_scale": false, | |
| "blocks": 64, | |
| "conv_res_scale": null, | |
| "depth": 72, | |
| "emb_dropout": 0, | |
| "embed_dim": 2048, | |
| "encoder_attention_multiplier": null, | |
| "encoder_attention_pattern": null, | |
| "encoder_attn_dropout": null, | |
| "encoder_attn_res_scale": false, | |
| "encoder_blocks": null, | |
| "encoder_depth": null, | |
| "encoder_emb_dropout": null, | |
| "encoder_heads": null, | |
| "encoder_init_scale": null, | |
| "encoder_loss_fraction": [ | |
| 0.4, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "encoder_mlp_multiplier": null, | |
| "encoder_n_vocab": 0, | |
| "encoder_resid_dropout": null, | |
| "encoder_spread": null, | |
| "encoder_width": null, | |
| "encoder_zero_out": null, | |
| "init_scale": 1, | |
| "is_encoder_decoder": false, | |
| "lyric_conditioning": false, | |
| "mask": true, | |
| "max_duration": 600.0, | |
| "max_nb_genres": 1, | |
| "merged_decoder": false, | |
| "metadata_conditioning": true, | |
| "metadata_dims": [ | |
| 120, | |
| 4111 | |
| ], | |
| "min_duration": 17.84, | |
| "mlp_multiplier": 1.0, | |
| "model_type": "jukebox", | |
| "n_ctx": 8192, | |
| "n_heads": 1, | |
| "nb_relevant_lyric_tokens": 0, | |
| "res_conv_depth": 16, | |
| "res_conv_width": 1024, | |
| "res_convolution_multiplier": 1, | |
| "res_dilation_cycle": 8, | |
| "res_dilation_growth_rate": 3, | |
| "res_downs_t": [ | |
| 3, | |
| 2, | |
| 2 | |
| ], | |
| "res_strides_t": [ | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "resid_dropout": 0, | |
| "sampling_rate": 44100, | |
| "spread": null, | |
| "timing_dims": 64, | |
| "transformers_version": "4.25.0.dev0", | |
| "width": 1920, | |
| "zero_out": false | |
| }, | |
| "sampling_rate": 44100, | |
| "timing_dims": 64, | |
| "torch_dtype": "float32", | |
| "transformers_version": null, | |
| "vqvae_config": { | |
| "act_fn": "relu", | |
| "codebook_dimension": 2048, | |
| "commit": 0.02, | |
| "conv_input_shape": 1, | |
| "conv_res_scale": false, | |
| "embed_dim": 64, | |
| "hop_fraction": [ | |
| 0.125, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "levels": 3, | |
| "lmu": 0.99, | |
| "model_type": "", | |
| "multipliers": [ | |
| 2, | |
| 1, | |
| 1 | |
| ], | |
| "res_conv_depth": 4, | |
| "res_conv_width": 32, | |
| "res_convolution_multiplier": 1, | |
| "res_dilation_cycle": null, | |
| "res_dilation_growth_rate": 3, | |
| "res_downs_t": [ | |
| 3, | |
| 2, | |
| 2 | |
| ], | |
| "res_strides_t": [ | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "sample_length": 1058304, | |
| "transformers_version": "4.25.0.dev0" | |
| } | |
| } | |