{ "model_type": "audiogen", "nQ": 4, "card": 2048, "dim": 1536, "numHeads": 24, "hiddenScale": 4, "numLayers": 48, "causal": true, "crossAttention": true, "dropout": 0.0, "activation": "gelu", "norm": "layer_norm", "normFirst": true, "biasFF": false, "biasAttn": false, "layerScale": null, "context": 4096, "maxPeriod": 10000, "positionalEmbedding": "sin", "positionalScale": 1.0, "xPos": false, "weight": 1.0, "conditionProvider": "t5", "twoStepCFG": false, "kvRepeat": 1, "qkLayerNorm": false, "emptyLikeInit": false, "emptyLikeInitDetokenized": false, "zeroHypothesisRate": 0.0, "quantize": false, "weightsPerStep": null, "normalize": true, "frameRate": 50.0, "sampleRate": 16000, "duration": 10.0, "numSamples": 1, "specialToken": 2048, "tokenizer": "t5-large", "t5_model_name": "t5-large", "clsToken": 2048, "padToken": 2048, "encodec": { "model_type": "encodec", "audio_channels": 1, "num_filters": 32, "kernel_size": 7, "num_residual_layers": 1, "dilation_growth_rate": 2, "codebook_size": 2048, "codebook_dim": 128, "hidden_size": 128, "num_lstm_layers": 2, "residual_kernel_size": 3, "use_causal_conv": true, "normalize": false, "pad_mode": "reflect", "norm_type": "time_group_norm", "last_kernel_size": 7, "trim_right_ratio": 1.0, "compress": 2, "upsampling_ratios": [8, 5, 4, 2], "target_bandwidths": [1.5, 3.0, 6.0, 12.0, 24.0], "sampling_rate": 16000, "chunk_length_s": null, "overlap": null, "use_conv_shortcut": false }, "t5": { "model_name": "t5-large", "d_model": 1024, "d_kv": 64, "d_ff": 4096, "num_layers": 24, "num_heads": 16, "relative_attention_num_buckets": 32, "relative_attention_max_distance": 128, "dropout_rate": 0.0, "layer_norm_epsilon": 1e-06, "feed_forward_proj": "relu", "vocab_size": 32128, "tie_word_embeddings": true } }