# Configuration for the AstralQuantizer model and its sub-modules.
# Each `_target_` holds the dotted import path of the class that the
# surrounding mapping configures (presumably resolved by a Hydra-style
# `instantiate` call; confirm against the loader).
_target_: modules.astral_quantization.default_model.AstralQuantizer

# Pretrained model identifiers (these look like Hugging Face hub names;
# TODO confirm how the model class resolves them).
tokenizer_name: "openai/whisper-small"
ssl_model_name: "facebook/hubert-large-ll60k"
# Presumably selects which layer of the SSL model is used as features;
# verify the indexing convention in AstralQuantizer.
ssl_output_layer: 18

# Encoder stage: takes `input_dim` (1024) and works at width `dim` (512).
encoder:
  _target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
  dim: 512
  num_blocks: 12
  intermediate_dim: 1536
  dilation: 1
  input_dim: 1024

# Quantizer: `dim` equals the encoder/decoder `dim` (512).
quantizer:
  _target_: modules.astral_quantization.bsq.BinarySphericalQuantize
  codebook_size: 2048
  dim: 512
  entropy_loss_weight: 0.1
  diversity_gamma: 1.0
  spherical: true
  enable_entropy_loss: true
  soft_entropy_loss: true

# Decoder stage: same class and width as the encoder, with `output_dim`
# (1024) equal to the encoder's `input_dim`.
decoder:
  _target_: modules.astral_quantization.convnext.ConvNeXtV2Stage
  dim: 512
  num_blocks: 12
  intermediate_dim: 1536
  dilation: 1
  output_dim: 1024
  # NOTE(review): presumably the width of a global conditioning input;
  # confirm against ConvNeXtV2Stage.
  gin_channels: 192

# ASR decoder: `in_channels` equals the `dim` (512) used by the encoder,
# quantizer and decoder above.
asr_decoder:
  _target_: modules.astral_quantization.asr_decoder.ASRDecoder
  hidden_dim: 768
  num_heads: 12
  depth: 12
  block_size: 4096
  in_channels: 512
  # NOTE(review): vocabulary size and special-token ids presumably must
  # agree with the tokenizer named in `tokenizer_name`; verify.
  n_vocab: 51866
  bos_id: 50528
  eos_id: 50527
  dropout_rate: 0.0
  attn_dropout_rate: 0.0