# ################################
# ProTeVa Inference Configuration
# Simplified YAML for deployment
# ################################
#
# NOTE(review): this file was recovered from a whitespace-mangled copy in
# which every `!ref <key>` target had its angle-bracketed key stripped.
# The targets below are reconstructed from the parameter names and the
# standard SpeechBrain config layout — verify against the training YAML.

# Basic settings
seed: 200
device: cpu  # Change to cuda if GPU available
sample_rate: 16000

# Output neurons (4 classes: blank, high, low, mid)
# Based on labelencoder.txt: 0=blank, 1=H, 2=B, 3=M
# Space (4) is added via post-processing
output_neurons: 4
blank_index: 0

# Number of prototypes
n_prototypes: 10

# Feature dimension from HuBERT
emb_dim: 768

# Encoder settings
rnn_layers: 2
rnn_neurons: 512

# Decoder settings
dnn_blocks: 2
dnn_neurons: 512

# Pitch decoder settings
dec_dnn_blocks: [1]
dec_dnn_neurons: [128]

# Activation function
activation: !name:torch.nn.LeakyReLU

# Save folder - Path is loaded from config.py
# To change checkpoint folder, update CHECKPOINT_FOLDER in config.py
# NOTE(review): defined before the pretrainer because HyperPyYAML resolves
# `!ref` targets in file order; the mangled copy had this key last.
save_folder: ./CKPT+2025-10-20+08-19-07+00

# ============ MODULES ============

# HuBERT feature extractor
wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.hubert.HuBERT
  source: "Orange/SSA-HuBERT-base-60k"
  output_norm: True
  freeze: False
  save_path: whubert_checkpoint

# F0 extractor (requires custom module)
f0Compute: !new:modules.F0Extractor
  device: !ref <device>
  sample_rate: !ref <sample_rate>

# BiGRU Encoder
enc: !new:speechbrain.nnet.RNN.GRU
  input_shape: [null, null, !ref <emb_dim>]
  hidden_size: !ref <rnn_neurons>
  num_layers: !ref <rnn_layers>
  bidirectional: True
  dropout: 0.15

# VanillaNN Decoder
dec: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
  input_shape: [null, null, 1024]  # 512 * 2 (bidirectional)
  activation: !ref <activation>
  dnn_blocks: !ref <dnn_blocks>
  dnn_neurons: !ref <dnn_neurons>

# Pitch Decoder (requires custom module)
pitch_dec: !new:modules.PitchDecoderLayer
  input_shape: [null, null, !ref <dnn_neurons>]
  dnn_blocks: !ref <dec_dnn_blocks>
  dnn_neurons: !ref <dec_dnn_neurons>

# Prototype Layer (requires custom module)
# NOTE(review): latent_dims assumed to match the decoder output width
# (dnn_neurons) — confirm against modules.PrototypeLayer and training config.
proto: !new:modules.PrototypeLayer
  n_prototypes: !ref <n_prototypes>
  latent_dims: !ref <dnn_neurons>

# Output linear layer
# NOTE(review): input_size assumed to be the prototype-similarity vector
# (n_prototypes) — confirm against the prototype layer's output shape.
output_lin: !new:speechbrain.nnet.linear.Linear
  input_size: !ref <n_prototypes>
  n_neurons: !ref <output_neurons>
  bias: True

# Log softmax
log_softmax: !new:speechbrain.nnet.activations.Softmax
  apply_log: True

# Label encoder
label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder

# ============ MODULES DICT ============
modules:
  wav2vec2: !ref <wav2vec2>
  enc: !ref <enc>
  dec: !ref <dec>
  pitch_dec: !ref <pitch_dec>
  proto: !ref <proto>
  output_lin: !ref <output_lin>

# Model container for all modules
# (wav2vec2 is excluded: it is checkpointed separately by the pretrainer)
model: !new:torch.nn.ModuleList
  - [!ref <enc>, !ref <dec>, !ref <pitch_dec>, !ref <proto>, !ref <output_lin>]

# ============ PRETRAINER ============
# This loads the trained checkpoints
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    model: !ref <model>
    wav2vec2: !ref <wav2vec2>
    tokenizer: !ref <label_encoder>
  paths:
    model: !ref <save_folder>/model.ckpt
    wav2vec2: !ref <save_folder>/wav2vec2.ckpt
    tokenizer: !ref <save_folder>/tokenizer.ckpt