---
# Hyperparameters for the model identified by `class_identifier` below.
# Flat mapping, one setting per line; keys are kept in alphabetical order.
activations: Tanh
batch_size: 2
class_identifier: speech_regression_metric
dropout: 0.1
encoder_learning_rate: 1.0e-06
encoder_model: XLM-RoBERTa
encoder_model_audio: sonar_speech_encoder_eng
# Explicit null rather than an empty value, so the intent is unambiguous.
final_activation: null
fuse_emb_strategy: avg
hidden_sizes:
  - 2048
  - 1024
# NOTE(review): modality is `text` although `encoder_model_audio` is also
# set -- confirm the audio encoder is meant to be unused in this run.
input_modality: text
keep_embeddings_frozen: true
layer: mix
layer_norm: false
layer_transformation: sparsemax
layerwise_decay: 0.95
learning_rate: 1.5e-05
load_pretrained_weights: true
local_files_only: false
loss: mse
# NOTE(review): fractional value; presumably a fraction of an epoch --
# confirm against the consumer.
nr_frozen_epochs: 0.3
optimizer: AdamW
pool: avg
pretrained_model: xlm-roberta-base
train_data:
  - maikezu/iwslt2026-metrics-shared-train-dev
# NOTE(review): same dataset identifier as `train_data`; verify that the
# loader selects a distinct split for validation.
validation_data:
  - maikezu/iwslt2026-metrics-shared-train-dev
warmup_steps: 0