Fix emotion2vec: use correct PT-BR model

- Changed the fallback model (loaded when the fine-tuned model is not used) to alefiury/wav2vec2-xls-r-300m-pt-br-spontaneous-speech-emotion-recognition
- Winner of SE&R 2022 Workshop for Portuguese speech
- Trained on CORAA SER v1.0 + multilingual datasets (EMOVO, RAVDESS, BAVED)
- 3 classes: neutral, non_neutral_female, non_neutral_male
- The model exists and is compatible with the `transformers` library

The previous attempt used a non-existent model name.

Files changed (1) hide show

ensemble_tts/models/emotion.py +7 -7

ensemble_tts/models/emotion.py CHANGED Viewed

@@ -43,14 +43,14 @@ class Emotion2VecModel(BaseModel):
                 self.model = Wav2Vec2ForSequenceClassification.from_pretrained(str(finetuned_path))
                 logger.info("✅ Using FINE-TUNED model (trained on VERBO/emoUERJ)")
             else:
-                # Use wav2vec2 as compatible alternative
-                logger.info("Loading wav2vec2-large-xlsr-53...")
-                self.processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-xlsr-53")
-                self.model = Wav2Vec2ForSequenceClassification.from_pretrained(
-                    "alefiury/wav2vec2-large-xlsr-53-portuguese-emotion-recognition"
-                )
                 if self.use_finetuned:
-                    logger.warning("⚠️  Fine-tuned model not found, using base wav2vec2 PT-BR")
                     logger.info("To fine-tune: python scripts/training/finetune_emotion2vec.py")
             self.model.to(self.device)

                 self.model = Wav2Vec2ForSequenceClassification.from_pretrained(str(finetuned_path))
                 logger.info("✅ Using FINE-TUNED model (trained on VERBO/emoUERJ)")
             else:
+                # Use PT-BR emotion recognition model as compatible alternative
+                # Winner of SE&R 2022 Workshop for Portuguese speech
+                pt_br_model = "alefiury/wav2vec2-xls-r-300m-pt-br-spontaneous-speech-emotion-recognition"
+                logger.info(f"Loading {pt_br_model}...")
+                self.processor = Wav2Vec2Processor.from_pretrained(pt_br_model)
+                self.model = Wav2Vec2ForSequenceClassification.from_pretrained(pt_br_model)
                 if self.use_finetuned:
+                    logger.warning("⚠️  Fine-tuned model not found, using pre-trained PT-BR model")
                     logger.info("To fine-tune: python scripts/training/finetune_emotion2vec.py")
             self.model.to(self.device)