""" BarVox Audio Processing API - Model Loader Module VERSION 2.2: Includes Wav2Vec2 CTC and base models """ import logging import torch logger = logging.getLogger(__name__) _MODELS = {} def load_models(): """Load all required models into memory.""" global _MODELS logger.info("Loading models...") # HuBERT CTC from transformers import Wav2Vec2Processor, HubertForCTC, HubertModel _MODELS['hubert_processor'] = Wav2Vec2Processor.from_pretrained("facebook/hubert-large-ls960-ft") _MODELS['hubert_model'] = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft") _MODELS['hubert_base_model'] = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft") logger.info("✓ HuBERT models loaded") # Wav2Vec2 CTC from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2Model _MODELS['wav2vec2_processor'] = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") _MODELS['wav2vec2_model'] = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") _MODELS['wav2vec2_base_model'] = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") logger.info("✓ Wav2Vec2 models loaded") # TRILL try: import tensorflow_hub as hub _MODELS['trill_model'] = hub.load('https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3') logger.info("✓ TRILL model loaded") except Exception as e: logger.warning(f"TRILL model failed to load (non-fatal): {e}") # Silero VAD silero_model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad', force_reload=False, trust_repo=True) _MODELS['silero_vad'] = silero_model _MODELS['silero_utils'] = utils logger.info("✓ Silero VAD loaded") # Allosaurus try: from allosaurus.app import read_recognizer _MODELS['allosaurus_model'] = read_recognizer() logger.info("✓ Allosaurus loaded") except Exception as e: logger.warning(f"Allosaurus model failed to load (non-fatal): {e}") logger.info("All models loaded successfully!") def get_models(): """Get the loaded models dictionary.""" return _MODELS