from typing import Dict, Optional, Type

from .providers.mistralstt import MistralSTT
from .providers.groqstt import GroqSTT
from .providers.deepgramstt import DeepgramSTT
from .providers.aaistt import AssemblyAISTT
from .providers.customstt import CustomSTT


class NoSpeechDetected(Exception):
    """Raised when a provider call succeeds but yields empty/blank text."""


class ProviderUnavailable(Exception):
    """Raised when a provider's transcription call fails with an error."""


class STTFactory:
    """Registry that maps lower-case provider names to STT provider classes."""

    _providers: Dict[str, Type] = {
        "mistral": MistralSTT,
        "groq": GroqSTT,
        "deepgram": DeepgramSTT,
        "assemblyai": AssemblyAISTT,
        "custom": CustomSTT,
    }

    @classmethod
    def get_provider(cls, provider_name: str):
        """Return a new instance of the provider registered under a name.

        The lookup is case-insensitive. The provider class is instantiated
        with no arguments.

        Raises:
            ValueError: If ``provider_name`` is not a string or is not a
                registered provider name.
        """
        # A non-string name (e.g. None) used to crash with AttributeError on
        # ``.lower()``; report it the same way as any other unknown name.
        provider_class = (
            cls._providers.get(provider_name.lower())
            if isinstance(provider_name, str)
            else None
        )
        if provider_class is None:
            raise ValueError(f"STT provider '{provider_name}' not found")
        return provider_class()


class STTRemoteManager:
    """Runs transcription through a named provider, with a default fallback."""

    def __init__(self, default_provider: str = "mistral"):
        """Store the provider name used when a call does not specify one."""
        self.default_provider = default_provider

    async def transcribe_remote(
        self, audio_path: str, provider_name: Optional[str] = None
    ):
        """Transcribe ``audio_path`` with the chosen (or default) provider.

        Args:
            audio_path: Passed through unchanged to the provider's
                ``transcribe`` coroutine.
            provider_name: Registered provider name (case-insensitive).
                A falsy value selects ``self.default_provider``.

        Returns:
            The ``(text, language)`` pair produced by the provider.

        Raises:
            ValueError: If the provider name is not registered.
            ProviderUnavailable: If the provider's ``transcribe`` call raises.
            NoSpeechDetected: If the returned text is empty or whitespace.
        """
        provider_name = (provider_name or self.default_provider).lower()
        # Resolved outside the try block on purpose: an unknown name stays a
        # ValueError rather than being reported as a provider failure.
        provider = STTFactory.get_provider(provider_name)

        try:
            text, language = await provider.transcribe(audio_path)
        except (NoSpeechDetected, ProviderUnavailable):
            # If a provider signals one of these itself, keep the distinction
            # between "silence" and "failure" instead of re-wrapping it.
            raise
        except Exception as e:
            # Real provider error, not silence.
            raise ProviderUnavailable(
                f"Provider '{provider_name}' failed: {e}"
            ) from e

        if not text or not text.strip():
            raise NoSpeechDetected(
                f"Provider '{provider_name}' returned no speech for this chunk."
            )
        return text, language