MinaNasser committed on
Commit
f85bf15
·
1 Parent(s): 4f665b8
Files changed (2) hide show
  1. stores/providers/aaistt.py +14 -5
  2. stores/sttremotes.py +23 -3
stores/providers/aaistt.py CHANGED
@@ -9,6 +9,11 @@ config = aai.TranscriptionConfig(
9
  language_detection=True, # auto-detect language
10
  speaker_labels=True, # diarization
11
  )
 
 
 
 
 
12
 
13
  class AssemblyAISTT:
14
  def __init__(self):
@@ -21,11 +26,15 @@ class AssemblyAISTT:
21
  def sync_transcribe():
22
  transcript = self.client.transcribe(audio_path,config=config)
23
  if transcript.status == aai.TranscriptStatus.error:
24
- raise Exception(f"Transcription failed: {transcript.error}")
 
 
 
 
 
 
 
25
 
26
- text = transcript.text
27
- # Language detection is available in transcript.language_code
28
- language = transcript.language_code if transcript.language_code else "unknown"
29
- return text, language
30
 
31
  return await loop.run_in_executor(None, sync_transcribe)
 
9
  language_detection=True, # auto-detect language
10
  speaker_labels=True, # diarization
11
  )
12
+ _NO_SPEECH_ERRORS = (
13
+ "no spoken audio",
14
+ "language_detection cannot be performed",
15
+ "audio duration is too short",
16
+ )
17
 
18
  class AssemblyAISTT:
19
  def __init__(self):
 
26
  def sync_transcribe():
27
  transcript = self.client.transcribe(audio_path,config=config)
28
  if transcript.status == aai.TranscriptStatus.error:
29
+ error_msg = (transcript.error or "").lower()
30
+
31
+ # Silence — return empty so the factory raises NoSpeechDetected
32
+ if any(phrase in error_msg for phrase in _NO_SPEECH_ERRORS):
33
+ return "", ""
34
+
35
+ # Real error — let the factory wrap it as ProviderUnavailable
36
+ raise Exception(f"AssemblyAI transcription failed: {transcript.error}")
37
 
38
+ return transcript.text or "", transcript.language_code or "unknown"
 
 
 
39
 
40
  return await loop.run_in_executor(None, sync_transcribe)
stores/sttremotes.py CHANGED
@@ -5,6 +5,12 @@ from .providers.deepgramstt import DeepgramSTT
5
  from .providers.aaistt import AssemblyAISTT
6
  from .providers.customstt import CustomSTT
7
 
 
 
 
 
 
 
8
  class STTFactory:
9
  _providers: Dict[str, Type] = {
10
  "mistral": MistralSTT,
@@ -25,7 +31,21 @@ class STTRemoteManager:
25
  def __init__(self, default_provider: str = "mistral"):
26
  self.default_provider = default_provider
27
 
28
- async def transcribe_remote(self, audio_path: str, provider_name: str = None) -> str:
29
- provider_name = provider_name or self.default_provider
30
  provider = STTFactory.get_provider(provider_name)
31
- return await provider.transcribe(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from .providers.aaistt import AssemblyAISTT
6
  from .providers.customstt import CustomSTT
7
 
8
+ class NoSpeechDetected(Exception):
9
+ pass
10
+ class ProviderUnavailable(Exception):
11
+ pass
12
+
13
+
14
  class STTFactory:
15
  _providers: Dict[str, Type] = {
16
  "mistral": MistralSTT,
 
31
  def __init__(self, default_provider: str = "mistral"):
32
  self.default_provider = default_provider
33
 
34
+ async def transcribe_remote(self, audio_path: str, provider_name: str = None):
35
+ provider_name = (provider_name or self.default_provider).lower()
36
  provider = STTFactory.get_provider(provider_name)
37
+
38
+ try:
39
+ text, language = await provider.transcribe(audio_path)
40
+ except Exception as e:
41
+ # Real provider error — not silence
42
+ raise ProviderUnavailable(
43
+ f"Provider '{provider_name}' failed: {e}"
44
+ ) from e
45
+
46
+ if not text or not text.strip():
47
+ raise NoSpeechDetected(
48
+ f"Provider '{provider_name}' returned no speech for this chunk."
49
+ )
50
+
51
+ return text, language