Spaces:
Runtime error
Runtime error
Commit ·
f85bf15
1
Parent(s): 4f665b8
6th
Browse files- stores/providers/aaistt.py +14 -5
- stores/sttremotes.py +23 -3
stores/providers/aaistt.py
CHANGED
|
@@ -9,6 +9,11 @@ config = aai.TranscriptionConfig(
|
|
| 9 |
language_detection=True, # auto-detect language
|
| 10 |
speaker_labels=True, # diarization
|
| 11 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
class AssemblyAISTT:
|
| 14 |
def __init__(self):
|
|
@@ -21,11 +26,15 @@ class AssemblyAISTT:
|
|
| 21 |
def sync_transcribe():
|
| 22 |
transcript = self.client.transcribe(audio_path,config=config)
|
| 23 |
if transcript.status == aai.TranscriptStatus.error:
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
text
|
| 27 |
-
# Language detection is available in transcript.language_code
|
| 28 |
-
language = transcript.language_code if transcript.language_code else "unknown"
|
| 29 |
-
return text, language
|
| 30 |
|
| 31 |
return await loop.run_in_executor(None, sync_transcribe)
|
|
|
|
| 9 |
language_detection=True, # auto-detect language
|
| 10 |
speaker_labels=True, # diarization
|
| 11 |
)
|
| 12 |
+
# Lower-case fragments of AssemblyAI error messages. A failed transcript whose
# (lower-cased) error text contains any of these is treated as silence rather
# than as a real provider failure.
_NO_SPEECH_ERRORS = (
    "no spoken audio",                         # nothing was said in the clip
    "language_detection cannot be performed",  # detection had nothing to work on
    "audio duration is too short",             # clip too short to transcribe
)
|
| 17 |
|
| 18 |
class AssemblyAISTT:
|
| 19 |
def __init__(self):
|
|
|
|
| 26 |
def sync_transcribe():
    """Run the synchronous AssemblyAI request and return ``(text, language)``.

    A failed transcript whose error text contains one of the
    ``_NO_SPEECH_ERRORS`` phrases yields ``("", "")``; any other failed
    transcript raises ``Exception`` carrying the provider's error text.
    """
    result = self.client.transcribe(audio_path, config=config)

    # Success path first: missing text/language fall back to "" / "unknown".
    if result.status != aai.TranscriptStatus.error:
        return result.text or "", result.language_code or "unknown"

    # Compare in lower case; a missing error message matches no phrase.
    failure = (result.error or "").lower()
    for phrase in _NO_SPEECH_ERRORS:
        if phrase in failure:
            # Silence: empty text lets the caller raise NoSpeechDetected.
            return "", ""

    # Real error: the caller wraps it as ProviderUnavailable.
    raise Exception(f"AssemblyAI transcription failed: {result.error}")
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
return await loop.run_in_executor(None, sync_transcribe)
|
stores/sttremotes.py
CHANGED
|
@@ -5,6 +5,12 @@ from .providers.deepgramstt import DeepgramSTT
|
|
| 5 |
from .providers.aaistt import AssemblyAISTT
|
| 6 |
from .providers.customstt import CustomSTT
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
class STTFactory:
|
| 9 |
_providers: Dict[str, Type] = {
|
| 10 |
"mistral": MistralSTT,
|
|
@@ -25,7 +31,21 @@ class STTRemoteManager:
|
|
| 25 |
def __init__(self, default_provider: str = "mistral"):
|
| 26 |
self.default_provider = default_provider
|
| 27 |
|
| 28 |
-
async def transcribe_remote(self, audio_path: str, provider_name: str = None)
|
| 29 |
-
provider_name = provider_name or self.default_provider
|
| 30 |
provider = STTFactory.get_provider(provider_name)
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from .providers.aaistt import AssemblyAISTT
|
| 6 |
from .providers.customstt import CustomSTT
|
| 7 |
|
| 8 |
+
class NoSpeechDetected(Exception):
    """Raised when a provider returns empty or whitespace-only text."""
|
| 10 |
+
class ProviderUnavailable(Exception):
    """Raised when a provider's transcription call fails with an error."""
|
| 12 |
+
|
| 13 |
+
|
| 14 |
class STTFactory:
|
| 15 |
_providers: Dict[str, Type] = {
|
| 16 |
"mistral": MistralSTT,
|
|
|
|
| 31 |
def __init__(self, default_provider: str = "mistral"):
|
| 32 |
self.default_provider = default_provider
|
| 33 |
|
| 34 |
+
async def transcribe_remote(self, audio_path: str, provider_name: str = None):
    """Transcribe ``audio_path`` with the named (or default) provider.

    Args:
        audio_path: Path of the audio file handed to the provider.
        provider_name: Name of the provider to use; lower-cased before the
            lookup. Falls back to ``self.default_provider`` when omitted
            or empty.

    Returns:
        The ``(text, language)`` pair produced by the provider.

    Raises:
        NoSpeechDetected: The provider returned empty or whitespace-only
            text, or raised ``NoSpeechDetected`` itself.
        ProviderUnavailable: The provider raised any other exception.
    """
    provider_name = (provider_name or self.default_provider).lower()
    # Resolved outside the try on purpose: whatever get_provider raises for
    # a bad name propagates unchanged instead of being reported as an outage.
    provider = STTFactory.get_provider(provider_name)

    try:
        text, language = await provider.transcribe(audio_path)
    except (NoSpeechDetected, ProviderUnavailable):
        # Already one of this module's own signals. Re-wrapping would turn
        # "silence" into "provider down" for callers that tell them apart.
        raise
    except Exception as e:
        # Real provider error, not silence.
        raise ProviderUnavailable(
            f"Provider '{provider_name}' failed: {e}"
        ) from e

    if not text or not text.strip():
        raise NoSpeechDetected(
            f"Provider '{provider_name}' returned no speech for this chunk."
        )

    return text, language
|