Spaces:
Paused
Paused
| """ | |
| STT (Speech-to-Text) Interface for Flare - Simple Batch Mode | |
| """ | |
| from abc import ABC, abstractmethod | |
| from typing import Optional, List | |
| from dataclasses import dataclass | |
| from enum import Enum | |
class STTEngineType(Enum):
    """Identifiers of the speech-to-text engines that can be selected.

    Each member's value is the lowercase string form of the engine name.
    """

    # Value "no_stt"; going by the name, selects no STT engine at all.
    NO_STT = "no_stt"

    # Named provider engines.
    GOOGLE = "google"
    AZURE = "azure"
    AMAZON = "amazon"
    DEEPGRAM = "deepgram"
@dataclass
class STTConfig:
    """STT configuration parameters.

    Declared as a dataclass so callers can override any subset of the
    defaults by keyword, e.g. ``STTConfig(language="en-US")``. Without the
    decorator the annotated defaults are only class attributes and the
    constructor accepts no arguments.
    """

    # Language code passed to the engine.
    language: str = "tr-TR"
    # Audio sample rate (presumably in Hz - confirm against the providers).
    sample_rate: int = 16000
    # Audio encoding name.
    encoding: str = "LINEAR16"
    # Whether punctuation should be enabled in the transcript.
    enable_punctuation: bool = True
    # Whether per-word timestamps should be requested.
    enable_word_timestamps: bool = False
    # Recognition model identifier (provider-specific - confirm which
    # providers honour it).
    model: str = "latest_long"
    # Flag for an "enhanced" model variant (provider-specific - confirm).
    use_enhanced: bool = True
@dataclass
class TranscriptionResult:
    """Result from an STT engine.

    Declared as a dataclass: ``text``, ``confidence`` and ``timestamp`` are
    required constructor arguments, while ``language`` and
    ``word_timestamps`` are optional and default to ``None``. Without the
    decorator the annotation-only fields would not exist on instances and
    the class could not be constructed with values.
    """

    # Transcribed text.
    text: str
    # Confidence score reported for the transcript (scale is
    # provider-defined - confirm).
    confidence: float
    # Timestamp associated with the result (epoch vs. offset is not
    # established here - confirm against the providers).
    timestamp: float
    # Language of the transcript, when known.
    language: Optional[str] = None
    # Per-word timing entries, when available; the dict schema is
    # provider-defined.
    word_timestamps: Optional[List[dict]] = None
class STTInterface(ABC):
    """Abstract base class for STT providers - Simple batch mode.

    Every method is marked ``@abstractmethod`` so that a provider which
    forgets to implement one fails at instantiation time instead of
    silently returning ``None`` from the inherited stub.
    """

    @abstractmethod
    async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
        """
        Transcribe audio data

        Args:
            audio_data: Raw PCM audio data (LINEAR16 format)
            config: STT configuration

        Returns:
            TranscriptionResult or None if no speech detected
        """
        ...

    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Get list of supported language codes"""
        ...

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get provider name for logging"""
        ...