Spaces:

UcsTurkey
/

flare

Paused

flare / stt /stt_interface.py

Update stt/stt_interface.py

6aeaf3c verified 7 months ago

1.62 kB

	"""
	STT (Speech-to-Text) Interface for Flare - Simple Batch Mode
	"""
	from abc import ABC, abstractmethod
	from typing import Optional, List
	from dataclasses import dataclass
	from enum import Enum


	class STTEngineType(Enum):
	    """Identifiers for the selectable STT engines.

	    Each member's value is the lowercase string form of the engine name;
	    ``NO_STT`` is the member for the "no_stt" value.
	    """

	    NO_STT = "no_stt"
	    GOOGLE = "google"
	    AZURE = "azure"
	    AMAZON = "amazon"
	    DEEPGRAM = "deepgram"


	@dataclass
	class STTConfig:
	    """STT configuration parameters.

	    Every field has a default, so ``STTConfig()`` yields a usable
	    configuration and callers override only what they need.
	    """

	    # Language code of the audio; defaults to Turkish ("tr-TR").
	    language: str = "tr-TR"
	    # Audio sample rate; presumably in Hz - confirm against providers.
	    sample_rate: int = 16000
	    # Audio encoding name; "LINEAR16" is the default.
	    encoding: str = "LINEAR16"
	    # Whether punctuation is requested in the transcript.
	    enable_punctuation: bool = True
	    # Whether per-word timestamps are requested.
	    enable_word_timestamps: bool = False
	    # Recognition model name; NOTE(review): value looks provider-specific.
	    model: str = "latest_long"
	    # NOTE(review): presumably selects a provider's enhanced model - confirm.
	    use_enhanced: bool = True


	@dataclass
	class TranscriptionResult:
	    """Result from STT engine.

	    ``text``, ``confidence`` and ``timestamp`` are required; ``language``
	    and ``word_timestamps`` are optional and default to ``None``.
	    """

	    # Transcribed text.
	    text: str
	    # Confidence score; NOTE(review): range/scale is not defined here.
	    confidence: float
	    # Timestamp of the result; NOTE(review): units/epoch not defined here.
	    timestamp: float
	    # Language code of the transcript, when available.
	    language: Optional[str] = None
	    # Per-word timing entries as dicts, when available; the dict schema
	    # is not defined in this file.
	    word_timestamps: Optional[List[dict]] = None


	class STTInterface(ABC):
	    """Abstract base class for STT providers - Simple batch mode.

	    All three methods are abstract, so a subclass must implement each of
	    them before it can be instantiated.
	    """

	    @abstractmethod
	    async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
	        """
	        Transcribe audio data.

	        Args:
	            audio_data: Raw PCM audio data (LINEAR16 format)
	            config: STT configuration

	        Returns:
	            TranscriptionResult or None if no speech detected
	        """
	        pass

	    @abstractmethod
	    def get_supported_languages(self) -> List[str]:
	        """Get list of supported language codes."""
	        pass

	    @abstractmethod
	    def get_provider_name(self) -> str:
	        """Get provider name for logging."""
	        pass