import wave from openai import AsyncOpenAI from pathlib import Path from utils.config import Config from .base import STTOperation class OpenAISTT(STTOperation): def __init__(self): super().__init__("openai") self.client = None self.base_url: str = "https://api.openai.com/v1/" self.model: str = "gpt-4o" self.language: str = "en" async def start(self) -> None: '''General setup needed to start generated''' await super().start() self.client = AsyncOpenAI(base_url=self.base_url) async def close(self) -> None: '''Clean up resources before unloading''' await super().close() self.client.close() self.client = None async def configure(self, config_d): '''Configure and validate operation-specific configuration''' if "base_url" in config_d: self.base_url = str(config_d['base_url']) if "model" in config_d: self.model = str(config_d['model']) if "language" in config_d: self.language = str(config_d['language']) assert self.base_url is not None and len(self.base_url) > 0 assert self.model is not None and len(self.model) > 0 assert self.language is not None and len(self.language) > 0 async def get_configuration(self): '''Returns values of configurable fields''' return { "base_url": self.base_url, "model": self.model, "language": self.language } async def _generate(self, prompt: str = None, audio_bytes: bytes = None, sr: int = None, sw: int = None, ch: int = None, **kwargs): '''Generate a output stream''' with wave.open(Config().stt_working_src, 'w') as f: f.setframerate(sr) f.setsampwidth(sw) f.setnchannels(ch) f.writeframes(audio_bytes) transcription = await self.client.audio.transcriptions.create( file=Path(Config().stt_working_src), model=self.model, response_format="text", language=self.language, prompt=prompt ) yield {"transcription": transcription}