File size: 2,221 Bytes
1905805 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | import wave
from openai import AsyncOpenAI
from pathlib import Path
from utils.config import Config
from .base import STTOperation
class OpenAISTT(STTOperation):
    '''Speech-to-text operation backed by an OpenAI-compatible transcription API.

    Configurable fields (see ``configure`` / ``get_configuration``):
        base_url: root URL of the API the client is pointed at.
        model: model name sent with each transcription request.
        language: language code sent with each transcription request.
    '''

    # Names of the attributes exposed through configure()/get_configuration(),
    # in the order they are reported.
    _CONFIG_FIELDS = ("base_url", "model", "language")

    def __init__(self):
        super().__init__("openai")
        # Created in start() and released in close(); None while not running.
        self.client = None
        self.base_url: str = "https://api.openai.com/v1/"
        # NOTE(review): confirm this default is accepted by the transcription
        # endpoint of the configured base_url.
        self.model: str = "gpt-4o"
        self.language: str = "en"

    async def start(self) -> None:
        '''General setup needed to start generation.

        Builds the async API client against the currently configured
        ``base_url``.
        '''
        await super().start()
        self.client = AsyncOpenAI(base_url=self.base_url)

    async def close(self) -> None:
        '''Clean up resources before unloading.

        Safe to call when the operation was never started or was already
        closed; in that case only the parent clean-up runs.
        '''
        await super().close()
        if self.client is not None:
            # AsyncOpenAI.close() is a coroutine: without the await the
            # underlying HTTP client would never actually be closed.
            await self.client.close()
            self.client = None

    async def configure(self, config_d):
        '''Configure and validate operation-specific configuration.

        Args:
            config_d: mapping that may contain any of ``base_url``, ``model``
                and ``language``. Present values are converted with ``str()``
                and stored; absent keys leave the current value untouched.

        Raises:
            AssertionError: if any of the three fields ends up empty.

        A changed ``base_url`` is only picked up by the client on the next
        ``start()``; ``model`` and ``language`` are read on every request.
        '''
        for field in self._CONFIG_FIELDS:
            if field in config_d:
                setattr(self, field, str(config_d[field]))

        # Raised explicitly (rather than via ``assert``) so the validation
        # still runs when Python is started with -O.
        for field in self._CONFIG_FIELDS:
            if not getattr(self, field):
                raise AssertionError(f"{field} must be a non-empty string")

    async def get_configuration(self):
        '''Returns values of configurable fields.'''
        return {field: getattr(self, field) for field in self._CONFIG_FIELDS}

    async def _generate(self, prompt: str = None, audio_bytes: bytes = None, sr: int = None, sw: int = None, ch: int = None, **kwargs):
        '''Transcribe raw audio frames and yield the result.

        The frames are first written as a WAV file to the path given by
        ``Config().stt_working_src``; that file is then uploaded for
        transcription. Requires ``start()`` to have been called.

        Args:
            prompt: optional text sent along with the request; omitted from
                the request when None.
            audio_bytes: raw audio frames to write into the WAV file.
            sr: frame rate written to the WAV header.
            sw: sample width in bytes written to the WAV header.
            ch: number of channels written to the WAV header.
            **kwargs: accepted and ignored.

        Yields:
            A single dict ``{"transcription": <API response>}``.
        '''
        # Every call writes to the same path, so overlapping calls would
        # overwrite each other's audio.
        working_src = Config().stt_working_src

        with wave.open(working_src, 'w') as wav_file:
            wav_file.setframerate(sr)
            wav_file.setsampwidth(sw)
            wav_file.setnchannels(ch)
            wav_file.writeframes(audio_bytes)

        request = {
            "file": Path(working_src),
            "model": self.model,
            "response_format": "text",
            "language": self.language,
        }
        # Only forward the prompt when one was actually supplied, instead of
        # sending an explicit None to the SDK.
        if prompt is not None:
            request["prompt"] = prompt

        transcription = await self.client.audio.transcriptions.create(**request)

        yield {"transcription": transcription}