File size: 2,221 Bytes
1905805
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import wave
from openai import AsyncOpenAI
from pathlib import Path

from utils.config import Config

from .base import STTOperation

class OpenAISTT(STTOperation):
    '''Speech-to-text operation backed by an OpenAI-compatible transcription API.

    Configurable fields (see configure / get_configuration):
        base_url: Base URL handed to the AsyncOpenAI client.
        model: Model name sent with each transcription request.
        language: Language code sent with each transcription request.
    '''

    def __init__(self):
        super().__init__("openai")
        # Created in start() and released in close(); None while not started.
        self.client = None

        self.base_url: str = "https://api.openai.com/v1/"
        self.model: str = "gpt-4o"
        self.language: str = "en"

    async def start(self) -> None:
        '''General setup needed before generating: create the API client.'''
        await super().start()
        self.client = AsyncOpenAI(base_url=self.base_url)

    async def close(self) -> None:
        '''Clean up resources before unloading.

        Safe to call when the operation was never started.
        '''
        await super().close()
        if self.client is None:
            return
        try:
            # AsyncOpenAI.close() is a coroutine function; without the await
            # the underlying HTTP client would never actually be closed.
            await self.client.close()
        finally:
            # Drop the reference even if closing failed so a later start()
            # always begins from a clean state.
            self.client = None

    async def configure(self, config_d):
        '''Configure and validate operation-specific configuration.

        Only the keys "base_url", "model" and "language" are read from
        config_d; each provided value is converted with str(). Nothing on
        the instance is modified unless every resulting value is valid.

        Raises:
            AssertionError: If any resulting field is None or empty.
        '''
        candidate = {
            "base_url": self.base_url,
            "model": self.model,
            "language": self.language,
        }
        for key in candidate:
            if key in config_d:
                candidate[key] = str(config_d[key])

        # Validate before assigning so a rejected configuration does not
        # leave the instance half-updated. Raised explicitly (rather than via
        # `assert`) so the check still runs under `python -O`, while keeping
        # the exception type callers already see.
        for key, value in candidate.items():
            if not value:
                raise AssertionError(f"'{key}' must be a non-empty string")

        self.base_url = candidate["base_url"]
        self.model = candidate["model"]
        self.language = candidate["language"]

    async def get_configuration(self):
        '''Returns values of configurable fields'''
        return {
            "base_url": self.base_url,
            "model": self.model,
            "language": self.language
        }

    async def _generate(self, prompt: str = None,  audio_bytes: bytes = None, sr: int = None, sw: int = None, ch: int = None, **kwargs):
        '''Transcribe raw audio frames and yield the result.

        The frames are first written as a WAV file to Config().stt_working_src,
        and that file is then uploaded to the transcription endpoint.

        Args:
            prompt: Optional text forwarded as the request's `prompt`;
                left out of the request entirely when None.
            audio_bytes: Raw audio frames written into the WAV file.
            sr: Frame rate written to the WAV header.
            sw: Sample width written to the WAV header.
            ch: Channel count written to the WAV header.
            **kwargs: Accepted and ignored.

        Yields:
            A single dict {"transcription": <API response>}. The request uses
            response_format="text", so this is presumably a plain string.

        Raises:
            RuntimeError: If called while no client exists (before start()
                or after close()).
        '''
        if self.client is None:
            raise RuntimeError("OpenAISTT client is not started; call start() first")

        # Resolve the working path once instead of building Config() twice.
        working_src = Config().stt_working_src

        with wave.open(working_src, 'w') as f:
            f.setframerate(sr)
            f.setsampwidth(sw)
            f.setnchannels(ch)
            f.writeframes(audio_bytes)

        request = {
            "file": Path(working_src),
            "model": self.model,
            "response_format": "text",
            "language": self.language,
        }
        # The SDK treats an omitted optional parameter differently from an
        # explicit None, so only send `prompt` when one was supplied.
        if prompt is not None:
            request["prompt"] = prompt

        transcription = await self.client.audio.transcriptions.create(**request)

        yield {"transcription": transcription}