# Source path: VTuberAI/src/utils/operations/tts/openai.py
# Hosting-page residue kept for provenance: uploaded by "Saidie000",
# commit message "Upload 90 files", revision 1905805 (verified).
import wave
from io import BytesIO
from openai import AsyncOpenAI
from utils.config import Config
from .base import TTSOperation
class OpenAITTS(TTSOperation):
    '''Text-to-speech operation backed by an OpenAI-compatible speech endpoint.

    Configurable fields (see ``configure`` / ``get_configuration``):
        base_url: API root the client is pointed at.
        voice: voice name sent with every speech request.
        model: model name sent with every speech request.
    '''

    # Names of the attributes that configure() may set and validate.
    _CONFIG_FIELDS = ("base_url", "voice", "model")

    def __init__(self):
        # "openai" is handed to the base class; presumably the operation's
        # identifier -- confirm against TTSOperation.
        super().__init__("openai")
        self.client = None  # created in start(), released in close()
        self.base_url = "https://api.openai.com/v1/"
        self.voice = "nova"
        self.model = "tts-1"

    async def start(self) -> None:
        '''General setup needed to start generating.

        Builds the API client from the current ``base_url``. No credentials
        are passed explicitly; presumably the client picks them up from the
        environment -- confirm against the deployment setup.
        '''
        await super().start()
        self.client = AsyncOpenAI(base_url=self.base_url)

    async def close(self) -> None:
        '''Clean up resources before unloading.

        Safe to call when ``start()`` was never called or when the operation
        has already been closed: the client is only closed if one exists.
        '''
        await super().close()
        # Detach first so the reference is dropped even if closing raises.
        client, self.client = self.client, None
        if client is not None:
            await client.close()

    async def configure(self, config_d):
        '''Configure and validate operation-specific configuration.

        Args:
            config_d: mapping that may contain ``"base_url"``, ``"voice"``
                and/or ``"model"``. Present values are converted with
                ``str()``; absent keys leave the current value in place.

        Raises:
            AssertionError: if any resulting field is ``None`` or empty.
                The exception type matches the previous ``assert``-based
                checks so existing callers keep working, but it is raised
                explicitly so validation still runs under ``python -O``.

        Nothing is written to the instance unless every field is valid.
        ``voice`` and ``model`` are read on each ``_generate`` call, while
        ``base_url`` is only consumed when ``start()`` builds the client.
        '''
        resolved = {}
        for field in self._CONFIG_FIELDS:
            if field in config_d:
                value = str(config_d[field])
            else:
                value = getattr(self, field)
            if value is None or len(value) == 0:
                raise AssertionError(f"'{field}' must be a non-empty string")
            resolved[field] = value

        # Commit only after all fields passed validation.
        for field, value in resolved.items():
            setattr(self, field, value)

    async def get_configuration(self):
        '''Returns values of configurable fields.'''
        return {
            "base_url": self.base_url,
            "voice": self.voice,
            "model": self.model,
        }

    async def _generate(self, content: str = None, **kwargs):
        '''Generate an output stream.

        Requests WAV audio for ``content`` using the configured model and
        voice, then yields a single dict:

            audio_bytes: raw frames read from the WAV payload.
            sr: frame rate reported by the WAV header.
            sw: sample width in bytes reported by the WAV header.
            ch: channel count reported by the WAV header.

        ``kwargs`` are accepted but not used by this implementation.
        '''
        async with self.client.audio.speech.with_streaming_response.create(
            model=self.model,
            voice=self.voice,
            input=content,
            response_format="wav",
        ) as response:
            # The whole body is buffered; it is needed in full for parsing.
            payload = await response.read()

        # The response is released before parsing so the HTTP connection is
        # not held open while the consumer processes the yielded chunk.
        with wave.open(BytesIO(payload), "rb") as wav_file:
            sr = wav_file.getframerate()
            sw = wav_file.getsampwidth()
            ch = wav_file.getnchannels()
            ab = wav_file.readframes(wav_file.getnframes())

        yield {
            "audio_bytes": ab,
            "sr": sr,
            "sw": sw,
            "ch": ch,
        }