File size: 2,318 Bytes
1905805 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | from fish_audio_sdk import AsyncWebSocketSession, TTSRequest
import os
from utils.config import Config
from .base import TTSOperation
class FishTTS(TTSOperation):
    '''TTS operation that generates PCM audio through a Fish Audio websocket session.

    Configurable fields (see configure / get_configuration):
      model_id  - passed to TTSRequest as reference_id
      backend   - passed to session.tts as backend
      normalize - passed to TTSRequest as normalize
      latency   - passed to TTSRequest as latency; must be 'normal' or 'balanced'
    '''

    def __init__(self):
        super().__init__("fish")
        # Created in start() and released in close(); None while not started.
        self.session = None
        self.model_id = "c9198512a4164a18b11a3bf96e5c668f"
        self.backend = "speech-1.6"
        self.normalize = False
        self.latency = "normal"

    async def start(self) -> None:
        '''General setup needed to start generating.

        Opens the websocket session using the FISH_API_KEY environment variable.
        '''
        await super().start()
        self.session = AsyncWebSocketSession(os.getenv("FISH_API_KEY"))

    async def close(self) -> None:
        '''Clean up resources before unloading.

        Safe to call when the session was never started or was already closed.
        '''
        await super().close()
        # Drop our reference first so a failing close() cannot leave a
        # half-closed session attached to this operation.
        session, self.session = self.session, None
        if session is not None:
            await session.close()

    async def configure(self, config_d):
        '''Configure and validate operation-specific configuration.

        Only the keys "model_id", "backend", "normalize" and "latency" are
        read from config_d; keys that are absent keep their current value.
        Nothing is written to the instance unless every value is valid.

        Raises:
            AssertionError: if model_id or backend is empty, or latency is
                not one of 'normal' / 'balanced'.
        '''
        model_id = str(config_d["model_id"]) if "model_id" in config_d else self.model_id
        backend = str(config_d["backend"]) if "backend" in config_d else self.backend
        # NOTE(review): bool() of any non-empty string (including "false") is
        # True; kept as-is to preserve existing behaviour - confirm callers
        # pass real booleans.
        normalize = bool(config_d["normalize"]) if "normalize" in config_d else self.normalize
        latency = str(config_d["latency"]) if "latency" in config_d else self.latency

        # Raised explicitly (rather than via `assert`) so validation still runs
        # under `python -O`, while keeping the exception type callers may
        # already handle.
        if model_id is None or len(model_id) == 0:
            raise AssertionError("model_id must be a non-empty string")
        if backend is None or len(backend) == 0:
            raise AssertionError("backend must be a non-empty string")
        if latency not in ('normal', 'balanced'):
            raise AssertionError("latency must be 'normal' or 'balanced'")

        # Commit only after every check passed.
        self.model_id = model_id
        self.backend = backend
        self.normalize = normalize
        self.latency = latency

    async def get_configuration(self):
        '''Returns values of configurable fields'''
        return {
            "model_id": self.model_id,
            "backend": self.backend,
            "normalize": self.normalize,
            "latency": self.latency,
        }

    async def _generate(self, content: str = None, **kwargs):
        '''Generate an output stream.

        Sends content to the session as a single TTS request, collects every
        chunk the session yields, and yields exactly one dict holding the
        concatenated bytes under "audio_bytes" together with the fixed values
        "sr": 44100, "sw": 2 and "ch": 1 (presumably sample rate, sample
        width in bytes and channel count - TODO confirm against the consumer).

        Requires start() to have been called so that self.session is set.
        '''
        tts_request = TTSRequest(
            text=content,
            format="pcm",
            normalize=self.normalize,
            latency=self.latency,
            reference_id=self.model_id,
        )

        # Collect chunks and join once; repeated `bytes +=` copies the whole
        # buffer on every chunk.
        chunks = []
        async for chunk in self.session.tts(
            tts_request,
            self._stream(),
            backend=self.backend,
        ):
            chunks.append(chunk)

        yield {"audio_bytes": b"".join(chunks), "sr": 44100, "sw": 2, "ch": 1}

    async def _stream(self):
        '''Text stream handed to session.tts; yields a single empty string.

        Presumably a placeholder because the full text already travels in the
        TTSRequest - TODO confirm against the SDK.
        '''
        yield ""