File size: 1,261 Bytes
3a67b40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
from typing import Dict, List, Any
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
import torch
import io
import os
class EndpointHandler():
    """Text-to-speech endpoint handler backed by a Kokoro ``KPipeline``.

    The pipeline is created once in ``__init__``. Each call synthesizes the
    requested text with the requested voice and returns the complete
    utterance as WAV-encoded bytes built entirely in memory.
    """

    # Sample rate (Hz) written into the WAV header of the returned audio.
    SAMPLE_RATE = 24000

    def __init__(self, path: str = ""):
        """Build the synthesis pipeline.

        Args:
            path: Unused. It is optional so ``EndpointHandler()`` keeps
                working, while hosts that construct the handler with a model
                directory argument (the Hugging Face custom-handler
                convention) can do so without a ``TypeError``.
        """
        self.pipeline = KPipeline(lang_code='a')

    def __call__(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Synthesize speech for one request.

        Args:
            inputs: Request payload. Must contain ``"text"`` (the text to
                speak) and ``"voice"`` (the voice passed to the pipeline).

        Returns:
            A dict with a single key ``"audio"`` holding the WAV file bytes.

        Raises:
            KeyError: If ``"text"`` or ``"voice"`` is missing from ``inputs``.
            ValueError: If the pipeline yields no audio segments.
        """
        text = inputs["text"]
        voice = inputs["voice"]

        # Segments stay in memory: writing "<i>.wav" scratch files into the
        # working directory would clobber existing files with those names
        # and race between concurrent requests.
        # as_tensor accepts both arrays and tensors without the extra copy
        # (and copy-construct warning) that torch.tensor() incurs.
        segments = [
            torch.as_tensor(audio)
            for _, _, audio in self.pipeline(text, voice)
        ]
        if not segments:
            # torch.cat rejects an empty list with an opaque RuntimeError;
            # fail with a message that names the actual problem instead.
            raise ValueError("No audio was generated for the given text")

        full_audio = torch.cat(segments)

        # Encode the concatenated waveform as WAV into a byte buffer.
        # detach()/cpu() make the tensor safe to convert to NumPy even if it
        # is attached to an autograd graph or lives on another device.
        buffer = io.BytesIO()
        sf.write(
            buffer,
            full_audio.detach().cpu().numpy(),
            self.SAMPLE_RATE,
            format='WAV',
        )
        return {
            "audio": buffer.getvalue()
        }