Kokoro-82M / handler.py
aiplexdeveloper's picture
Create handler.py
3a67b40 verified
raw
history blame
1.26 kB
from typing import Dict, List, Any
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
import torch
import io
import os
class EndpointHandler():
def __init__(self):
self.pipeline = KPipeline(lang_code='a')
def __call__(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
text = inputs["text"]
voice = inputs["voice"]
audio_segments = []
generator = self.pipeline(text, voice)
for i, (gs, ps, audio) in enumerate(generator):
# Save temporarily just in case (optional if you skip saving to disk)
filename = f"{i}.wav"
sf.write(filename, audio, 24000)
audio_segments.append(audio)
# Concatenate all audio segments
full_audio = torch.cat([torch.tensor(a) for a in audio_segments])
# Write full audio to a binary buffer
buffer = io.BytesIO()
sf.write(buffer, full_audio.numpy(), 24000, format='WAV')
buffer.seek(0)
# Clean up temp files
for i in range(len(audio_segments)):
try:
os.remove(f"{i}.wav")
except FileNotFoundError:
pass
return {
"audio": buffer.read()
}