|
|
|
|
|
|
|
|
from typing import Dict, Any |
|
|
from transformers import pipeline |
|
|
|
|
|
class EndpointHandler:
    """Custom inference handler that wraps a text-to-speech pipeline.

    The pipeline is built once in ``__init__`` and reused by every call.
    """

    def __init__(self, model_path: str = ""):
        """
        Load the MMS-TTS pipeline once at startup.

        transformers>=4.33.0 is required for MMS-TTS support.

        Args:
            model_path: Model identifier or local directory handed to
                ``transformers.pipeline`` as ``model``.
        """
        # Imported locally so the module's top-level imports stay untouched.
        import torch

        # Use the first GPU when one is present; otherwise fall back to the
        # CPU (-1) instead of failing on a CUDA-less host.
        device = 0 if torch.cuda.is_available() else -1

        self.tts = pipeline(
            task="text-to-speech",
            model=model_path,
            device=device,
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Synthesize speech for the text found under ``data["inputs"]``.

        Args:
            data: Request payload of the form
                ``{"inputs": "<text to synthesize>"}``.

        Returns:
            ``{"array": <nested list>, "sampling_rate": <value>}`` where
            ``array`` is the pipeline's ``"audio"`` output, transposed and
            converted to plain Python lists, and ``sampling_rate`` is passed
            through from the pipeline result unchanged.

        Raises:
            ValueError: If ``"inputs"`` is missing, is not a string, or
                contains only whitespace.
        """
        text = data.get("inputs", "")

        # Reject unusable input up front; the code below indexes the pipeline
        # result as a single dict, so only a single string is supported.
        if not isinstance(text, str) or not text.strip():
            raise ValueError('"inputs" must be a non-empty string of text to synthesize')

        result = self.tts(text)
        audio = result["audio"]

        return {
            # Transposed before serialising, exactly as the response format
            # has always been (presumably samples-first — confirm with clients).
            "array": audio.T.tolist(),
            "sampling_rate": result["sampling_rate"],
        }
|
|
|