import gc
from os import getenv

from transformers import pipeline

from utils import spaces_gpu


@spaces_gpu
def text_to_speech(text: str) -> tuple[int, bytes]:
    """Synthesize speech for *text* with a Hugging Face text-to-speech pipeline.

    The model identifier is read from the ``TEXT_TO_SPEECH_MODEL`` environment
    variable on every call, and a fresh pipeline is built per call. The
    pipeline reference is dropped and a garbage collection pass is forced
    before returning, so the model is not kept alive by this function between
    calls.

    Args:
        text: The text to be spoken.

    Returns:
        A ``(sampling_rate, audio)`` tuple built from the pipeline result's
        ``"sampling_rate"`` entry and the first element of its ``"audio"``
        entry.
        NOTE(review): the annotation says ``bytes``, but the concrete audio
        type is whatever the pipeline yields (presumably an array) - confirm
        against callers before tightening the annotation.
    """
    # getenv returns None when the variable is unset; that value is passed
    # straight through to pipeline(), as in the original code.
    narrator = pipeline("text-to-speech", getenv("TEXT_TO_SPEECH_MODEL"))
    try:
        # Inference must happen while `narrator` is still bound. The previous
        # code deleted the name first, so this call always raised
        # UnboundLocalError.
        result = narrator(text)
    finally:
        # Release the pipeline whether or not inference succeeded, so a failed
        # call does not leave the model referenced from this frame.
        del narrator
        gc.collect()
    return (result["sampling_rate"], result["audio"][0])