Spaces:
Sleeping
Sleeping
| import torch | |
| from transformers import VitsModel, AutoTokenizer | |
| import numpy as np | |
| import scipy.io.wavfile as wavfile | |
| import gradio as gr | |
# Process-wide cache so the checkpoint is loaded once instead of on every
# request handled by ``yes``.
_TTS_CACHE = {}


def _load_tts(model_id="facebook/mms-tts-eng"):
    """Return the ``(model, tokenizer)`` pair for *model_id*, loading it once."""
    if model_id not in _TTS_CACHE:
        _TTS_CACHE[model_id] = (
            VitsModel.from_pretrained(model_id),
            AutoTokenizer.from_pretrained(model_id),
        )
    return _TTS_CACHE[model_id]


def yes(texte):
    """Synthesize speech for *texte* and return it as WAV file bytes.

    Args:
        texte: The text to speak.

    Returns:
        The bytes of a 16-bit PCM WAV file written at the model's configured
        sampling rate, or ``None`` when *texte* is empty or contains only
        whitespace (there is nothing to synthesize).
    """
    # Local import so this block does not depend on the file's import header.
    import io

    # Guard before touching the model: nothing to synthesize.
    if not texte or not texte.strip():
        return None

    model, tokenizer = _load_tts()
    inputs = tokenizer(texte, return_tensors="pt")
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # Normalize the audio into [-1, 1]. An all-zero waveform is left as-is,
    # because dividing by a zero peak would fill it with NaN.
    peak = torch.max(torch.abs(waveform))
    if peak > 0:
        waveform = waveform / peak

    # Convert to a NumPy array, then scale to the 16-bit integer range used
    # for the WAV samples.
    samples = waveform.squeeze().cpu().numpy()
    pcm = (samples * 32767).astype(np.int16)

    # Encode the WAV in memory rather than through a fixed file name on disk,
    # so simultaneous calls cannot overwrite each other's audio.
    buffer = io.BytesIO()
    wavfile.write(buffer, rate=model.config.sampling_rate, data=pcm)
    return buffer.getvalue()
# Build the web UI: a text box whose content is passed to ``yes`` and whose
# result is shown in an audio component.
text = gr.Interface(
    fn=yes,
    inputs='text',
    outputs='audio',
)

# Start serving the interface with debug mode enabled.
text.launch(debug=True)