File size: 652 Bytes
9cc385f 983919b 9cc385f 7dadaf1 9cc385f 7dadaf1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from transformers import VitsModel, AutoTokenizer
import torch
import numpy as np
from scipy.io.wavfile import write
# Directory holding the fine-tuned VITS checkpoint; the model weights and the
# tokenizer are both loaded from this one location.
MODEL_DIR = "../hindi-tts"
# Destination of the synthesized audio.
OUTPUT_PATH = "hindi.wav"
# Sentence to synthesize.
TEXT = "नमस्ते, आप कैसे हैं? मैं टैक्स ऑफिस से बोल रहा हूँ"


def float_to_int16_pcm(samples):
    """Convert floating-point audio samples to 16-bit integer PCM.

    Args:
        samples: Array-like of float samples, nominally within [-1.0, 1.0].

    Returns:
        A NumPy ``int16`` array of the same shape, scaled by 32767.
    """
    # Clip before scaling: casting an out-of-range float to int16 does not
    # saturate, so an overshooting sample would otherwise turn into a loud
    # click instead of being limited to full scale.
    clipped = np.clip(np.asarray(samples), -1.0, 1.0)
    return (clipped * 32767).astype(np.int16)


def synthesize(text, model, tokenizer):
    """Run the TTS model on ``text`` and return the waveform as a NumPy array.

    Args:
        text: The sentence to synthesize.
        model: A loaded model whose forward output exposes ``.waveform``.
        tokenizer: The tokenizer matching ``model``.

    Returns:
        The generated waveform with size-1 dimensions squeezed away, moved to
        the CPU and converted to a NumPy array.
    """
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        waveform = model(**inputs).waveform
    # squeeze() drops size-1 dims (presumably the batch dim for one sentence).
    return waveform.squeeze().cpu().numpy()


def main():
    """Load the model, synthesize ``TEXT`` and write it to ``OUTPUT_PATH``."""
    model = VitsModel.from_pretrained(MODEL_DIR)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    audio = synthesize(TEXT, model, tokenizer)
    write(
        OUTPUT_PATH,
        rate=model.config.sampling_rate,
        data=float_to_int16_pcm(audio),
    )


if __name__ == "__main__":
    main()
|