File size: 2,134 Bytes
4cbdd15
 
ab655eb
4cbdd15
 
 
 
 
 
 
 
 
 
 
 
 
 
ab655eb
 
 
 
 
4cbdd15
 
 
 
 
 
 
ab655eb
4cbdd15
 
 
 
 
 
 
 
ab655eb
4cbdd15
 
 
 
 
 
 
5db1872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cbdd15
 
 
6facbae
b0cdf80
4cbdd15
 
 
 
83dd650
4cbdd15
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from tts_infer.tts import TextToMel, MelToWav
from tts_infer.num_to_word_on_sent import normalize_nums
# from ai4bharat.transliteration import XlitEngine
import gradio as gr


# Device string handed to every TextToMel / MelToWav constructor in this file.
device = 'cpu'

def create_text_to_mel(glow_model_dir):
    """Build a ``TextToMel`` instance for the given checkpoint directory.

    Args:
        glow_model_dir: Directory passed to ``TextToMel`` as its
            ``glow_model_dir`` argument.

    Returns:
        The constructed ``TextToMel`` object; the module-level ``device``
        is passed through to its constructor.
    """
    text_to_mel = TextToMel(device=device, glow_model_dir=glow_model_dir)
    return text_to_mel

# Female voice: text-to-mel and mel-to-wav objects, constructed once at
# module import from the female checkpoint directories.
text_to_mel_female = create_text_to_mel(glow_model_dir='checkpoints/glow/female')
mel_to_wav_female = MelToWav(
    hifi_model_dir='checkpoints/hifi/female',
    device=device,
)

# Male voice: the same pair of objects built from the male checkpoints.
text_to_mel_male = create_text_to_mel(glow_model_dir='checkpoints/glow/male')
mel_to_wav_male = MelToWav(
    hifi_model_dir='checkpoints/hifi/male',
    device=device,
)

# def translit(text, lang):
#     engine = XlitEngine(lang)
#     words = [engine.translit_word(word, topk=1)[lang][0] for word in text.split()]
#     updated_sent = ' '.join(words)
#     return updated_sent



def run_tts(text, selected_voice):
    """Synthesize Punjabi speech for ``text`` using the chosen voice.

    Args:
        text: Input text. Every danda character is replaced by a full stop
            and the result is passed through ``normalize_nums`` (numbers to
            words) before synthesis.
        selected_voice: ``"Male Voice"`` selects the male models; any other
            value selects the female models.

    Returns:
        A ``(sr, audio)`` tuple: the two values returned by
        ``generate_wav``, reordered so that ``sr`` comes first.
    """
    language = "pa"  # Punjabi language code

    # Swap the danda for a full stop, then convert numbers to words.
    spoken_text = normalize_nums(text.replace('।', '.'), language)

    # Anything other than an explicit "Male Voice" uses the female models.
    use_male = selected_voice == "Male Voice"
    synthesizer = text_to_mel_male if use_male else text_to_mel_female
    vocoder = mel_to_wav_male if use_male else mel_to_wav_female

    mel_spec = synthesizer.generate_mel(spoken_text)
    waveform, sample_rate = vocoder.generate_wav(mel_spec)
    return sample_rate, waveform





# iface = gr.Interface(
#     fn=run_tts,
#     inputs=[
#         "textbox",
#         gr.inputs.Dropdown(
#             choices=["Male Voice", "Female Voice"],
#             default="Female Voice",
#             label="Select Voice"
#         )
#     ],
#     outputs="audio",
#     title="Text to Speech Punjabi Language"
# )

# iface.launch()


# Gradio UI: a text box and a voice selector feed run_tts; its return value
# is rendered by an audio component.
iface = gr.Interface(
    fn=run_tts,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Radio(
            choices=["Male Voice", "Female Voice"],
            # Pre-select the voice run_tts already falls back to when nothing
            # is chosen, so the UI shows which voice will actually be used.
            value="Female Voice",
            label="Select Voice"
        )
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Text to Speech Punjabi Language"
)

# Launch the interface when this script runs.
iface.launch()