Update app.py
Browse files
app.py
CHANGED
|
@@ -1,19 +1,30 @@
|
|
| 1 |
import numpy as np
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
from scipy.io import wavfile
|
| 4 |
from espnet2.bin.tts_inference import Text2Speech
|
|
|
|
| 5 |
import soundfile as sf
|
| 6 |
|
| 7 |
-
notes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
|
| 8 |
|
| 9 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
tts = Text2Speech.from_pretrained(model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth", vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl")
|
| 11 |
-
wav = tts("sil
|
| 12 |
audio_data = wav.numpy()
|
| 13 |
sf.write('output.wav', audio_data, samplerate=22050)
|
| 14 |
|
| 15 |
def generate_audio(note, octave, duration):
|
| 16 |
-
generate_tts()
|
| 17 |
wav_file_path = "output.wav"
|
| 18 |
|
| 19 |
sr, audio_data = wavfile.read(wav_file_path)
|
|
@@ -23,9 +34,8 @@ def generate_audio(note, octave, duration):
|
|
| 23 |
demo = gr.Interface(
|
| 24 |
generate_audio,
|
| 25 |
[
|
| 26 |
-
gr.Dropdown(notes, type="index"),
|
| 27 |
gr.Slider(4, 6, step=1),
|
| 28 |
-
gr.Textbox(value=
|
| 29 |
],
|
| 30 |
"audio",
|
| 31 |
)
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import gradio as gr
|
| 3 |
+
|
| 4 |
from scipy.io import wavfile
|
| 5 |
from espnet2.bin.tts_inference import Text2Speech
|
| 6 |
+
from arabic_pronounce import phonetise
|
| 7 |
import soundfile as sf
|
| 8 |
|
|
|
|
| 9 |
|
| 10 |
+
def text_to_phoneme(tun_text):
    """Convert space-separated Arabic/Tunisian text to a phoneme string.

    Each whitespace-delimited token is phonetised with arabic_pronounce's
    ``phonetise`` (the first candidate pronunciation is kept), and the
    joined result is wrapped in leading/trailing "sil" (silence) markers
    expected by the TTS front end.

    Args:
        tun_text: Input text whose tokens are separated by single spaces.

    Returns:
        A string of the form ``"sil <phoneme tokens> sil"``.
    """
    # Join at C speed instead of the original index loop with quadratic
    # string concatenation (res += " " + ...); stripping afterwards keeps
    # the result identical for empty phonetisations.
    phonemes = " ".join(phonetise(token)[0] for token in tun_text.split(" "))
    return f"sil {phonemes.strip()} sil"
|
| 18 |
+
|
| 19 |
+
def generate_tts(input_text):
    """Synthesize ``input_text`` to speech and write it to 'output.wav'.

    The text is phonetised via ``text_to_phoneme`` (which already wraps
    the phoneme string in "sil ... sil"), fed to the ESPnet Text2Speech
    model, and the resulting waveform is saved as a 22,050 Hz WAV file.

    Args:
        input_text: Raw Arabic/Tunisian text to synthesize.

    Side effects:
        Overwrites 'output.wav' in the current working directory.
    """
    phonemized_text = text_to_phoneme(input_text)
    # Load the heavy model + vocoder once and cache them on the function
    # object; the original reloaded both checkpoints from disk on EVERY call.
    if not hasattr(generate_tts, "_tts"):
        generate_tts._tts = Text2Speech.from_pretrained(
            model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
            vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
        )
    # BUG FIX: text_to_phoneme already adds the "sil" markers; the old
    # f"sil {phonemized_text} sil" wrapped them a second time, yielding
    # "sil sil ... sil sil" and extra silence in the synthesized audio.
    wav = generate_tts._tts(phonemized_text)["wav"]
    sf.write('output.wav', wav.numpy(), samplerate=22050)
|
| 25 |
|
| 26 |
def generate_audio(note, octave, duration):
|
| 27 |
+
generate_tts(duration)
|
| 28 |
wav_file_path = "output.wav"
|
| 29 |
|
| 30 |
sr, audio_data = wavfile.read(wav_file_path)
|
|
|
|
| 34 |
# Gradio UI wiring: map the input components to generate_audio and play
# the returned audio.
# NOTE(review): generate_audio is defined with three parameters
# (note, octave, duration) but only two input components remain here
# after the Dropdown was removed in this change — Gradio will raise an
# argument-count error when the button is pressed. Confirm the intended
# signature of generate_audio and either restore the third component or
# drop the unused parameters.
demo = gr.Interface(
    generate_audio,
    [
        gr.Slider(4, 6, step=1),
        gr.Textbox(value="empty", label="Text of Arabic Text"),
    ],
    "audio",
)
|