imenLa committed on
Commit
23e6066
·
verified ·
1 Parent(s): f572cd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -1,19 +1,30 @@
1
  import numpy as np
2
  import gradio as gr
 
3
  from scipy.io import wavfile
4
  from espnet2.bin.tts_inference import Text2Speech
 
5
  import soundfile as sf
6
 
7
- notes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
8
 
9
- def generate_tts():
 
 
 
 
 
 
 
 
 
 
10
  tts = Text2Speech.from_pretrained(model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth", vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl")
11
- wav = tts("sil t r aa ii0 m a rr a < u0 x r aa uu0 sil h uu0 sil n uu0 uu0 z sil m aa ii0 b ii0 n aa k s t t aa ii0 m sil")["wav"]
12
  audio_data = wav.numpy()
13
  sf.write('output.wav', audio_data, samplerate=22050)
14
 
15
  def generate_audio(note, octave, duration):
16
- generate_tts()
17
  wav_file_path = "output.wav"
18
 
19
  sr, audio_data = wavfile.read(wav_file_path)
@@ -23,9 +34,8 @@ def generate_audio(note, octave, duration):
23
  demo = gr.Interface(
24
  generate_audio,
25
  [
26
- gr.Dropdown(notes, type="index"),
27
  gr.Slider(4, 6, step=1),
28
- gr.Textbox(value=1, label="Duration in seconds"),
29
  ],
30
  "audio",
31
  )
 
1
  import numpy as np
2
  import gradio as gr
3
+
4
  from scipy.io import wavfile
5
  from espnet2.bin.tts_inference import Text2Speech
6
+ from arabic_pronounce import phonetise
7
  import soundfile as sf
8
 
 
9
 
10
def text_to_phoneme(tun_text):
    """Convert whitespace-separated text into a phoneme string framed by ``sil``.

    Each word is passed to ``phonetise`` and the first element of its result
    is kept; the per-word results are joined with single spaces and wrapped
    in a leading and trailing ``sil`` token.

    Args:
        tun_text: Input text; words are separated by whitespace.

    Returns:
        A string of the form ``"sil <phonemes> sil"``, or ``"sil sil"`` when
        the input contains no words.
    """
    # split() with no argument discards the empty tokens that repeated,
    # leading or trailing whitespace would otherwise produce, so phonetise
    # is never called on an empty string.
    phonemes = " ".join(phonetise(word)[0] for word in tun_text.split()).strip()
    if not phonemes:
        return "sil sil"
    return f"sil {phonemes} sil"
18
+
19
_TTS_MODEL = None  # lazily created Text2Speech instance shared by all calls


def _get_tts_model():
    """Return the shared Text2Speech instance, loading it on first use."""
    global _TTS_MODEL
    if _TTS_MODEL is None:
        # Loading the acoustic model and vocoder from disk is expensive, so
        # do it once rather than on every synthesis request.
        _TTS_MODEL = Text2Speech.from_pretrained(
            model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
            vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
        )
    return _TTS_MODEL


def generate_tts(input_text):
    """Synthesise speech for *input_text* and write it to ``output.wav``.

    The text is converted to phonemes with ``text_to_phoneme``, fed to the
    Text2Speech model, and the resulting waveform is written to
    ``output.wav`` in the current working directory.

    Args:
        input_text: Text to synthesise.

    Returns:
        None. The result is the ``output.wav`` file on disk.
    """
    # text_to_phoneme already frames its result with "sil" on both sides, so
    # the string is passed through unchanged instead of being wrapped again.
    phonemized_text = text_to_phoneme(input_text)
    wav = _get_tts_model()(phonemized_text)["wav"]
    # presumably a CPU torch tensor, hence .numpy() -- TODO confirm
    audio_data = wav.numpy()
    # NOTE(review): 22050 Hz is hard-coded; confirm it matches the model's
    # output sampling rate.
    sf.write('output.wav', audio_data, samplerate=22050)
25
 
26
  def generate_audio(note, octave, duration):
27
+ generate_tts(duration)
28
  wav_file_path = "output.wav"
29
 
30
  sr, audio_data = wavfile.read(wav_file_path)
 
34
# Gradio UI: a slider (4 to 6, step 1) and a text box as inputs, with
# generate_audio's result rendered by an audio output component.
# NOTE(review): generate_audio is declared with three parameters, but only
# two input components are listed here -- confirm the arity matches.
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Slider(4, 6, step=1),
        gr.Textbox(value="empty", label="Text of Arabic Text"),
    ],
    outputs="audio",
)