presencesw committed on
Commit
e2dc557
·
1 Parent(s): 3301011

Feat: add app.py file

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -2
  2. app.py +59 -0
Dockerfile CHANGED
@@ -2,8 +2,6 @@ FROM python:3.12
2
 
3
  WORKDIR /app
4
  COPY ./requirements.txt /app/requirements.txt
5
- # copy all . to /app
6
- COPY . /app/
7
 
8
  # RUN apt-get update && apt-get install -y espeak
9
  RUN apt-get update && apt-get install -y espeak-ng espeak-ng-data
 
2
 
3
  WORKDIR /app
4
  COPY ./requirements.txt /app/requirements.txt
 
 
5
 
6
  # RUN apt-get update && apt-get install -y espeak
7
  RUN apt-get update && apt-get install -y espeak-ng espeak-ng-data
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from split_audio.main import AudioSplitter
3
+ import numpy as np
4
+ import time
5
+ import os
6
+ import soundfile as sf
7
+ import librosa
8
+
# One shared splitter instance, configured for Vietnamese; reused across
# every Gradio request.
splitter = AudioSplitter(language="vi")


def split_audio(str_raw, str_trunc, audio_input):
    """Cut *audio_input* down to the truncated transcript, then drop silence.

    Parameters mirror the Gradio inputs: ``str_raw`` is the full transcript,
    ``str_trunc`` the shortened one, and ``audio_input`` a Gradio
    ``(sample_rate, samples)`` tuple.  Returns a ``(24000, samples)`` tuple
    for the Gradio Audio output.

    NOTE(review): the 24 kHz output rate is hard-coded and the incoming
    sample rate is discarded — presumably AudioSplitter works at 24 kHz;
    confirm against its implementation.
    """
    samples = audio_input[1]  # drop Gradio's sample-rate element
    trimmed = splitter.split_audio(str_raw, str_trunc, samples)
    # Keep only the voiced spans louder than -30 dB relative to peak.
    voiced_spans = librosa.effects.split(trimmed, top_db=30)
    trimmed = np.concatenate([trimmed[lo:hi] for lo, hi in voiced_spans])
    return (24000, trimmed)
# Build the Gradio UI: left column holds the raw/truncated transcripts plus
# the Run button; right column shows the source audio and the trimmed result.
with gr.Blocks() as demo:
    # with gr.Row():
    #     text_input = gr.Textbox(value="Đây là một ví dụ về tổng hợp giọng nói.")
    #     audio_output = gr.Audio()
    # with gr.Row():
    #     run_button = gr.Button(value="generate voice")
    #     rtf_log = gr.Number(label="Real Time Factor")
    with gr.Row():
        with gr.Column():
            # Inputs are non-interactive: they are populated via gr.Examples below.
            text_raw = gr.Textbox(value="", label="Text raw", interactive=False, lines=3)
            text_cut = gr.Textbox(label="temp", lines=3)
            run_button = gr.Button(value="Run")
        with gr.Column():
            audio_input = gr.Audio(label="Audio raw", interactive=False, )
            audio_output = gr.Audio(label="Temp", interactive=False)

    # run_button.click(fn=generate_voice, inputs=[text_input], outputs=[audio_output])
    # run_button.click(fn=generate_voice, inputs=[text_input], outputs=[audio_output, rtf_log, phonemizer],)
    # Wire the button to the module-level split_audio(str_raw, str_trunc, audio).
    run_button.click(fn=split_audio, inputs=[text_raw, text_cut, audio_input], outputs=[audio_output])
    # get all file wavs in "audio_example" folder

    # Gather example wavs; capped at 30 — presumably to keep page load fast.
    audio_files = [f for f in os.listdir("audio_example") if f.endswith(".wav")]
    audio_files = audio_files[:30]
    # text_files = [f.replace(".wav", ".txt") for f in audio_files]
    # audio = [sf.read(os.path.join("audio_example", f)) for f in audio_files]
    # text = [open(os.path.join("audio_example", f), "r", encoding="utf-8").read().strip() for f in text_files]

    # For each example: resample to 24 kHz, strip silence (< -30 dB), and pair
    # the waveform with its transcript from the sibling .txt file.
    # NOTE(review): assumes every .wav has a matching .txt — an orphan wav
    # would raise FileNotFoundError at import time; confirm the dataset layout.
    examples_data = []
    for wav_file in audio_files:
        # waveform, sr = sf.read(os.path.join("audio_example", wav_file))
        waveform, sr = librosa.load(os.path.join("audio_example", wav_file), sr=24000)
        intervals = librosa.effects.split(waveform, top_db=30)

        waveform = np.concatenate([waveform[start:end] for start, end in intervals])
        with open(os.path.join("audio_example", wav_file.replace(".wav", ".txt")), "r", encoding="utf-8") as f:
            text = f.read().strip()
        examples_data.append([text, (sr, waveform)])

    # Clicking an example fills the transcript and source-audio inputs.
    gr.Examples(examples=examples_data, inputs=[text_raw, audio_input])
58
+ if __name__ == "__main__":
59
+ demo.launch(server_port=7860, server_name="0.0.0.0")