roger committed on
Commit
b52931d
·
1 Parent(s): fddcb70

feat: add audio converted demo

Browse files
Files changed (2) hide show
  1. .gitattributes +1 -0
  2. app.py +116 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
# Audio sample paths shown by the demo, grouped by language tab ("EN", "ZH").
# Within a language, each "source<N>" key maps to a single clip path, while
# each "prompt<N>" / "gen<N>" key maps to a list of two clip paths.
# Replace these with your actual paths.
audio_files = {
    "EN": {
        "source1": "asset/audio/speech_clone_samples/source/-8014568635405176842.wav",
        "source2": "asset/audio/speech_clone_samples/source/2188769758301752050.wav",
        "prompt1": [
            "asset/audio/speech_clone_samples/prompt/prompt1/4813840990459345930.wav",
            "asset/audio/speech_clone_samples/prompt/prompt1/-4261051484297537007.wav",
        ],
        "prompt2": [
            "asset/audio/speech_clone_samples/prompt/prompt2/-5427774732334682307.wav",
            "asset/audio/speech_clone_samples/prompt/prompt2/-8434461861028245286.wav",
        ],
        "gen1": [
            "asset/audio/speech_clone_samples/generated/gen1/5518114099457736437.wav",
            "asset/audio/speech_clone_samples/generated/gen1/7702800575106132714.wav",
        ],
        "gen2": [
            "asset/audio/speech_clone_samples/generated/gen2/-3552571881595006474.wav",
            "asset/audio/speech_clone_samples/generated/gen2/1663763965594639195.wav",
        ],
    },
    "ZH": {
        "source1": "asset/audio/speech_clone_samples/source/6180100163014579264.wav",
        "source2": "asset/audio/speech_clone_samples/source/3454520432972073544.wav",
        "prompt3": [
            "asset/audio/speech_clone_samples/prompt/prompt3/-3133904573328901327.wav",
            "asset/audio/speech_clone_samples/prompt/prompt3/-6807997165982172717.wav",
        ],
        "prompt4": [
            "asset/audio/speech_clone_samples/prompt/prompt4/3957690686751537502.wav",
            "asset/audio/speech_clone_samples/prompt/prompt4/-6025252638827969073.wav",
        ],
        "gen3": [
            "asset/audio/speech_clone_samples/generated/gen3/4648220778277450149.wav",
            "asset/audio/speech_clone_samples/generated/gen3/3417274376341807017.wav",
        ],
        "gen4": [
            "asset/audio/speech_clone_samples/generated/gen4/8580165735770550130.wav",
            "asset/audio/speech_clone_samples/generated/gen4/-2291633937079669023.wav",
        ],
    },
}
46
+
47
+
48
def _render_example_row(samples, position, prompt_key, generated_key):
    """Render one row: source clip, prompt clips, and converted clips.

    Must be called inside an open ``gr.Blocks`` context, since the components
    are created for their side effect of attaching to the current layout.

    Args:
        samples: Per-language mapping taken from ``audio_files``.
        position: 1-based row number; selects the ``source<position>`` key
            and is shown in the source heading and label.
        prompt_key: Key in ``samples`` holding the list of prompt clip paths.
        generated_key: Key in ``samples`` holding the list of converted
            clip paths.
    """
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"### Source Audio {position}")
            gr.Audio(samples[f"source{position}"], label=f"Source {position}")

        with gr.Column():
            gr.Markdown("### Prompts")
            # Labels are numbered from 1 within each row.
            for number, path in enumerate(samples[prompt_key], start=1):
                gr.Audio(path, label=f"Prompt {number}")

        with gr.Column():
            gr.Markdown("### Converted Audio")
            for number, path in enumerate(samples[generated_key], start=1):
                gr.Audio(path, label=f"Converted {number}")


def create_audio_demo():
    """Build the "Voice Conversion" Gradio page.

    The page has one tab per language. Each tab contains one row per source
    clip, laid out as three columns: the source clip, its prompt clips, and
    the converted clips. All paths are read from the module-level
    ``audio_files`` mapping.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # (tab title, key into audio_files, (prompt key, converted key) per row).
    # Row N of a tab always uses that language's "source<N>" entry.
    tab_layout = (
        ("English (EN)", "EN", (("prompt1", "gen1"), ("prompt2", "gen2"))),
        ("Chinese (ZH)", "ZH", (("prompt3", "gen3"), ("prompt4", "gen4"))),
    )

    with gr.Blocks(title="Voice Conversion") as demo:
        gr.Markdown("# Voice Conversion")

        with gr.Tabs():
            for tab_title, language, row_keys in tab_layout:
                samples = audio_files[language]
                with gr.TabItem(tab_title):
                    for position, (prompt_key, generated_key) in enumerate(
                        row_keys, start=1
                    ):
                        _render_example_row(
                            samples, position, prompt_key, generated_key
                        )

    return demo
112
+
113
+
114
# Script entry point: build the demo and launch it only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    demo = create_audio_demo()
    demo.launch()