Spaces:

Flux9665
/

MassivelyMultilingualTTS

Running on T4

App Files

Flux9665 committed on Jun 11, 2024

Commit

6a66802

1 Parent(s): ee42912

try to figure out how ZeroGPU works

Browse files

Files changed (1) hide show

app.py +24 -27

app.py CHANGED Viewed

@@ -119,7 +119,13 @@ class ControllableInterface(torch.nn.Module):
         return sr, wav, fig
 def read(prompt,
@@ -149,26 +155,17 @@ def read(prompt,
     return (sr, float2pcm(wav)), fig
-if __name__ == '__main__':
-    title = "Controllable Text-to-Speech for over 7000 Languages"
-    article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
-    available_artificial_voices = 1000
-    path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
-    iso_to_name = load_json_from_path(path_to_iso_list)
-    text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
-    controllable_ui = ControllableInterface(available_artificial_voices=available_artificial_voices)
-    iface = gr.Interface(fn=read,
-                         inputs=[gr.Textbox(lines=2,
-                                            placeholder="write what you want the synthesis to read here...",
-                                            value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
-                                            label="Text input"),
-                                 gr.Dropdown(text_selection,
-                                             type="value",
-                                             value='English Text (eng)',
-                                             label="Select the Language of the Text (type on your keyboard to find it quickly)"),
-                                 gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
-                                           value=279,
                                            label="Random Seed for the artificial Voice"),
                                  gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
                                  gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
@@ -176,10 +173,10 @@ if __name__ == '__main__':
                                  gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
                                  gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
                                  ],
-                         outputs=[gr.Audio(type="numpy", label="Speech"),
                                   gr.Image(label="Visualization")],
-                         title=title,
-                         theme="default",
-                         allow_flagging="never",
-                         article=article)
-    iface.launch()

         return sr, wav, fig
+title = "Controllable Text-to-Speech for over 7000 Languages"
+article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
+available_artificial_voices = 1000
+path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
+iso_to_name = load_json_from_path(path_to_iso_list)
+text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
+controllable_ui = ControllableInterface(available_artificial_voices=available_artificial_voices)
 def read(prompt,
     return (sr, float2pcm(wav)), fig
+iface = gr.Interface(fn=read,
+                     inputs=[gr.Textbox(lines=2,
+                                        placeholder="write what you want the synthesis to read here...",
+                                        value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
+                                        label="Text input"),
+                             gr.Dropdown(text_selection,
+                                         type="value",
+                                         value='English Text (eng)',
+                                         label="Select the Language of the Text (type on your keyboard to find it quickly)"),
+                             gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
+                                       value=279,
                                            label="Random Seed for the artificial Voice"),
                                  gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
                                  gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
                                  gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
                                  gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
                                  ],
+                     outputs=[gr.Audio(type="numpy", label="Speech"),
                                   gr.Image(label="Visualization")],
+                     title=title,
+                     theme="default",
+                     allow_flagging="never",
+                     article=article)
+iface.launch()