cmeyer5678 committed on
Commit
e4da912
Β·
verified Β·
1 Parent(s): 6551dd3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import os
3
+ from TTS.api import TTS
4
+ import torch.serialization
5
+ import gradio as gr
6
+ from translate import Translator
7
+
8
+ model = whisper.load_model("base")
9
+
10
def speech_to_text(audio_file):
    """Transcribe an audio file with the module-level Whisper model.

    ``audio_file`` is a filesystem path; returns the transcribed text.
    The transcript is also echoed to stdout for debugging.
    """
    transcription = model.transcribe(audio_file)
    text = transcription["text"]
    print(text)
    return text
14
+
15
def translate(text, language):
    """Translate ``text`` into the target ``language`` code (e.g. "es")."""
    # A fresh Translator per call, as in the original implementation.
    return Translator(to_lang=language).translate(text)
19
+
20
+
21
# torch >= 2.6 changed torch.load's default to weights_only=True, which
# rejects the full (pickled) Whisper / Coqui checkpoints. Monkey-patch
# torch.load so every call loads with weights_only=False.
original_load = torch.load


def patched_load(*args, **kwargs):
    """torch.load wrapper that always disables the weights_only restriction.

    SECURITY NOTE: weights_only=False permits arbitrary pickle execution;
    acceptable here only because checkpoints come from trusted model hubs.
    """
    # BUG FIX: the original only overrode the flag when the caller passed it
    # explicitly (`if 'weights_only' in kwargs`), so plain torch.load(path)
    # calls still hit the restrictive new default and the patch was a no-op.
    kwargs["weights_only"] = False
    return original_load(*args, **kwargs)


torch.load = patched_load
31
+
32
# Accept the Coqui model license non-interactively (required to load XTTS v2
# without a terms-of-service prompt).
os.environ["COQUI_TOS_AGREED"] = "1"

# Load the multilingual XTTS v2 voice-cloning model on CPU.
# NOTE(review): this downloads a large checkpoint on first run — confirm
# disk/network budget for the deployment environment.
tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
35
+
36
+ # Speech to Speech Function
37
# Speech-to-Speech Function
def s2s(audio, language):
    """Speech-to-speech translation: transcribe, translate, re-synthesize.

    Parameters
    ----------
    audio : str
        Filesystem path to the input recording (gradio ``type="filepath"``).
    language : str
        Target language code accepted by both the translator and XTTS
        (e.g. ``"es"``, ``"zh-cn"``).

    Returns
    -------
    list
        ``[transcribed_text, translated_text, "output.wav"]`` — one value
        per gradio output component.
    """
    print(audio)
    result_text = speech_to_text(audio)
    translated_text = translate(result_text, language)

    # Synthesize the translation, cloning the speaker's voice from the
    # input recording itself (speaker_wav=audio).
    # BUG FIX: the original then re-opened output.wav and read it into an
    # `audio_data` variable that was never used (dead code, leftover from a
    # tmp-file approach) — removed along with the commented-out remnants.
    tts_model.tts_to_file(
        text=translated_text,
        file_path="output.wav",
        speaker_wav=audio,
        language=language,
    )

    return [result_text, translated_text, "output.wav"]
56
+
57
+ # List of supported language codes
58
+ language_names = ["Arabic", "Portuguese", "Chinese", "Czech", "Dutch",
59
+ "English", "French", "German", "Italian", "Polish",
60
+ "Russian", "Spanish", "Turkish", "Korean",
61
+ "Hungarian", "Hindi"]
62
+ language_options = ["ar", "pt", "zh-cn", "cs", "nl", "en", "fr", "de",
63
+ "it", "pl", "ru", "es", "tr", "ko", "hu", "hi"]
64
+
65
+
66
# Target-language selector: shows the human-readable name, submits the code.
# BUG FIX: choices was a bare zip iterator; gr.Dropdown expects a concrete
# sequence of (label, value) pairs, and an iterator is exhausted after one
# pass — materialize it with list().
language_dropdown = gr.Dropdown(
    choices=list(zip(language_names, language_options)),
    value="es",
    label="Target Language",
)

# NOTE(review): this button is never wired into the interface —
# gr.Interface supplies its own submit button. Kept for compatibility.
translate_button = gr.Button(value="Synthesize and Translate my Voice!")
transcribed_text = gr.Textbox(label="Transcribed Text")
output_text = gr.Textbox(label="Translated Text")
output_speech = gr.Audio(label="Translated Speech", type="filepath")
75
+
76
# Gradio UI: audio in (upload or microphone) + target language ->
# transcript, translation, and synthesized speech out.
demo = gr.Interface(
    fn=s2s,
    inputs=[
        gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            format="wav",
            show_download_button=True,
            waveform_options=gr.WaveformOptions(
                waveform_color="#01C6FF",
                # BUG FIX: was "FF69B4" — missing the leading '#', an
                # invalid CSS color value.
                waveform_progress_color="#FF69B4",
                skip_length=2,
                show_controls=False,
            ),
        ),
        language_dropdown,
    ],
    outputs=[transcribed_text, output_text, output_speech],
    theme=gr.themes.Soft(),
    title="Speech-to-Speech Translation (Demo)",
)

demo.launch(debug=True, share=True)