cmeyer5678 committed on
Commit
4ed8985
·
verified ·
1 Parent(s): 15d4845

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """AI_Club_Multilingual_Speech_Synthesis_Friday.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1EZPulIF2l2emrtMxVSm4q9D__aWLmpp-
8
+
9
+ # AI Club Multilingual Speech Synthesis
10
+ Spring 2024 AI Club at San Diego State University
11
+
12
+ ## Downloading library dependencies
13
+ """
14
+
15
# Third-party packages this script needs. Install them before running, e.g.
# from a shell with `pip install <package>` or from a notebook cell with
# `!pip install <package>`. They are listed as comments because a bare
# `pip install ...` line is not valid Python and prevents the module from
# being parsed:
#   gradio
#   git+https://github.com/openai/whisper.git
#   translate
#   TTS

# Mount Google Drive only when running inside Colab. Outside Colab the
# `google.colab` package is not importable, so the mount is skipped and
# `drive` is left as None.
try:
    from google.colab import drive
except ImportError:
    drive = None
else:
    drive.mount('/content/drive')
22
+
23
+
24
+
25
+ """## Importing libraries and dependencies"""
26
+
27
+ import gradio as gr
28
+ import numpy as np
29
+ #import ffmpeg
30
+ import whisper
31
+ from translate import Translator
32
+ from TTS.api import TTS
33
+
34
# Whisper "base" checkpoint, loaded once at import and shared by every
# call to speech_to_text().
model = whisper.load_model(name="base")
36
+
37
def speech_to_text(audio):
    """Transcribe *audio* with the module-level Whisper ``model``.

    ``audio`` is passed to ``model.transcribe`` unchanged, and only the
    ``"text"`` entry of the returned result is handed back.
    """
    return model.transcribe(audio)["text"]
40
+
41
+ # Defining the Translate Function
42
+ def translate(text, language):
43
+ # Replace this with actual translation logic using a translation library or API
44
+ translator = Translator(to_lang=language)
45
+ translated_text = translator.translate(text)
46
+ return translated_text
47
+
48
# Build the XTTS v2 model once at import time so that individual requests do
# not re-initialize it.
import torch  # imported here because it is only needed to probe for CUDA

# Use the GPU when CUDA is available; otherwise run on the CPU rather than
# requesting a GPU unconditionally.
tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2",
                gpu=torch.cuda.is_available())
50
+
51
+ # Speech to Speech Function
52
+ def s2s(audio, language):
53
+ # Do some text processing here (transcription and translation)
54
+ result_text = speech_to_text(audio)
55
+ translated_text = translate(result_text, language)
56
+
57
+ # Generate speech using the input audio as the speaker's voice
58
+ tts_model.tts_to_file(text=translated_text,
59
+ file_path="output.wav",
60
+ speaker_wav=audio,
61
+ language=language)
62
+ with open("output.wav", "rb") as audio_file:
63
+ audio_data = audio_file.read()
64
+
65
+ return [result_text, translated_text, audio_data]
66
+
67
# Supported target languages as (display name, language code) pairs.
# The two parallel lists below are derived from this single table so the
# names and codes cannot drift out of alignment.
_LANGUAGES = (
    ("Arabic", "ar"),
    ("Portuguese", "pt"),
    ("Chinese", "zh-cn"),
    ("Czech", "cs"),
    ("Dutch", "nl"),
    ("English", "en"),
    ("French", "fr"),
    ("German", "de"),
    ("Italian", "it"),
    ("Polish", "pl"),
    ("Russian", "ru"),
    ("Spanish", "es"),
    ("Turkish", "tr"),
    ("Korean", "ko"),
    ("Hungarian", "hu"),
    ("Hindi", "hi"),
)
language_names = [display_name for display_name, _ in _LANGUAGES]
language_options = [code for _, code in _LANGUAGES]
88
+
89
# Target-language selector. Each choice is a (display name, language code)
# pair. The pairs are materialized into a list because `zip` returns a
# one-shot iterator, which would be empty if it were ever iterated a second
# time.
language_dropdown = gr.Dropdown(
    choices=list(zip(language_names, language_options)),
    value="es",
    label="Target Language",
)
93
+
94
# Standalone button component (not referenced by the gr.Interface call in
# this file).
translate_button = gr.Button("Synthesize and Translate my Voice!")

# Output widgets: the transcript, its translation, and the synthesized speech.
transcribed_text = gr.Textbox(label="Transcribed Text")
output_text = gr.Textbox(label="Translated Text")
output_speech = gr.Audio(type="filepath", label="Translated Speech")
98
+
99
# Audio input: accepts an upload or a microphone recording and is configured
# with type="filepath" and format="wav".
waveform_style = gr.WaveformOptions(
    waveform_color="#01C6FF",
    waveform_progress_color="#0066B4",
    skip_length=2,
    show_controls=False,
)
input_audio = gr.Audio(
    sources=["upload", "microphone"],
    type="filepath",
    format="wav",
    show_download_button=True,
    waveform_options=waveform_style,
)

# Gradio interface with the speech-to-speech function `s2s` as its handler.
demo = gr.Interface(
    fn=s2s,
    inputs=[input_audio, language_dropdown],
    outputs=[transcribed_text, output_text, output_speech],
    title="Speech-to-Speech Translation (Demo)",
)

# Alternative for a shared debugging session: demo.launch(debug=True, share=True)
demo.launch()