Spaces:

cmeyer5678
/

test

No application file

App Files Files Community

cmeyer5678 commited on Apr 26, 2024

Commit

4ed8985

verified ·

1 Parent(s): 15d4845

Create app.py

Browse files

Files changed (1) hide show

app.py +119 -0

app.py ADDED Viewed

	@@ -0,0 +1,119 @@

+# -*- coding: utf-8 -*-
+"""AI_Club_Multilingual_Speech_Synthesis_Friday.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1EZPulIF2l2emrtMxVSm4q9D__aWLmpp-
+# AI Club Multilingual Speech Synthesis
+Spring 2024 AI Club at San Diego State University
+## Downloading library dependencies
+"""
+pip install gradio
+pip install git+https://github.com/openai/whisper.git
+pip install translate
+pip install TTS
+from google.colab import drive
+drive.mount('/content/drive')
+"""## Importing libraries and dependencies"""
+import gradio as gr
+import numpy as np
+#import ffmpeg
+import whisper
+from translate import Translator
+from TTS.api import TTS
+# Loading the base model
+model = whisper.load_model("base")
+def speech_to_text(audio):
+    result = model.transcribe(audio)
+    return result["text"] # Only first tuple
+# Defining the Translate Function
+def translate(text, language):
+    # Replace this with actual translation logic using a translation library or API
+    translator = Translator(to_lang=language)
+    translated_text = translator.translate(text)
+    return translated_text
+# Initialize TTS model outside the function to avoid reinitialization on each call
+tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
+# Speech to Speech Function
+def s2s(audio, language):
+    # Do some text processing here (transcription and translation)
+    result_text = speech_to_text(audio)
+    translated_text = translate(result_text, language)
+    # Generate speech using the input audio as the speaker's voice
+    tts_model.tts_to_file(text=translated_text,
+                          file_path="output.wav",
+                          speaker_wav=audio,
+                          language=language)
+    with open("output.wav", "rb") as audio_file:
+        audio_data = audio_file.read()
+    return [result_text, translated_text, audio_data]
+# List of supported language codes
+language_names = ["Arabic", "Portuguese", "Chinese", "Czech", "Dutch",
+                  "English", "French", "German", "Italian", "Polish",
+                  "Russian", "Spanish", "Turkish", "Korean",
+                  "Hungarian", "Hindi"]
+language_options = ["ar",
+                    "pt",
+                    "zh-cn",
+                    "cs",
+                    "nl",
+                    "en",
+                    "fr",
+                    "de",
+                    "it",
+                    "pl",
+                    "ru",
+                    "es",
+                    "tr",
+                    "ko",
+                    "hu",
+                    "hi"]
+language_dropdown = gr.Dropdown(choices = zip(language_names, language_options),
+                                value= "es",
+                                label="Target Language",
+)
+translate_button = gr.Button(value="Synthesize and Translate my Voice!")
+transcribed_text = gr.Textbox(label="Transcribed Text")
+output_text = gr.Textbox(label="Translated Text")
+output_speech = gr.Audio(label="Translated Speech", type="filepath")
+# Gradio interface with the transcribe function as the main function
+demo = gr.Interface(
+    # title='Speech Translation',
+    fn=s2s,
+    inputs=[gr.Audio(sources=["upload", "microphone"],
+                     type="filepath", format = "wav",
+                     show_download_button=True,
+                     waveform_options=gr.WaveformOptions(
+                        waveform_color="#01C6FF",
+                        waveform_progress_color="#0066B4",
+                        skip_length=2,
+                        show_controls=False,
+                        )
+                     ),
+            language_dropdown],
+    outputs=[transcribed_text, output_text, output_speech],
+    title="Speech-to-Speech Translation (Demo)"
+)
+#demo.launch(debug=True, share = True)
+demo.launch()