cmeyer5678 committed on
Commit
2938ff6
·
verified ·
1 Parent(s): 687a427

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -115
app.py DELETED
@@ -1,115 +0,0 @@
# -*- coding: utf-8 -*-
"""AI_Club_Multilingual_Speech_Synthesis_Friday.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1EZPulIF2l2emrtMxVSm4q9D__aWLmpp-

# AI Club Multilingual Speech Synthesis
Spring 2024 AI Club at San Diego State University

## Downloading library dependencies
"""
# FIX(review): the notebook ran these as shell commands; bare `pip install`
# lines are a SyntaxError in a .py module.  Kept here as comments — install
# these via requirements.txt (or a setup cell) instead:
#   pip install --upgrade pip
#   pip install spacy
#   pip install gradio
#   pip install git+https://github.com/openai/whisper.git
#   pip install translate
#   pip install TTS
"""## Importing libraries and dependencies"""

import gradio as gr
import numpy as np
#import ffmpeg
import whisper
from translate import Translator
from TTS.api import TTS

# Load Whisper's "base" checkpoint once at module import; the same model
# object is reused by every transcription request below.
model = whisper.load_model("base")
def speech_to_text(audio):
    """Transcribe an audio file with the module-level Whisper model.

    Parameters
    ----------
    audio : str
        Path to the recording to transcribe.

    Returns
    -------
    str
        The transcribed text (the "text" field of Whisper's result dict).
    """
    transcription = model.transcribe(audio)
    return transcription["text"]
# Text translation helper.
def translate(text, language):
    """Translate ``text`` into the target ``language``.

    Parameters
    ----------
    text : str
        Source text to translate.
    language : str
        Target language code (e.g. "es", "fr") understood by the
        ``translate`` package.

    Returns
    -------
    str
        The translated text.
    """
    # A fresh Translator per call keeps the function stateless.
    return Translator(to_lang=language).translate(text)
# Initialize the XTTS v2 model once at module import so each request does
# not pay the (large) model-load cost again.
# FIX(review): the original hard-coded gpu=True, which raises on CPU-only
# hosts; detect CUDA availability instead (identical behavior when a GPU
# is present).  torch is already a dependency of TTS.
import torch

tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2",
                gpu=torch.cuda.is_available())
# Speech-to-Speech translation pipeline.
def s2s(audio, language):
    """Transcribe ``audio``, translate it, and re-synthesize it in the
    speaker's own voice.

    Parameters
    ----------
    audio : str
        Filepath of the input recording (the Gradio Audio input is
        configured with type="filepath").
    language : str
        Target language code — one of ``language_options``.

    Returns
    -------
    list
        ``[transcribed_text, translated_text, output_wav_path]`` matching
        the three Gradio output components.
    """
    # Text processing: transcription, then translation.
    result_text = speech_to_text(audio)
    translated_text = translate(result_text, language)

    # Voice cloning: the input recording doubles as the speaker reference.
    output_path = "output.wav"
    tts_model.tts_to_file(text=translated_text,
                          file_path=output_path,
                          speaker_wav=audio,
                          language=language)

    # FIX(review): the output component is gr.Audio(type="filepath"), which
    # expects a filepath string; the original read the file back and
    # returned raw bytes, which that component cannot render.
    return [result_text, translated_text, output_path]
# Supported languages: human-readable labels paired with the codes that
# both the `translate` backend and XTTS v2 accept, kept in lockstep.
_LANGUAGE_PAIRS = [
    ("Arabic", "ar"), ("Portuguese", "pt"), ("Chinese", "zh-cn"),
    ("Czech", "cs"), ("Dutch", "nl"), ("English", "en"),
    ("French", "fr"), ("German", "de"), ("Italian", "it"),
    ("Polish", "pl"), ("Russian", "ru"), ("Spanish", "es"),
    ("Turkish", "tr"), ("Korean", "ko"), ("Hungarian", "hu"),
    ("Hindi", "hi"),
]
language_names = [name for name, _ in _LANGUAGE_PAIRS]
language_options = [code for _, code in _LANGUAGE_PAIRS]
# FIX(review): gr.Dropdown was given a bare zip() iterator; an iterator is
# consumed once, so the choices could come out empty if gradio iterates it
# more than once.  Pass a concrete list of (label, value) pairs instead.
language_dropdown = gr.Dropdown(choices=list(zip(language_names, language_options)),
                                value="es",
                                label="Target Language",
                                )

# NOTE(review): this button is never wired into the Interface below; it is
# kept so any external reference still resolves, but it has no effect.
translate_button = gr.Button(value="Synthesize and Translate my Voice!")

# Output components: transcription, translation, and synthesized speech.
transcribed_text = gr.Textbox(label="Transcribed Text")
output_text = gr.Textbox(label="Translated Text")
output_speech = gr.Audio(label="Translated Speech", type="filepath")
# Wire everything into a single Gradio interface; s2s drives the demo.
_input_audio = gr.Audio(
    sources=["upload", "microphone"],
    type="filepath",
    format="wav",
    show_download_button=True,
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)

demo = gr.Interface(
    fn=s2s,
    inputs=[_input_audio, language_dropdown],
    outputs=[transcribed_text, output_text, output_speech],
    title="Speech-to-Speech Translation (Demo)",
)

demo.launch()