File size: 4,756 Bytes
440158f 61da54a 440158f 61da54a 440158f 61da54a 440158f 9da9992 440158f cc38f71 9bbc2de d84be0f cc38f71 d84be0f 9bbc2de 440158f d84be0f cc38f71 d84be0f 440158f cc38f71 440158f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | from deep_translator import GoogleTranslator
from transformers import pipeline
import gradio as gr
import time
from pytube import YouTube
# Hugging Face pipeline built from the "tlord/whisper" checkpoint. It is created
# once at import time; transcribe() calls it on an audio file and reads the
# "text" field of the result. Replace the model id with your own
# "<username>/<model-name>" to use a different checkpoint.
pipe = pipeline(model="tlord/whisper")
# def transcribe(audio, state = ""):
# time.sleep(2)
# text = pipe(audio)["text"]
# state += text + " "
# return state, state
# iface = gr.Interface(
# title="Whisper Small Swedish",
# description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
# fn=transcribe,
# inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
# outputs=["text", "state"],
# live=True
# )
# chatbot = gr.Chatbot().style(color_map=("green", "gray"))
# iface = gr.Interface(
# title="Whisper Sentiment Analysis in Swedish",
# description="Say something and the Oracle will respond depending on your mood.",
# fn=transcribe,
# inputs=[gr.Audio(source="microphone", type="filepath"), "state"],
# outputs=[chatbot, "state"],
# allow_flagging="never",
# )
# Maps the human-readable language names shown in the UI dropdown to the
# language codes passed as ``target`` to GoogleTranslator in translate().
# Insertion order is the order in which the names appear in the dropdown.
#
# NOTE: the Chinese variants use the region-qualified codes 'zh-CN' / 'zh-TW'
# and Hebrew uses 'iw'; each language name appears exactly once so no entry
# silently overrides another.
LANGUAGES = {
    'afrikaans': 'af',
    'albanian': 'sq',
    'amharic': 'am',
    'arabic': 'ar',
    'armenian': 'hy',
    'azerbaijani': 'az',
    'basque': 'eu',
    'belarusian': 'be',
    'bengali': 'bn',
    'bosnian': 'bs',
    'bulgarian': 'bg',
    'catalan': 'ca',
    'cebuano': 'ceb',
    'chichewa': 'ny',
    'chinese (simplified)': 'zh-CN',
    'chinese (traditional)': 'zh-TW',
    'corsican': 'co',
    'croatian': 'hr',
    'czech': 'cs',
    'danish': 'da',
    'dutch': 'nl',
    'english': 'en',
    'esperanto': 'eo',
    'estonian': 'et',
    'filipino': 'tl',
    'finnish': 'fi',
    'french': 'fr',
    'frisian': 'fy',
    'galician': 'gl',
    'georgian': 'ka',
    'german': 'de',
    'greek': 'el',
    'gujarati': 'gu',
    'haitian creole': 'ht',
    'hausa': 'ha',
    'hawaiian': 'haw',
    'hebrew': 'iw',
    'hindi': 'hi',
    'hmong': 'hmn',
    'hungarian': 'hu',
    'icelandic': 'is',
    'igbo': 'ig',
    'indonesian': 'id',
    'irish': 'ga',
    'italian': 'it',
    'japanese': 'ja',
    'javanese': 'jw',
    'kannada': 'kn',
    'kazakh': 'kk',
    'khmer': 'km',
    'korean': 'ko',
    'kurdish (kurmanji)': 'ku',
    'kyrgyz': 'ky',
    'lao': 'lo',
    'latin': 'la',
    'latvian': 'lv',
    'lithuanian': 'lt',
    'luxembourgish': 'lb',
    'macedonian': 'mk',
    'malagasy': 'mg',
    'malay': 'ms',
    'malayalam': 'ml',
    'maltese': 'mt',
    'maori': 'mi',
    'marathi': 'mr',
    'mongolian': 'mn',
    'myanmar (burmese)': 'my',
    'nepali': 'ne',
    'norwegian': 'no',
    'odia': 'or',
    'pashto': 'ps',
    'persian': 'fa',
    'polish': 'pl',
    'portuguese': 'pt',
    'punjabi': 'pa',
    'romanian': 'ro',
    'russian': 'ru',
    'samoan': 'sm',
    'scots gaelic': 'gd',
    'serbian': 'sr',
    'sesotho': 'st',
    'shona': 'sn',
    'sindhi': 'sd',
    'sinhala': 'si',
    'slovak': 'sk',
    'slovenian': 'sl',
    'somali': 'so',
    'spanish': 'es',
    'sundanese': 'su',
    'swahili': 'sw',
    'tajik': 'tg',
    'tamil': 'ta',
    'telugu': 'te',
    'thai': 'th',
    'turkish': 'tr',
    'ukrainian': 'uk',
    'urdu': 'ur',
    'uyghur': 'ug',
    'uzbek': 'uz',
    'vietnamese': 'vi',
    'welsh': 'cy',
    'xhosa': 'xh',
    'yiddish': 'yi',
    'yoruba': 'yo',
    'zulu': 'zu',
}
def get_soundfile(link):
    """Download the first audio-only stream of a YouTube video.

    Args:
        link: URL of the YouTube video, passed straight to ``YouTube``.

    Returns:
        Whatever ``download`` returns for the saved stream; transcribe()
        hands this value to the speech pipeline as its audio input.

    Raises:
        IndexError: presumably, when the video exposes no audio-only
            stream, since the first element is taken unconditionally.
    """
    audio_streams = YouTube(link).streams.filter(only_audio=True)
    first_stream = audio_streams[0]
    # The download always goes to the same fixed file name.
    return first_stream.download(filename="tmp.mp4")
def translate(message, lang, source='sv'):
    """Translate ``message`` into ``lang`` with GoogleTranslator.

    Args:
        message: Text to translate.
        lang: Target language code (e.g. a value from ``LANGUAGES``).
        source: Source language code. Defaults to ``'sv'`` (Swedish), which
            was previously hard-coded, so existing two-argument calls behave
            exactly as before.

    Returns:
        The translated text, or the fixed string ``"Error, sorry!"`` when the
        translator returns ``None`` or an empty string.
    """
    res = GoogleTranslator(source=source, target=lang).translate(message)
    # Identity check for None; an empty result is treated as a failure too.
    if res is None or res == "":
        return "Error, sorry!"
    return res
def transcribe(audio, lang, history, link):
    """Transcribe audio, translate the transcript and append both to history.

    Args:
        audio: Audio input for the speech pipeline (the UI supplies a file
            path from the microphone component). May be ``None`` when nothing
            was recorded.
        lang: Language name, a key of ``LANGUAGES``. ``None`` or ``""`` falls
            back to ``'english'``.
        history: List of ``(transcript, translation)`` tuples accumulated so
            far, or ``None``/empty for a fresh conversation.
        link: Optional YouTube URL. When non-blank, its audio is downloaded
            and used instead of ``audio``.

    Returns:
        A ``(history, history)`` pair: the same list twice, once for the
        chatbot display and once for the session state.

    Raises:
        KeyError: if ``lang`` is a non-empty name missing from ``LANGUAGES``.
    """
    history = history or []

    # Treat None and whitespace-only input the same as "no link given" so we
    # never hand a non-URL to the downloader.
    link = (link or "").strip()
    if link:
        audio = get_soundfile(link)

    # Nothing recorded and no link: there is nothing to transcribe, so leave
    # the conversation untouched instead of crashing inside the pipeline.
    if audio is None:
        return history, history

    if not lang:
        lang = 'english'
    lang_code = LANGUAGES[lang]

    text = pipe(audio)["text"]
    history.append((text, translate(text, lang_code)))
    return history, history
# Build the UI: inputs (language, microphone, YouTube link, button) in the
# left column, the chat-style transcript/translation view in the right one.
with gr.Blocks() as demo:
    # Per-session list of (transcript, translation) pairs.
    history = gr.State(value=[])

    with gr.Row():
        with gr.Column():
            language = gr.Dropdown(choices=list(LANGUAGES), value="english")
            audio = gr.Audio(source="microphone", type="filepath")
            link = gr.Textbox(value="", label="Put YouTube link here")
            submit = gr.Button("Translate")

        with gr.Column():
            chatbot = gr.Chatbot().style(color_map=("green", "gray"))

    # transcribe() returns the history twice: once to render in the chatbot
    # and once to store back into the session state.
    submit.click(
        fn=transcribe,
        inputs=[audio, language, history, link],
        outputs=[chatbot, history],
    )

demo.launch()