# whisper / app.py
# jacobda's picture
# Update app.py
# d84be0f
from deep_translator import GoogleTranslator
from transformers import pipeline
import gradio as gr
import time
from pytube import YouTube
# Pipeline built once at import time from the "tlord/whisper" model identifier.
# The rest of this file calls it with an audio file path and reads the "text"
# field of the result. To use a different model, change the identifier to
# "your-username/the-name-you-picked".
pipe = pipeline(model="tlord/whisper")
# def transcribe(audio, state = ""):
# time.sleep(2)
# text = pipe(audio)["text"]
# state += text + " "
# return state, state
# iface = gr.Interface(
# title="Whisper Small Swedish",
# description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
# fn=transcribe,
# inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
# outputs=["text", "state"],
# live=True
# )
# chatbot = gr.Chatbot().style(color_map=("green", "gray"))
# iface = gr.Interface(
# title="Whisper Sentiment Analysis in Swedish",
# description="Say something and the Oracle will respond depending on your mood.",
# fn=transcribe,
# inputs=[gr.Audio(source="microphone", type="filepath"), "state"],
# outputs=[chatbot, "state"],
# allow_flagging="never",
# )
# Maps the language names shown in the UI dropdown to the language codes that
# are handed to GoogleTranslator as the translation target.
#
# Each name appears exactly once: a repeated key in a dict literal silently
# keeps only the last value, which hides which code is really in use.
LANGUAGES = {
    'afrikaans': 'af',
    'albanian': 'sq',
    'amharic': 'am',
    'arabic': 'ar',
    'armenian': 'hy',
    'azerbaijani': 'az',
    'basque': 'eu',
    'belarusian': 'be',
    'bengali': 'bn',
    'bosnian': 'bs',
    'bulgarian': 'bg',
    'catalan': 'ca',
    'cebuano': 'ceb',
    'chichewa': 'ny',
    # Full region-qualified codes; the truncated 'zh-c' / 'zh-t' forms are not
    # valid Google Translate language codes.
    'chinese (simplified)': 'zh-CN',
    'chinese (traditional)': 'zh-TW',
    'corsican': 'co',
    'croatian': 'hr',
    'czech': 'cs',
    'danish': 'da',
    'dutch': 'nl',
    'english': 'en',
    'esperanto': 'eo',
    'estonian': 'et',
    'filipino': 'tl',
    'finnish': 'fi',
    'french': 'fr',
    'frisian': 'fy',
    'galician': 'gl',
    'georgian': 'ka',
    'german': 'de',
    'greek': 'el',
    'gujarati': 'gu',
    'haitian creole': 'ht',
    'hausa': 'ha',
    'hawaiian': 'haw',
    # NOTE(review): 'iw' is the Hebrew code listed by deep_translator's Google
    # backend; the former duplicate 'he' entry was dropped -- confirm against
    # the installed deep_translator version.
    'hebrew': 'iw',
    'hindi': 'hi',
    'hmong': 'hmn',
    'hungarian': 'hu',
    'icelandic': 'is',
    'igbo': 'ig',
    'indonesian': 'id',
    'irish': 'ga',
    'italian': 'it',
    'japanese': 'ja',
    'javanese': 'jw',
    'kannada': 'kn',
    'kazakh': 'kk',
    'khmer': 'km',
    'korean': 'ko',
    'kurdish (kurmanji)': 'ku',
    'kyrgyz': 'ky',
    'lao': 'lo',
    'latin': 'la',
    'latvian': 'lv',
    'lithuanian': 'lt',
    'luxembourgish': 'lb',
    'macedonian': 'mk',
    'malagasy': 'mg',
    'malay': 'ms',
    'malayalam': 'ml',
    'maltese': 'mt',
    'maori': 'mi',
    'marathi': 'mr',
    'mongolian': 'mn',
    'myanmar (burmese)': 'my',
    'nepali': 'ne',
    'norwegian': 'no',
    'odia': 'or',
    'pashto': 'ps',
    'persian': 'fa',
    'polish': 'pl',
    'portuguese': 'pt',
    'punjabi': 'pa',
    'romanian': 'ro',
    'russian': 'ru',
    'samoan': 'sm',
    'scots gaelic': 'gd',
    'serbian': 'sr',
    'sesotho': 'st',
    'shona': 'sn',
    'sindhi': 'sd',
    'sinhala': 'si',
    'slovak': 'sk',
    'slovenian': 'sl',
    'somali': 'so',
    'spanish': 'es',
    'sundanese': 'su',
    'swahili': 'sw',
    'tajik': 'tg',
    'tamil': 'ta',
    'telugu': 'te',
    'thai': 'th',
    'turkish': 'tr',
    'ukrainian': 'uk',
    'urdu': 'ur',
    'uyghur': 'ug',
    'uzbek': 'uz',
    'vietnamese': 'vi',
    'welsh': 'cy',
    'xhosa': 'xh',
    'yiddish': 'yi',
    'yoruba': 'yo',
    'zulu': 'zu',
}
def get_soundfile(link):
    """Download the first audio-only stream of a YouTube video.

    Args:
        link: URL of the YouTube video.

    Returns:
        Whatever ``Stream.download`` returns for the saved file (the caller
        passes it on to the speech pipeline as the audio input).

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    # first() yields None on an empty query instead of raising a bare
    # IndexError, which lets us report the actual problem.
    audio_stream = YouTube(link).streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {link!r}")
    # The same file name is reused on every call, so each download replaces
    # the previous one rather than accumulating files.
    return audio_stream.download(filename="tmp.mp4")
def translate(message, lang):
    """Translate Swedish text into the requested target language.

    Args:
        message: Swedish text to translate.
        lang: Target language code handed to ``GoogleTranslator``.

    Returns:
        The translated text, or ``"Error, sorry!"`` when there is nothing to
        translate or the translator gives back an empty result.
    """
    # Blank input cannot produce a translation, so answer with the fallback
    # message directly instead of invoking the translator on it.
    if not message or not message.strip():
        return "Error, sorry!"
    res = GoogleTranslator(source='sv', target=lang).translate(message)
    # Truthiness covers both a None and an empty-string result.
    return res if res else "Error, sorry!"
def transcribe(audio, lang, history, link):
    """Transcribe audio, translate the transcript and append both to the chat.

    Args:
        audio: Audio input for the speech pipeline (the UI supplies a file
            path); ignored when ``link`` is given.
        lang: Language name, a key of ``LANGUAGES``; falls back to
            ``'english'`` when empty or ``None``.
        history: List of ``(transcript, translation)`` pairs accumulated so
            far, or ``None`` for a fresh conversation.
        link: Optional YouTube URL; when non-blank its audio track is used
            instead of ``audio``.

    Returns:
        A ``(history, history)`` tuple: the same list twice, once for the
        chat display and once for the stored state.

    Raises:
        KeyError: If ``lang`` is not a key of ``LANGUAGES``.
    """
    history = history or []
    # Treat None and whitespace-only links the same as an empty textbox.
    link = (link or "").strip()
    if link:
        audio = get_soundfile(link)
    # Nothing recorded and no link supplied: leave the conversation untouched
    # rather than feeding an empty input to the speech pipeline.
    if not audio:
        return history, history
    if not lang:
        lang = 'english'
    lang_code = LANGUAGES[lang]
    text = pipe(audio)["text"]
    history.append((text, translate(text, lang_code)))
    return history, history
# Assemble the page: input controls in the left column, the conversation in
# the right column, and a button that runs transcribe() on the current inputs.
with gr.Blocks() as demo:
    # Per-conversation list of (transcript, translation) pairs.
    history = gr.State([])
    with gr.Row():
        with gr.Column():
            language = gr.Dropdown(choices=list(LANGUAGES), value="english")
            audio = gr.Audio(source="microphone", type="filepath")
            link = gr.Textbox(value="", label="Put YouTube link here")
            submit = gr.Button("Translate")
        with gr.Column():
            chatbot = gr.Chatbot().style(color_map=("green", "gray"))
    # The returned history is both rendered in the chatbot and stored back
    # into the state for the next click.
    submit.click(
        fn=transcribe,
        inputs=[audio, language, history, link],
        outputs=[chatbot, history],
    )

demo.launch()