Spaces:

GoodOnions
/

ID2223-Lab2

Sleeping

File size: 1,629 Bytes

e27d757
edf52c8
 
 
 
e27d757
edf52c8
 
 
 
54e968a
e27d757
 
edf52c8
 
fed42dc
2f8b01b
edf52c8
 
 
93d0e9f
edf52c8
93d0e9f
edf52c8
93d0e9f
e41fede
93d0e9f
e41fede
edf52c8
 
 
 
 
 
 
 
 
 
 
 
5c6d179
edf52c8
 
 
 
e928442
8982fe5
edf52c8

import os
import tempfile

import deepl
import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Language code handed to DeepL as the translation target.
TARGET_LANG = "EN-GB"

# The DeepL API key comes from the environment; it is None when DEEPL_KEY
# is not set.
deepl_key = os.environ.get("DEEPL_KEY")

# Shared clients, created once at import time and reused by every request.
translator = deepl.Translator(auth_key=deepl_key)
# Speech-to-text model (presumably an Italian fine-tune of Whisper small,
# judging by its name -- confirm on the model card).
pipe = pipeline(
    task="automatic-speech-recognition",
    model="FredBonux/whisper-small-it",
)

def transcribe(audio):
    """Transcribe an audio recording and translate the text to English.

    Args:
        audio: Path of the audio file to transcribe. ``None`` or an empty
            value means that nothing was recorded.

    Returns:
        A ``(transcribed_text, english_text)`` tuple. Both elements are empty
        strings when no audio was supplied; the translation is an empty
        string when the transcription contains no text.
    """
    # Submitting the form without a recording yields an empty value; there
    # is nothing to feed to the speech model in that case.
    if not audio:
        return "", ""

    ita = pipe(audio)["text"]

    # Only call the translation service when there is text to translate.
    eng = ""
    if ita.strip():
        eng = translator.translate_text(ita, target_lang=TARGET_LANG).text

    print(f"{ita} -> {eng}")
    return ita, eng

def transcribe_url(url):
    """Transcribe the audio track of a YouTube video and translate it to English.

    Args:
        url: Address of the YouTube video. ``None`` or a blank string means
            that no video was requested.

    Returns:
        A ``(transcribed_text, english_text)`` tuple. Both elements are empty
        strings when ``url`` is blank; the translation is an empty string
        when the transcription contains no text.

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    # Normalise first so that None does not turn into the literal "None" and
    # stray whitespace from copy/paste is ignored.
    url = "" if url is None else str(url).strip()
    if not url:
        return "", ""

    youtube = YouTube(url)
    print("Downloading video")
    stream = youtube.streams.filter(only_audio=True).first()
    if stream is None:
        # .first() returns None when the filter matches nothing; fail with a
        # clear message instead of an AttributeError on None.
        raise ValueError(f"No audio stream available for {url}")

    # Download into a throwaway directory so that files do not accumulate on
    # disk; the file is only needed until the speech model has read it.
    with tempfile.TemporaryDirectory() as workdir:
        audio = stream.download(workdir)
        print("Downloaded")
        text_it = pipe(audio)["text"]
    print(f"{text_it}")

    # Only call the translation service when there is text to translate.
    text_en = ""
    if text_it.strip():
        text_en = translator.translate_text(text_it, target_lang=TARGET_LANG).text
    print(f"{text_en}")
    return text_it, text_en

# Interface for the video tab: takes a URL as plain text and shows the
# transcription next to its English translation.
url_demo = gr.Interface(
    title="Italian video to english text",
    description="Transcribing italian video to text and translating it to english!",
    fn=transcribe_url,
    inputs="text",
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="English translation"),
    ],
)

# Interface for the microphone tab: records audio, passes it to `transcribe`
# as a file path, and shows the transcription next to its English translation.
voice_demo = gr.Interface(
    title="Italian recorded speech to english text",
    description="Transcribing italian speech to text and translating it to english!",
    fn=transcribe,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
    ),
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="English translation"),
    ],
)

# Combine both interfaces into a single application with one tab each.
app = gr.TabbedInterface(
    [url_demo, voice_demo],
    ["Video to English Text", "Audio to English Text"],
)

# Start the web server only when this file is executed as a script, so that
# importing the module (for example from a test) does not launch anything.
if __name__ == "__main__":
    app.launch()