# Whisper / app.py
# Danker's picture
# Update app.py
# fd3199c
from transformers import pipeline
import gradio as gr
import moviepy.editor as mp
from pytube import YouTube
import math
import youtube_dl
import validators
# transformers pipeline built once at import time from the
# "errno98/whisper-small-hi" model; transcribe() calls it for every segment.
pipe = pipeline(model="errno98/whisper-small-hi")
# Length of one audio segment. It is multiplied by the segment index to get
# cut points on the moviepy clip and divides the clip duration to get the
# segment count (presumably seconds, as moviepy durations are — confirm).
segment_len = 30
def download_soundcloud(url):
    """Download the audio behind *url* with youtube_dl and return its title.

    The file is written to the current working directory and named after
    the extractor's ID for the track (see ``outtmpl`` below).

    Args:
        url: Address of a single track to fetch.

    Returns:
        The ``title`` entry of the metadata dictionary youtube_dl extracted.

    Any error raised by youtube_dl propagates to the caller.
    """
    options = {
        'format': 'bestaudio/best',
        'extractaudio': True,  # only keep the audio
        'audioformat': "mp3",  # convert to mp3
        'outtmpl': '%(id)s',  # name the file the ID of the video
        'noplaylist': True,  # only download single song, not playlist
    }
    # NOTE(review): 'extractaudio' / 'audioformat' mirror command-line flag
    # names; confirm the YoutubeDL Python API honours them without an explicit
    # FFmpegExtractAudio postprocessor.

    # A single extract_info(download=True) call both fetches the file and
    # returns its metadata, so the track is no longer downloaded twice (the
    # earlier extra pass ignored `options` entirely).
    with youtube_dl.YoutubeDL(options) as ydl:
        info = ydl.extract_info(url, download=True)
    return info['title']
def load_vid(url):
    """Download a YouTube video and load its audio track into module globals.

    Side effects:
        * downloads the first progressive mp4 stream via pytube,
        * writes the video's audio track to ``audio.wav``,
        * sets the globals ``video_clip`` (the opened video), ``audio_wav``
          (the opened ``audio.wav``) and ``audio_len`` (its duration).

    Args:
        url: Address of the YouTube video.

    Returns:
        The value returned by pytube's ``download()`` for the saved video.

    Raises:
        ValueError: If the video offers no progressive mp4 stream, or the
            downloaded file has no audio track.
    """
    global video_clip
    global audio_wav
    global audio_len

    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.
    if stream is None:
        raise ValueError("No progressive mp4 stream available for this video")
    video = stream.download()

    clip = mp.VideoFileClip(video)
    if clip.audio is None:
        # Nothing to transcribe; release the reader before bailing out.
        clip.close()
        raise ValueError("Video has no audio track")

    video_clip = clip
    video_clip.audio.write_audiofile("audio.wav")
    audio_wav = mp.AudioFileClip("audio.wav")
    audio_len = audio_wav.duration
    return video
def validate_link(url):
    """Return True when *url* is a YouTube video of at most 10 minutes.

    Args:
        url: Candidate video address.

    Returns:
        ``False`` if pytube cannot resolve the link or read its length, or
        if the video is longer than 600 seconds; ``True`` otherwise.
    """
    try:
        # Reading .length is kept inside the try as well, so a failure while
        # resolving the video is reported as "invalid" rather than raised.
        video_length = YouTube(url).length
    except Exception as err:
        # A link pytube cannot handle must be rejected; it used to be
        # reported as valid, which let the download step crash later on.
        print(f"Could not load YouTube video: {err}")
        return False

    if video_length > 600:
        print("Video length is too long (longer than 10 minutes)")
        return False
    return True
def clippify(index, seg_total):
    """Write segment *index* of the loaded audio to its own WAV file.

    Reads the module globals ``audio_wav`` (the opened audio clip) and
    ``segment_len`` (segment length). Every segment spans
    ``[segment_len * index, segment_len * (index + 1))`` except the last
    one, which runs to the end of the clip.

    Args:
        index: Zero-based segment number.
        seg_total: Total number of segments the audio is split into.

    Returns:
        Name of the file written, ``audio_out<index>.wav``.
    """
    audio_file = f"audio_out{index}.wav"

    start = segment_len * index
    # The final segment takes whatever is left (end=None means "to the end
    # of the clip"); every other segment is exactly segment_len long.
    end = segment_len * (index + 1) if index < seg_total - 1 else None

    # A single subclip on the original timeline replaces the two chained
    # cutouts: the second cutout referenced an undefined name and applied
    # original-timeline offsets to a clip the first cutout had already shifted.
    segment = audio_wav.subclip(start, end)
    segment.write_audiofile(audio_file)
    return audio_file
def transcribe(url):
    """Transcribe the audio of the YouTube video at *url*.

    The link is validated, the video is downloaded with ``load_vid``, its
    audio is cut into ``segment_len``-long pieces with ``clippify`` and each
    piece is passed through the module-level ``pipe``.

    Args:
        url: Video address as typed by the user, with or without a scheme.

    Returns:
        The concatenated text of all segments, or a short error message
        when the link is rejected or the audio yields no segments.
    """
    url = (url or "").strip()
    # Accept links typed without a scheme, then validate the complete URL.
    # The previous check stripped the scheme and treated a *failed*
    # validation as success, so malformed input was never rejected.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    if not validators.url(url):
        return "Incorrect URL structure"

    # validate_link() also returns False for over-long videos, so the
    # message covers both reasons.
    if not validate_link(url):
        return "Not a YouTube video or the video is too long"
    load_vid(url)

    segment_count = math.ceil(audio_len / segment_len)
    if segment_count <= 0:
        return "Invalid segment length"

    pieces = []
    for x in range(segment_count):
        audio = clippify(x, segment_count)
        seg_text = pipe(audio, batch_size=512, truncation=True)["text"]
        print("Segtext: ")
        print(seg_text)
        pieces.append(seg_text)
    return "".join(pieces)
# Gradio UI: one text box for the video URL, wired to transcribe(); the
# returned string (transcription or error message) is shown as plain text.
# NOTE(review): the title/description say Swedish ("SE") while the model
# loaded at the top of the file is named "whisper-small-hi" — confirm which
# language is intended.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Textbox(label = "Input the URL of a YouTube video:"),
    outputs="text",
    title="Whisper Small SE",
    description="Video Swedish Transcription",
)
# Launch the interface as soon as the module is executed.
iface.launch()