YTYT / app.py
JoshMe1's picture
Update app.py
5d96714
import streamlit as st
import whisper
from pytube import YouTube
import os
def get_audio(url):
    """Download an audio-only stream of a YouTube video.

    Args:
        url: Address of the YouTube video.

    Returns:
        Whatever pytube's ``download`` returns for the saved file
        (presumably its path -- confirm against the pytube docs). The file
        is always saved under the fixed name ``tmp.mp4``.

    Raises:
        IndexError: Presumably raised by the ``[0]`` lookup when the video
            exposes no audio-only stream -- confirm against pytube.
    """
    audio_streams = YouTube(url).streams.filter(only_audio=True)
    # The first match is taken as-is; no bitrate/codec preference is applied.
    first_stream = audio_streams[0]
    return first_stream.download(filename="tmp.mp4")
def get_transcript(url, model_size, lang, format):
    """Transcribe the audio of a YouTube video with Whisper.

    Args:
        url: Address of the YouTube video.
        model_size: Whisper model name handed to ``whisper.load_model``.
        lang: Language name, or the string ``"None"`` to pass
            ``language=None`` to Whisper.
        format: ``"None"`` for plain text, ``".srt"`` for SubRip output.

    Returns:
        The plain transcript text, the SRT-formatted transcript, or ``None``
        when ``format`` is neither of the two recognised values.
    """
    # The UI encodes "no language selected" as the string "None".
    language = None if lang == "None" else lang

    # The model is loaded before the audio is fetched, as in the original.
    model = whisper.load_model(model_size)
    audio_path = get_audio(url)
    result = model.transcribe(audio_path, fp16=False, language=language)

    if format == ".srt":
        return format_to_srt(result["segments"])
    if format == "None":
        return result["text"]
    return None
def format_to_srt(segments):
    """Render transcription segments as SubRip (.srt) text.

    Args:
        segments: Iterable of mappings, each providing ``'start'``, ``'end'``
            (passed to ``format_timestamp``) and ``'text'``.

    Returns:
        One string holding a numbered cue per segment (numbering starts at
        1), each cue followed by a blank line; ``""`` for no segments.
    """
    cues = []
    for number, seg in enumerate(segments, start=1):
        begin = format_timestamp(seg['start'])
        finish = format_timestamp(seg['end'])
        cues.append(f"{number}\n{begin} --> {finish}\n{seg['text']}\n\n")
    return "".join(cues)
def format_timestamp(t):
    """Format a time offset in seconds as an SRT timestamp.

    The value is rounded to whole milliseconds once, and then split with
    integer arithmetic. The previous float-based split truncated
    representation error (2.3 s became ``00:00:02,299``) and could not carry
    a rounded-up millisecond into the seconds field.

    Args:
        t: Offset in seconds (int or float); expected to be non-negative.

    Returns:
        A string of the form ``HH:MM:SS,mmm``.
    """
    total_ms = int(round(float(t) * 1000))
    hh, rest = divmod(total_ms, 3_600_000)
    mm, rest = divmod(rest, 60_000)
    ss, mi = divmod(rest, 1_000)
    return f"{hh:02d}:{mm:02d}:{ss:02d},{mi:03d}"
def save_srt(transcript):
    """Write SRT text to ``transcript.srt`` in the working directory.

    The file is written as UTF-8 explicitly, so transcripts containing
    non-ASCII characters do not depend on the platform's default encoding.

    Args:
        transcript: The SRT-formatted text to store.

    Returns:
        ``True`` once the file has been written.
    """
    with open("transcript.srt", "w", encoding="utf-8") as f:
        f.write(transcript)
    return True
def download_srt(transcript):
    """Render a Streamlit button that downloads *transcript* as an .srt file.

    ``transcript`` is expected to already be SRT-formatted text (the caller
    obtains it from ``get_transcript(..., format=".srt")``), so it is served
    as-is. The earlier implementation round-tripped the text through
    ``done.txt`` and fed it back into ``format_to_srt``, which expects
    segment mappings and fails on a plain string; it also passed the literal
    file name as ``data``, so the download would have contained the name
    rather than the transcript.

    Args:
        transcript: SRT-formatted text to offer for download.

    Returns:
        The value returned by ``st.download_button``.
    """
    return st.download_button(
        label="Download Transcript (.srt)",
        data=transcript,
        file_name="transcript.srt",
        mime="text/plain",
    )
# ---------------------------------------------------------------------------
# Streamlit page: collect the inputs, then transcribe on demand.
# ---------------------------------------------------------------------------

# "None" is the UI sentinel that get_transcript turns into language=None.
langs = ["None"] + sorted(whisper.tokenizer.LANGUAGES.values())
# Use Whisper's public listing rather than the private ``_MODELS`` table.
model_sizes = whisper.available_models()

st.title("Whisper Transcription Demo")
url = st.text_input("Youtube video URL")
model_size = st.selectbox("Model", model_sizes)
lang = st.selectbox("Language (Optional)", langs)
# Named ``output_format`` so the builtin ``format`` is not shadowed.
output_format = st.selectbox("Timestamps? (Optional)", ["None", ".srt"])
st.markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")

if st.button("Transcribe"):
    if not url.strip():
        # Without a URL the download step can only fail; say so up front.
        st.warning("Please enter a YouTube video URL.")
    else:
        transcript = get_transcript(url, model_size, lang, output_format)
        st.text_area("Transcription of the video", transcript)
        if output_format == ".srt":
            try:
                save_srt(transcript)
                download_srt(transcript)
            finally:
                # Remove the scratch file even when the steps above raise.
                if os.path.exists("done.txt"):
                    os.remove("done.txt")