| import streamlit as st | |
| import whisper | |
| from pytube import YouTube | |
| import os | |
def get_audio(url):
    """Download an audio-only stream of a YouTube video.

    Args:
        url: Video address, handed unchanged to ``YouTube``.

    Returns:
        The value returned by the stream's ``download`` call for the file
        requested under the name ``tmp.mp4``.
    """
    video = YouTube(url)
    audio_streams = video.streams.filter(only_audio=True)
    # The first audio-only stream is taken as-is; no quality ranking is done.
    chosen_stream = audio_streams[0]
    return chosen_stream.download(filename="tmp.mp4")
def get_transcript(url, model_size, lang, format):
    """Transcribe the audio track of a YouTube video with Whisper.

    Args:
        url: Video address passed to ``get_audio``.
        model_size: Model name given to ``whisper.load_model``.
        lang: Language forwarded to ``transcribe``; the string ``"None"``
            is converted to ``None`` first.
        format: ``"None"`` for plain text, ``".srt"`` for SRT-formatted text.

    Returns:
        ``result["text"]`` when ``format`` is ``"None"``, the
        ``format_to_srt`` rendering of ``result["segments"]`` when it is
        ``".srt"``, and ``None`` for any other value.
    """
    whisper_model = whisper.load_model(model_size)
    language = None if lang == "None" else lang
    audio_file = get_audio(url)
    transcription = whisper_model.transcribe(
        audio_file, fp16=False, language=language
    )
    if format == "None":
        return transcription["text"]
    if format == ".srt":
        return format_to_srt(transcription["segments"])
    # Unrecognised format values yield no transcript.
    return None
def format_to_srt(segments):
    """Render transcription segments as .srt subtitle text.

    Args:
        segments: Iterable of mappings that provide ``start``, ``end`` and
            ``text`` entries.

    Returns:
        One cue per segment, numbered from 1, each made of the index line,
        the ``start --> end`` line and the text, followed by a blank line.
        An empty string when there are no segments.
    """
    cues = []
    for number, seg in enumerate(segments, start=1):
        begin = format_timestamp(seg['start'])
        finish = format_timestamp(seg['end'])
        cues.append(f"{number}\n{begin} --> {finish}\n{seg['text']}\n\n")
    return "".join(cues)
def format_timestamp(t):
    """Format a time offset in seconds as an SRT timestamp.

    The offset is rounded to the nearest whole millisecond *before* it is
    split into fields. Truncating the fractional part on its own loses a
    millisecond to binary float error (2.3 s came out as ``00:00:02,299``).

    Args:
        t: Offset in seconds (int or float).

    Returns:
        A string of the form ``HH:MM:SS,mmm``.
    """
    # Work in integer milliseconds so every field is exact from here on.
    total_ms = int(round(t * 1000))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, ss = divmod(total_seconds, 60)
    hh, mm = divmod(total_minutes, 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d},{ms:03d}"
def save_srt(transcript):
    """Write the transcript to ``transcript.srt`` in the working directory.

    Args:
        transcript: Text to store; any existing file is overwritten.

    Returns:
        ``True`` once the file has been written.
    """
    # Pin UTF-8: transcripts can contain any script, and the platform default
    # encoding (e.g. cp1252) cannot represent all of them.
    with open("transcript.srt", "w", encoding="utf-8") as f:
        f.write(transcript)
    return True
def download_srt(transcript):
    """Show a Streamlit button that downloads the transcript as an .srt file.

    Args:
        transcript: Subtitle text to offer for download. It is served
            exactly as given, so it should already be SRT-formatted (the
            form ``get_transcript`` returns for the ``".srt"`` format).

    Returns:
        The value returned by ``st.download_button``.
    """
    # The text is handed to the button directly. Feeding it back through
    # ``format_to_srt`` fails, because that function indexes each item by
    # 'start'/'end'/'text' and a string yields single characters.
    # ``data`` must be the content itself; the file name goes in ``file_name``.
    return st.download_button(
        label="Download Transcript (.srt)",
        data=transcript,
        file_name="transcript.srt",
    )
# Widget choices. "None" is a placeholder string that get_transcript turns
# into language=None before calling Whisper.
langs = ["None"] + sorted(whisper.tokenizer.LANGUAGES.values())
# Public accessor for the model names, instead of the private _MODELS table.
model_size = whisper.available_models()

st.title("Whisper Transcription Demo")
url = st.text_input("Youtube video URL")
model_size = st.selectbox("Model", model_size)
lang = st.selectbox("Language (Optional)", langs)
format = st.selectbox("Timestamps? (Optional)", ["None", ".srt"])
st.markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")

if st.button("Transcribe"):
    if not url.strip():
        # Without a URL the download step can only fail; say so up front.
        st.warning("Please enter a YouTube video URL.")
    else:
        transcript = get_transcript(url, model_size, lang, format)
        st.text_area("Transcription of the video", transcript)
        if format == ".srt":
            try:
                save_srt(transcript)
                download_srt(transcript)
            finally:
                # Remove the scratch file if one was left behind, even when
                # one of the steps above raised.
                if os.path.exists("done.txt"):
                    os.remove("done.txt")