File size: 2,264 Bytes
d5d73d3
 
 
5d96714
d5d73d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e4f9e5
 
 
 
 
5d96714
 
 
 
 
 
 
 
 
d5d73d3
 
 
 
 
 
 
 
 
 
 
 
 
 
730e8ab
ae4618e
5d96714
f4c9403
5d96714
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import whisper
from pytube import YouTube
import os

def get_audio(url):
    """Download an audio-only stream of the YouTube video at ``url``.

    The first entry of the audio-only stream query is saved under the fixed
    file name ``tmp.mp4``.

    Returns:
        Whatever the stream's ``download`` call returns (presumably the path
        of the saved file; callers pass it straight to the transcriber).
    """
    video = YouTube(url)
    audio_streams = video.streams.filter(only_audio=True)
    first_stream = audio_streams[0]
    return first_stream.download(filename="tmp.mp4")

def get_transcript(url, model_size, lang, format):
    """Transcribe the audio of a YouTube video with Whisper.

    Args:
        url: Address of the video whose audio is downloaded via ``get_audio``.
        model_size: Model name handed to ``whisper.load_model``.
        lang: Language passed to ``transcribe``; the string ``"None"`` is
            converted to ``None`` before the call.
        format: ``"None"`` to get the plain text, ``".srt"`` to get the
            segments rendered through ``format_to_srt``.

    Returns:
        The transcript string for the two known formats, ``None`` for any
        other ``format`` value.
    """
    whisper_model = whisper.load_model(model_size)
    language = None if lang == "None" else lang

    audio_path = get_audio(url)
    transcription = whisper_model.transcribe(audio_path, fp16=False, language=language)

    if format == ".srt":
        return format_to_srt(transcription["segments"])
    if format == "None":
        return transcription["text"]
    return None

def format_to_srt(segments):
    """Render transcription segments as SubRip (.srt) text.

    Args:
        segments: Iterable of mappings that each provide ``start`` and
            ``end`` (seconds) and ``text`` (the caption string).

    Returns:
        A single string with one numbered cue per segment, each followed by
        a blank line. An empty iterable yields ``""``.
    """
    cues = []
    # SRT cue numbers are 1-based.
    for index, segment in enumerate(segments, start=1):
        start = format_timestamp(segment["start"])
        end = format_timestamp(segment["end"])
        # Surrounding whitespace is not part of the caption; strip it so the
        # cue text starts at the first visible character.
        text = segment["text"].strip()
        cues.append(f"{index}\n{start} --> {end}\n{text}\n\n")
    return "".join(cues)

def format_timestamp(t):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The offset is first rounded to a whole number of milliseconds and then
    split with integer arithmetic. Deriving the millisecond field from the
    float remainder instead truncates on representation error (for example
    ``(2.3 - 2) * 1000`` is ``299.99...`` and would print ``299``).

    Args:
        t: Non-negative offset in seconds (int or float).

    Returns:
        The zero-padded timestamp string.
    """
    total_ms = int(round(t * 1000))
    hh, remainder = divmod(total_ms, 3_600_000)
    mm, remainder = divmod(remainder, 60_000)
    ss, ms = divmod(remainder, 1000)
    return f"{hh:02d}:{mm:02d}:{ss:02d},{ms:03d}"

def save_srt(transcript):
    """Write ``transcript`` to ``transcript.srt`` in the working directory.

    The file is written as UTF-8 explicitly so that transcripts containing
    non-ASCII characters do not depend on the platform's default encoding.

    Args:
        transcript: Text to store.

    Returns:
        ``True`` once the file has been written.
    """
    with open("transcript.srt", "w", encoding="utf-8") as f:
        f.write(transcript)
    return True

def download_srt(transcript):
    """Offer ``transcript`` as a downloadable ``transcript.srt`` file.

    ``transcript`` is expected to already be SRT-formatted text (the caller
    only invokes this when the ".srt" output was requested), so it is used
    as-is. Re-running it through ``format_to_srt`` would fail, because that
    function needs segment mappings rather than a string.

    Args:
        transcript: SRT text to write and to serve.

    Returns:
        The value returned by ``st.download_button``.
    """
    # Keep an on-disk copy under the same name the button offers.
    with open("transcript.srt", "w", encoding="utf-8") as f:
        f.write(transcript)
    # ``data`` must be the file contents; a file name here would be served
    # literally as the download payload.
    return st.download_button(
        label="Download Transcript (.srt)",
        data=transcript,
        file_name="transcript.srt",
        mime="text/plain",
    )

# Options shown in the UI. The leading "None" entry lets the user leave the
# language unspecified.
langs = ["None"] + sorted(whisper.tokenizer.LANGUAGES.values())
# Public accessor for the model names instead of the private ``_MODELS`` dict.
model_names = whisper.available_models()

st.title("Whisper Transcription Demo")

url = st.text_input("Youtube video URL")
model_size = st.selectbox("Model", model_names)
lang = st.selectbox("Language (Optional)", langs)
output_format = st.selectbox("Timestamps? (Optional)", ["None", ".srt"])
st.markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")

if st.button("Transcribe"):
    if not url.strip():
        # Without a URL the download step can only fail; tell the user instead.
        st.warning("Please enter a YouTube video URL.")
    else:
        transcript = get_transcript(url, model_size, lang, output_format)
        st.text_area("Transcription of the video", transcript)
        if output_format == ".srt":
            save_srt(transcript)
            download_srt(transcript)

# Remove the scratch file a previous run may have left behind.
if os.path.exists("done.txt"):
    os.remove("done.txt")