File size: 3,902 Bytes
1bb3d83
 
ab4e539
171b1d2
f19b32e
1bb3d83
7e630e9
8b4e83e
ad050ba
1bb3d83
171b1d2
 
 
 
 
 
 
 
 
b91c197
 
ad050ba
 
 
8b4e83e
ad050ba
 
171b1d2
 
 
 
 
 
 
7e630e9
171b1d2
 
 
 
7e630e9
4b38eb3
171b1d2
 
4b38eb3
171b1d2
 
 
 
 
 
 
 
ad050ba
 
 
658520f
ad050ba
 
658520f
ad050ba
5e27d13
658520f
ad050ba
 
 
658520f
 
ad050ba
ab4e539
 
171b1d2
8b4e83e
171b1d2
 
7e630e9
ad050ba
 
171b1d2
7e630e9
ad050ba
33f8805
5e27d13
ad050ba
 
 
 
 
6fdc43d
 
 
 
 
171b1d2
 
 
da2200a
 
 
 
e730b78
 
921236f
 
 
 
 
e730b78
bfd1bca
921236f
 
 
 
 
 
 
d3fcfe3
e730b78
1bb3d83
bfd1bca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from transformers import pipeline
import gradio as gr
import moviepy.editor as mp
from pytube import YouTube
import math

# Speech-recognition pipeline used by both transcription functions below; it is
# built once, when this module is loaded.
# NOTE(review): the UI strings in this file describe the app as Swedish, while
# this model id ends in "-hi" — confirm this is the intended checkpoint.
pipe = pipeline(model="Campfireman/whisper-small-hi")  # change to "your-username/the-name-you-picked"

# Length, in seconds, of each audio segment that is transcribed separately.
segment_length = 25 # 25s per segment

def download_video(url):
    """Download a YouTube video and load its audio track into module globals.

    The first progressive MP4 stream found for ``url`` is downloaded, its
    audio is written to ``AUDIO_ORIGINAL.wav``, and that file is reopened as
    an audio clip.

    Side effects: rebinds the globals ``my_clip`` (the video clip),
    ``original_wav`` (the extracted audio clip) and ``audio_length`` (the
    duration of ``original_wav``).

    Returns the local path of the downloaded video file.
    """
    global my_clip, original_wav, audio_length

    print("Downloading...")
    progressive_mp4 = YouTube(url).streams.filter(
        progressive=True, file_extension="mp4"
    )
    video_path = progressive_mp4.first().download()
    print("Downloaded")

    my_clip = mp.VideoFileClip(video_path)

    # Round-trip through a WAV file so the audio can be handled on its own.
    wav_name = "AUDIO_ORIGINAL.wav"
    my_clip.audio.write_audiofile(wav_name)
    original_wav = mp.AudioFileClip(wav_name)

    audio_length = original_wav.duration
    print("Overall audio time elapsed: " + str(audio_length))
    return video_path

def validate_youtube(url):
    """Return True only when ``url`` is a usable YouTube video of at most 10 minutes.

    Args:
        url: Link to check.

    Returns:
        False when a ``YouTube`` object or its length cannot be obtained for
        ``url``, or when the video is longer than 600 seconds; True otherwise.
    """
    try:
        # Reading the length is kept inside the try so that a failure while
        # looking it up is reported the same way as an unusable link.
        video_length = YouTube(url).length
    except Exception:
        print("Hi the URL seems not a valid YouTube video link")
        # An unusable link must be rejected; the caller treats False as
        # "do not download".
        return False

    # The length is expressed in seconds.
    if video_length > 600:
        print("Your video is longer than 10 minutes")
        return False

    print("Your video is less than 10 minutes")
    return True

def validate_url(url):
    """Return True when ``url`` is a well-formed URL, with or without a scheme.

    Args:
        url: Text entered by the user.

    Returns:
        True if ``validators.url`` accepts ``url`` as given or once
        ``https://`` is put in front of it; False for empty input or for text
        that is accepted in neither form.
    """
    import validators

    if not url:
        return False
    if validators.url(url):
        return True
    # The UI asks for links without "http://", so a bare "host/path" string is
    # checked again with a scheme in front of it.
    return bool(validators.url("https://" + url))

def audio_clipper(index, seg_total):
    """Write segment ``index`` of the original audio to its own WAV file.

    Works on a copy of the global ``original_wav``: the part before the
    segment is cut out (unless this is the first segment) and the part after
    it is cut out (unless this is the last of ``seg_total`` segments).

    Returns the name of the file written, ``audio_out<index>.wav``.
    """
    out_name = "audio_out" + str(index) + ".wav"
    segment = original_wav.copy()
    print("Segment " + str(index) + ":")

    seg_start = segment_length * index
    seg_end = segment_length * (index + 1)

    # Drop the leading part that belongs to earlier segments.
    if index > 0:
        print("Clipped: 0 ~ " + str(seg_start) + "sec")
        segment = segment.cutout(0, seg_start)

    # Drop the trailing part that belongs to later segments.
    # NOTE(review): these times are taken from the original timeline even when
    # the leading part was already removed above — confirm the resulting
    # segment is the intended one.
    if index < seg_total - 1:
        print("Clipped: " + str(seg_end) + "~ " + str(audio_length) + " sec")
        segment = segment.cutout(seg_end, audio_length)

    # Write out the temporary segment data.
    segment.write_audiofile(out_name)
    return out_name

def transcribe(video_url):
    """Transcribe the audio of a YouTube video, one fixed-length segment at a time.

    Args:
        video_url: Link entered in the UI.

    Returns:
        The concatenated text of all segments, or a human-readable message
        when the link is rejected or the audio yields no segments.
    """
    if not validate_url(video_url):
        return "Invalid URL. Please check the format of your link. "
    if not validate_youtube(video_url):
        return "The URL seems not for Youtube videos or the video is too long. Check out the errors in the log. "

    # download_video() publishes the clip duration through the global
    # ``audio_length`` read below.
    download_video(video_url)

    segment_count = math.ceil(audio_length / segment_length)
    print("Total segments: " + str(segment_count))
    if segment_count <= 0:
        # Report the length using the configured segment size rather than a
        # hard-coded number, so the message stays right if the size changes.
        return "Corrupted Video Data! Invalid length of " + str(segment_count * segment_length) + " second(s)."

    pieces = []
    for seg_index in range(segment_count):
        audio = audio_clipper(seg_index, segment_count)
        seg_text = pipe(audio, batch_size=512, truncation=True)["text"]
        print("Segtext: ")
        print(seg_text)
        pieces.append(seg_text)

    return "".join(pieces)


def transcribe2(audio):
    """Transcribe a recorded audio file.

    Args:
        audio: Path of the recording (the audio input in this file is
            configured with ``type="filepath"``). A falsy value such as
            ``None`` or ``""`` means there is nothing to transcribe.

    Returns:
        The recognized text, or an empty string when no recording was given.
    """
    if not audio:
        # Nothing to feed to the pipeline; avoid failing inside it.
        return ""
    return pipe(audio)["text"]
    
 
# Tab 1: enter a YouTube link and get the transcript of its audio.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Textbox(label="Enter the URL of the Youtube video clip here (without prefixes like http://):"),
    outputs="text",
    title="Whisper Small SE",
    description="Video Swedish Transcriber",
)

# Tab 2: record from the microphone and get the transcript.
iface2 = gr.Interface(
    fn=transcribe2,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
)

# Both interfaces are shown as tabs of a single app.
demo = gr.TabbedInterface(
    [iface, iface2],
    ["Swedish YouTube Video to Text", "Swedish Audio to Text"],
)

demo.launch()