File size: 6,884 Bytes
017fe6b
 
 
 
 
 
bde5cae
258ebd9
0da98f5
6229184
017fe6b
 
 
 
 
 
 
a710e3d
017fe6b
bde5cae
258ebd9
0da98f5
017fe6b
258ebd9
017fe6b
258ebd9
a710e3d
017fe6b
 
 
 
258ebd9
 
ab53db4
017fe6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258ebd9
017fe6b
a710e3d
 
 
017fe6b
 
 
 
 
 
 
 
 
 
724443f
017fe6b
a710e3d
017fe6b
258ebd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54a059f
258ebd9
a710e3d
54a059f
258ebd9
a710e3d
724443f
258ebd9
724443f
d62bd79
724443f
258ebd9
 
724443f
6229184
724443f
 
 
 
 
 
 
a710e3d
724443f
 
 
017fe6b
258ebd9
724443f
 
 
 
 
 
2271a2b
 
 
 
 
 
258ebd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
017fe6b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import math
import os
import sys

import numpy as np
import streamlit as st
from moviepy.editor import AudioClip, VideoFileClip, concatenate_videoclips

@st.cache_data
def get_stats(_audio_clip, window_size = 0.60, max_windows=650):
    """Summarise the peak volume of consecutive fixed-length windows of a clip.

    The clip is split into back-to-back windows of ``window_size`` starting at
    time 0; the peak volume of each window is measured and the peaks are
    aggregated.

    Args:
        _audio_clip: Object exposing ``end`` and ``subclip(start, stop)``,
            whose sub-clips expose ``max_volume()``.
            NOTE(review): the leading underscore presumably makes
            ``st.cache_data`` skip hashing this argument, so cached results
            would be keyed on the remaining arguments only -- confirm this is
            acceptable when a different clip is passed.
        window_size: Window length, in the units of ``_audio_clip.end``
            (seconds, assuming a MoviePy clip).
        max_windows: Upper bound on the number of windows analysed, counted
            from the start of the clip. ``None`` analyses every full window.

    Returns:
        dict with keys ``"mean"``, ``"std"``, ``"max"`` and ``"median"`` (over
        the per-window peak volumes) and ``"clips"`` (number of windows
        analysed). When the clip is shorter than one window every statistic
        is ``0.0`` and ``"clips"`` is ``0``.
    """
    audio_clip = _audio_clip
    total_windows = math.floor(audio_clip.end / window_size)
    num_windows = total_windows if max_windows is None else min(max_windows, total_windows)

    # np.max / np.median are undefined on an empty array, so report an
    # explicit "nothing measured" result instead of raising.
    if num_windows <= 0:
        return {"mean": 0.0, "std": 0.0, "max": 0.0, "median": 0.0, "clips": 0}

    peaks = np.array([
        audio_clip.subclip(i * window_size, (i + 1) * window_size).max_volume()
        for i in range(num_windows)
    ])
    return {
        "mean": np.mean(peaks),
        "std": np.std(peaks),
        "max": np.max(peaks),
        "median": np.median(peaks),
        "clips": len(peaks),
    }

@st.cache_data
def find_speaking(_audio_clip, window_size=0.60, volume_threshold=0.01, ease_in=0.20, max_windows=650):
    """Locate the loud ("speaking") stretches and the silent windows of a clip.

    The clip is split into back-to-back windows of ``window_size`` starting at
    time 0. A window is silent when its peak volume is below
    ``volume_threshold``. Runs of consecutive non-silent windows become
    speaking intervals, each widened by ``ease_in`` on both sides; intervals
    that overlap after widening are merged.

    Args:
        _audio_clip: Object exposing ``end`` and ``subclip(start, stop)``,
            whose sub-clips expose ``max_volume()``.
            NOTE(review): the leading underscore presumably makes
            ``st.cache_data`` skip hashing this argument, so cached results
            would be keyed on the remaining arguments only -- confirm.
        window_size: Window length, in the units of ``_audio_clip.end``.
        volume_threshold: Peak volume below which a window counts as silent.
        ease_in: Padding added before and after every speaking interval.
        max_windows: Upper bound on the number of windows analysed, counted
            from the start of the clip. ``None`` analyses every full window.

    Returns:
        ``(speaking_intervals, silent_windows)``: two lists of
        ``[start, end]`` pairs. Speaking intervals are clamped to
        ``[0, _audio_clip.end]``.
    """
    audio_clip = _audio_clip
    clip_end = audio_clip.end
    total_windows = math.floor(clip_end / window_size)
    num_windows = total_windows if max_windows is None else min(max_windows, total_windows)

    # First pass: classify every window as silent or not.
    window_is_silent = []
    silent_windows = []
    for i in range(num_windows):
        start, stop = i * window_size, (i + 1) * window_size
        is_silent = audio_clip.subclip(start, stop).max_volume() < volume_threshold
        window_is_silent.append(is_silent)
        if is_silent:
            silent_windows.append([start, stop])

    # Second pass: collect runs of consecutive non-silent windows.
    runs = []
    run_start = None
    for i, is_silent in enumerate(window_is_silent):
        if is_silent:
            if run_start is not None:
                runs.append((run_start, i * window_size))
                run_start = None
        elif run_start is None:
            run_start = i * window_size
    # A run still open here means the analysed audio ends while speaking;
    # close it at the end of the last window so it is not dropped.
    if run_start is not None:
        runs.append((run_start, num_windows * window_size))

    # Third pass: pad, clamp and merge.
    speaking_intervals = []
    for start, stop in runs:
        # Clamp to the clip: MoviePy's subclip() treats a negative start as
        # an offset from the END of the clip, which would cut the wrong part.
        padded = [max(0.0, start - ease_in), min(clip_end, stop + ease_in)]
        # With tiny windows the padding can reach into the previous interval.
        if speaking_intervals and speaking_intervals[-1][1] > padded[0]:
            speaking_intervals[-1][1] = padded[1]
        else:
            speaking_intervals.append(padded)

    return speaking_intervals, silent_windows

def read_video(name):
    """Open the video file at ``name`` and return it as a ``VideoFileClip``."""
    clip = VideoFileClip(name)
    return clip

def _remove_if_exists(path):
    """Delete ``path`` and return True, or return False if it did not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        return False
    return True


def main():
    """Render the Streamlit page that removes silent sections from a video.

    Flow: the uploaded file is copied into the working directory, per-window
    volume statistics are shown, and when 'Cut' is pressed the non-silent
    intervals are concatenated into ``edited.mp4``, which is displayed and
    offered for download. The opened video is closed on every exit path.
    """
    st.title('Video Editor')
    st.subheader('Cuts silences given a threshold')
    uploaded_file = st.file_uploader("Please upload a video file (mp4 or mov)", type=["mp4", "mov"])

    if uploaded_file is None:
        return

    # The file name is supplied by the client; keep only its final path
    # component so the copy cannot be written outside the working directory.
    local_name = os.path.basename(uploaded_file.name)
    with open(local_name, "wb") as f:
        f.write(uploaded_file.read())

    st.subheader('Original Video')
    st.write(uploaded_file.name)
    st.video(uploaded_file)
    vid = read_video(local_name)
    try:
        if vid.audio is None:
            st.error('This video has no audio track, so there is no silence to detect.')
            return

        stats = get_stats(vid.audio)
        with st.status('Settings'):
            labelled_stats = [
                ("Std noise", "std"),
                ("Max noise", "max"),
                ("Mean noise", "mean"),
                ("Median noise", "median"),
                ("Number of clips", "clips"),
            ]
            for column, (label, key) in zip(st.columns(5), labelled_stats):
                with column:
                    st.write(label)
                    st.write(stats[key])

            threshold = st.slider("Control the sound threshold (%) here. The lower the threshold the more you keep.", min_value=0, max_value=100, step=5 , value=40)

        if not st.button('Cut'):
            return

        with st.spinner('Looking for the sound of silence...'):
            # The slider is a percentage of (mean + std) of the window peaks.
            intervals_to_keep, silent_windows = find_speaking(vid.audio, volume_threshold=threshold*(stats["mean"]+stats["std"])/100)
            st.write("Found the silence. Cutting...")
            keep_clips = [vid.subclip(start, end) for start, end in intervals_to_keep]
            silent_clips = [vid.subclip(start, end) for start, end in silent_windows]
            if not keep_clips:
                st.write("Nothing to keep with current settings. Try increasing threshold")
                return

            st.write(f'We cut {len(silent_clips)} sections of the video')
            st.write(f'We kept {stats["clips"] - len(silent_clips)} sections of the video')
            edited_video = concatenate_videoclips(keep_clips)
            st.write('Saving edited video...')
            edited_video.write_videofile('edited.mp4',
                fps=60,
                preset='ultrafast',
                codec='libx264',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True,
                audio_codec="aac",
                threads=4
            )
            st.subheader('Edited Video')
            st.video('edited.mp4')

            with open('edited.mp4', 'rb') as f:
                video_bytes = f.read()

            # NOTE(review): Streamlit buttons presumably report True only on
            # the rerun their own click triggers, and on that rerun 'Cut' is
            # no longer pressed -- so this cleanup may never execute while it
            # is nested here. Confirm, and consider st.session_state.
            if st.download_button(label='Download file',
                            data=video_bytes,
                            file_name='edited.mp4',
                            mime='video/mp4'):
                for path in ('edited.mp4', local_name):
                    if _remove_if_exists(path):
                        st.info(f'Deleted {path} from filesystem.')
                # Matches the removed_<n>.mp4 naming of the TODO export below.
                for counter in range(1, len(silent_clips) + 1):
                    _remove_if_exists(f'removed_{counter}.mp4')

            #TODO
            #for counter, silent_clip in enumerate(silent_clips, start=1):
            #    name =f'removed_{counter}.mp4'
            #    silent_clip.write_videofile(name,
            #    fps=60,
            #    preset='ultrafast',
            #    codec='libx264',
            #    temp_audiofile=f'temp-{counter}.m4a',
            #    remove_temp=True,
            #    audio_codec="aac",
            #    threads=4
            #    )
            #    st.video(name)
    finally:
        # Release the clip's readers on every path, not only after a cut.
        vid.close()
                    

# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()