# Hugging Face Space: Video Editor (page snapshot — status: Sleeping; file size: 6,884 bytes)
import math
import os
import sys

import numpy as np
import streamlit as st
from moviepy.editor import AudioClip, VideoFileClip, concatenate_videoclips
@st.cache_data
def get_stats(_audio_clip, window_size=0.60):
    """Compute summary statistics of per-window peak volume for an audio clip.

    Parameters
    ----------
    _audio_clip : moviepy audio clip
        The leading underscore tells st.cache_data not to hash this argument.
    window_size : float
        Length of each analysis window, in seconds.

    Returns
    -------
    dict
        Keys "mean", "std", "max", "median" (statistics over the windows'
        peak volumes) and "clips" (number of windows analysed, capped at 650).
    """
    audio_clip = _audio_clip
    # Cap the number of windows so very long clips don't take forever to scan.
    num_windows = min(650, math.floor(audio_clip.end / window_size))
    # Peak volume of each consecutive window.
    max_volumes = np.array([
        audio_clip.subclip(i * window_size, (i + 1) * window_size).max_volume()
        for i in range(num_windows)
    ])
    return {
        "mean": np.mean(max_volumes),
        "std": np.std(max_volumes),
        "max": np.max(max_volumes),
        "median": np.median(max_volumes),
        "clips": len(max_volumes),
    }
@st.cache_data
def find_speaking(_audio_clip, window_size=0.60, volume_threshold=0.01, ease_in=0.20):
    """Split an audio clip into speaking intervals and silent windows.

    Scans fixed-size windows (capped at 650), marks each as silent when its
    peak volume is below *volume_threshold*, then converts the runs of
    non-silent windows into [start, end] speaking intervals padded by
    *ease_in* seconds on each side.

    Parameters
    ----------
    _audio_clip : moviepy audio clip
        The leading underscore tells st.cache_data not to hash this argument.
    window_size : float
        Length of each analysis window, in seconds.
    volume_threshold : float
        Windows with peak volume below this value count as silent.
    ease_in : float
        Padding, in seconds, added before/after each speaking interval.

    Returns
    -------
    (speaking_intervals, silent_windows)
        Both are lists of [start_seconds, end_seconds] pairs.
    """
    audio_clip = _audio_clip
    # First, iterate over audio to find all silent windows.
    num_windows = min(650, math.floor(audio_clip.end / window_size))
    window_is_silent = []
    silent_windows = []
    # TODO: this scan is embarrassingly parallel and could be multi-threaded.
    for i in range(num_windows):
        segment = audio_clip.subclip(i * window_size, (i + 1) * window_size)
        volume = segment.max_volume()
        is_silent = volume < volume_threshold
        window_is_silent.append(is_silent)
        if is_silent:
            silent_windows.append([i * window_size, (i + 1) * window_size])
    # Find speaking intervals from silence <-> speaking transitions.
    speaking_start = 0
    speaking_end = 0
    speaking_intervals = []
    for i in range(1, len(window_is_silent)):
        prev_silent = window_is_silent[i - 1]
        curr_silent = window_is_silent[i]
        # silence -> speaking: a new interval begins here.
        if prev_silent and not curr_silent:
            speaking_start = i * window_size
        # speaking -> silence: the interval ends; record it.
        if not prev_silent and curr_silent:
            speaking_end = i * window_size
            # BUG FIX: clamp the eased edges. Without this, the first interval
            # could start at a negative time (moviepy interprets negative
            # subclip times as offsets from the end) and the last interval
            # could extend past the clip's duration.
            new_speaking_interval = [
                max(0, speaking_start - ease_in),
                min(audio_clip.end, speaking_end + ease_in),
            ]
            # With tiny windows, the ease-in padding can overlap the previous
            # interval, so merge instead of appending.
            need_to_merge = (
                len(speaking_intervals) > 0
                and speaking_intervals[-1][1] > new_speaking_interval[0]
            )
            if need_to_merge:
                speaking_intervals[-1] = [speaking_intervals[-1][0], new_speaking_interval[1]]
            else:
                speaking_intervals.append(new_speaking_interval)
    return speaking_intervals, silent_windows
def read_video(name):
    """Load the video file at *name* as a moviepy VideoFileClip."""
    clip = VideoFileClip(name)
    return clip
def main():
    """Streamlit entry point: upload a video, show per-window volume stats,
    and cut out the silent sections below a user-chosen threshold."""
    st.title('Video Editor')
    st.subheader('Cuts silences given a threshold')
    uploaded_file = st.file_uploader("Please upload a video file (mp4 or mov)", type=["mp4", "mov"])
    if uploaded_file is not None:
        # Persist the upload to disk so moviepy can open it by filename.
        with open(uploaded_file.name, "wb") as f:
            f.write(uploaded_file.read())
        st.subheader('Original Video')
        st.write(uploaded_file.name)
        st.video(uploaded_file)
        vid = read_video(uploaded_file.name)
        stats = get_stats(vid.audio)
        # Show the audio statistics so the user can pick a sensible threshold.
        with st.status('Settings'):
            col1, col2, col3, col4, col5 = st.columns(5)
            with col1:
                st.write("Std noise")
                st.write(stats["std"])
            with col2:
                st.write("Max noise")
                st.write(stats["max"])
            with col3:
                st.write("Mean noise")
                st.write(stats["mean"])
            with col4:
                st.write("Median noise")
                st.write(stats["median"])
            with col5:
                st.write("Number of clips")
                st.write(stats["clips"])
        threshold = st.slider("Control the sound threshold (%) here. The lower the threshold the more you keep.", min_value=0, max_value=100, step=5, value=40)
        if st.button('Cut'):
            with st.spinner('Looking for the sound of silence...'):
                # Scale the percentage slider against mean+std of window volume.
                intervals_to_keep, silent_windows = find_speaking(
                    vid.audio,
                    volume_threshold=threshold * (stats["mean"] + stats["std"]) / 100,
                )
            st.write("Found the silence. Cutting...")
            keep_clips = [vid.subclip(start, end) for [start, end] in intervals_to_keep]
            silent_clips = [vid.subclip(start, end) for [start, end] in silent_windows]
            if len(keep_clips) == 0:
                st.write("Nothing to keep with current settings. Try increasing threshold")
            else:  # was a redundant `elif len(keep_clips) != 0`
                st.write(f'We cut {len(silent_clips)} sections of the video')
                st.write(f'We kept {stats["clips"] - len(silent_clips)} sections of the video')
                edited_video = concatenate_videoclips(keep_clips)
                st.write('Saving edited video...')
                edited_video.write_videofile('edited.mp4',
                                             fps=60,
                                             preset='ultrafast',
                                             codec='libx264',
                                             temp_audiofile='temp-audio.m4a',
                                             remove_temp=True,
                                             audio_codec="aac",
                                             threads=4
                                             )
                st.subheader('Edited Video')
                st.video('edited.mp4')
                with open('edited.mp4', 'rb') as f:
                    video_bytes = f.read()
                # download_button returns True on the rerun triggered by the
                # click, at which point the temp files can be cleaned up.
                if st.download_button(label='Download file',
                                      data=video_bytes,
                                      file_name='edited.mp4',
                                      mime='video/mp4'):
                    if os.path.exists('edited.mp4'):
                        os.remove('edited.mp4')
                        st.info('Deleted edited.mp4 from filesystem.')
                    if os.path.exists(uploaded_file.name):
                        os.remove(uploaded_file.name)
                        # BUG FIX: the original message wrongly said
                        # 'edited.mp4' here while deleting the uploaded file.
                        st.info(f'Deleted {uploaded_file.name} from filesystem.')
                    # BUG FIX: `counter` was read before assignment (NameError)
                    # and never incremented, so only removed_1.mp4 would ever
                    # have been targeted. Initialise and step it per clip.
                    counter = 1
                    for _ in silent_clips:
                        name = f'removed_{counter}.mp4'
                        if os.path.exists(name):
                            os.remove(name)
                        counter += 1
                # TODO: optionally render each removed silent section to
                # removed_{n}.mp4 and preview it with st.video.
        vid.close()
# Run the app when executed directly (e.g. via `streamlit run`).
if __name__ == '__main__':
    main()