Spaces:
Sleeping
Sleeping
File size: 2,203 Bytes
843111c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from os import path as osp
import json
import cv2
import webvtt
from utils import maintain_aspect_ratio_resize, str2time
def extract_and_save_frames_and_metadata(
        path_to_video,
        path_to_transcript,
        path_to_save_extracted_frames,
        path_to_save_metadatas):
    """Extract one frame per transcript segment and save frames plus metadata.

    For every segment in the WebVTT transcript, the frame located at the
    midpoint of the segment is read from the video, resized to a height of
    350 via ``maintain_aspect_ratio_resize``, and written as
    ``frame_<idx>.jpg``. A metadata record is collected for each frame that
    was successfully read and written; all records are dumped to
    ``metadatas.json``.

    Args:
        path_to_video: Path of the video file opened with ``cv2.VideoCapture``.
        path_to_transcript: Path of the WebVTT transcript read with
            ``webvtt.read``.
        path_to_save_extracted_frames: Directory that receives the JPEG
            frames. It is not created here.
        path_to_save_metadatas: Directory that receives ``metadatas.json``.
            It is not created here.

    Returns:
        A list of dicts, one per saved frame, with the keys
        ``extracted_frame_path``, ``transcript``, ``video_segment_id``,
        ``video_path`` and ``mid_time_ms``.
    """
    # metadatas will store the metadata of all extracted frames
    metadatas = []

    # load video using cv2
    video = cv2.VideoCapture(path_to_video)
    try:
        # load transcript using webvtt
        trans = webvtt.read(path_to_transcript)

        # for each video segment specified in the transcript file
        for idx, transcript in enumerate(trans):
            # get the start time and end time of the segment
            # NOTE(review): str2time is assumed to return milliseconds, since
            # the midpoint is passed to CAP_PROP_POS_MSEC - confirm in utils.
            start_time_ms = str2time(transcript.start)
            end_time_ms = str2time(transcript.end)
            # time exactly in the middle of start time and end time
            mid_time_ms = (end_time_ms + start_time_ms) / 2
            # get the transcript, remove the next-line symbol
            text = transcript.text.replace("\n", ' ')

            # seek to the middle time and grab the frame there
            video.set(cv2.CAP_PROP_POS_MSEC, mid_time_ms)
            success, frame = video.read()
            if not success:
                print(f"ERROR! Cannot extract frame: idx = {idx}")
                continue

            # the frame was extracted successfully, resize it
            image = maintain_aspect_ratio_resize(frame, height=350)

            # save frame as JPEG file
            img_fname = f'frame_{idx}.jpg'
            img_fpath = osp.join(path_to_save_extracted_frames, img_fname)
            # imwrite reports failure through its return value; do not record
            # metadata that points at a file which was never written
            if not cv2.imwrite(img_fpath, image):
                print(f"ERROR! Cannot save frame: idx = {idx}, path = {img_fpath}")
                continue

            # prepare the metadata
            metadatas.append({
                'extracted_frame_path': img_fpath,
                'transcript': text,
                'video_segment_id': idx,
                'video_path': path_to_video,
                'mid_time_ms': mid_time_ms,
            })
    finally:
        # always free the capture handle, even if parsing or decoding raised
        video.release()

    # save metadata of all extracted frames
    fn = osp.join(path_to_save_metadatas, 'metadatas.json')
    with open(fn, 'w', encoding='utf-8') as outfile:
        json.dump(metadatas, outfile)
    return metadatas