|
|
from os import path as osp |
|
|
import json |
|
|
|
|
|
import cv2 |
|
|
import webvtt |
|
|
|
|
|
from utils import maintain_aspect_ratio_resize, str2time |
|
|
|
|
|
def extract_and_save_frames_and_metadata(
        path_to_video,
        path_to_transcript,
        path_to_save_extracted_frames,
        path_to_save_metadatas):
    """Save one video frame per transcript cue plus a JSON metadata file.

    For every cue read from the WebVTT transcript, the video is positioned at
    the midpoint between the cue's start and end time and a single frame is
    read. The frame is passed through
    ``maintain_aspect_ratio_resize(frame, height=350)`` and written as
    ``frame_<idx>.jpg`` inside ``path_to_save_extracted_frames``. After all
    cues are processed, the collected records are dumped to
    ``metadatas.json`` inside ``path_to_save_metadatas``.

    Neither output directory is created here; both must already exist.

    Args:
        path_to_video: Path of the video file handed to ``cv2.VideoCapture``.
        path_to_transcript: Path of the transcript handed to ``webvtt.read``.
        path_to_save_extracted_frames: Directory that receives the JPEG
            frames.
        path_to_save_metadatas: Directory that receives ``metadatas.json``.

    Returns:
        A list with one dict per successfully saved frame, holding the keys
        ``extracted_frame_path``, ``transcript``, ``video_segment_id``,
        ``video_path`` and ``mid_time_ms``. Cues whose frame could not be
        read or written are reported on stdout and left out of the list.
    """
    metadatas = []

    video = cv2.VideoCapture(path_to_video)
    try:
        trans = webvtt.read(path_to_transcript)

        for idx, transcript in enumerate(trans):
            # NOTE(review): str2time is assumed to return milliseconds, as the
            # variable names and the CAP_PROP_POS_MSEC seek below suggest.
            start_time_ms = str2time(transcript.start)
            end_time_ms = str2time(transcript.end)
            mid_time_ms = (end_time_ms + start_time_ms) / 2

            # Collapse multi-line cues into a single line of text.
            text = transcript.text.replace("\n", ' ')

            # Seek to the middle of the cue and grab the frame found there.
            video.set(cv2.CAP_PROP_POS_MSEC, mid_time_ms)
            success, frame = video.read()
            if not success:
                print(f"ERROR! Cannot extract frame: idx = {idx}")
                continue

            image = maintain_aspect_ratio_resize(frame, height=350)

            img_fname = f'frame_{idx}.jpg'
            img_fpath = osp.join(path_to_save_extracted_frames, img_fname)

            # cv2.imwrite signals failure through its return value rather
            # than an exception; do not record metadata that would point at
            # a file that was never written.
            if not cv2.imwrite(img_fpath, image):
                print(f"ERROR! Cannot save frame: idx = {idx}")
                continue

            metadatas.append({
                'extracted_frame_path': img_fpath,
                'transcript': text,
                'video_segment_id': idx,
                'video_path': path_to_video,
                'mid_time_ms': mid_time_ms,
            })
    finally:
        # Always free the capture handle, even if transcript parsing or
        # frame processing raised.
        video.release()

    fn = osp.join(path_to_save_metadatas, 'metadatas.json')
    with open(fn, 'w', encoding='utf-8') as outfile:
        json.dump(metadatas, outfile)

    return metadatas