|
|
|
|
|
|
|
|
""" |
|
|
preprocess_videos_v2.py |
|
|
|
|
|
本脚本是 preprocess_videos.py 的 v2 版本。 |
|
|
它会创建 AudioVisual.py 期望的嵌套目录结构。 |
|
|
|
|
|
它会为每个 .mp4 文件 (例如 video1.mp4) 创建如下结构: |
|
|
.../data/video1_frames/ |
|
|
├── frame_1/ |
|
|
│ └── video1.jpg |
|
|
├── frame_2/ |
|
|
│ └── video1.jpg |
|
|
... |
|
|
└── frame_N/  (N = NUM_FRAMES) |
|
|
└── video1.jpg |
|
|
""" |
|
|
|
|
|
import os |
|
|
import cv2 |
|
|
import glob |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Root directory that is searched (recursively) for input .mp4 files.
SOURCE_DIR: str = "/home5/bwd/equiav/videos"

# Root directory under which one "<video_id>_frames" folder per video is created.
TARGET_DIR: str = "/home5/bwd/equiav/frames"

# Number of evenly spaced frames sampled from every video.
# NOTE(review): confirm this matches the frame count the downstream loader expects.
NUM_FRAMES: int = 11

# Suffix appended to a video's base name to form its output folder name.
FRAME_DIR_SUFFIX: str = "_frames"

# Name template of each per-frame subfolder; formatted with a 1-based integer index.
FRAME_SUBDIR_FORMAT: str = "frame_{:d}"
|
|
|
|
|
|
|
|
|
|
|
def _extract_video_frames(mp4_file_path, target_dir, num_frames):
    """Sample ``num_frames`` evenly spaced frames from one video and save them.

    Frames are written as
    ``<target_dir>/<video_id><FRAME_DIR_SUFFIX>/frame_<k>/<video_id>.jpg``
    with ``k`` running from 1 to ``num_frames``.

    Args:
        mp4_file_path: Path of the video file to read.
        target_dir: Root directory that receives the per-video frame folder.
        num_frames: Number of frames to sample (must be >= 1).

    Returns:
        The number of frames successfully written, or ``None`` when the video
        was skipped (could not be opened, or reports fewer frames than
        requested).
    """
    video_id = os.path.splitext(os.path.basename(mp4_file_path))[0]
    base_frame_output_dir = os.path.join(target_dir, video_id + FRAME_DIR_SUFFIX)

    cap = cv2.VideoCapture(mp4_file_path)
    try:
        if not cap.isOpened():
            print(f"错误: 无法打开视频文件 {mp4_file_path}")
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames < num_frames:
            print(f"警告: {video_id}.mp4 只有 {total_frames} 帧 (少于 {num_frames} 帧)。跳过此视频。")
            return None

        # Evenly spaced frame positions covering the first through last frame.
        indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
        print(f"正在处理 {video_id}.mp4 ... (共 {total_frames} 帧, 提取 {len(indices)} 帧)")

        frames_saved = 0
        for i, frame_index in enumerate(indices):
            # Hand OpenCV a plain Python int rather than a NumPy scalar.
            frame_index = int(frame_index)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            ret, frame = cap.read()
            if not ret:
                print(f" - 错误: 无法读取帧索引 {frame_index}")
                continue

            # Subfolders are numbered from 1 (frame_1, frame_2, ...).
            frame_subdir = os.path.join(base_frame_output_dir, FRAME_SUBDIR_FORMAT.format(i + 1))
            os.makedirs(frame_subdir, exist_ok=True)
            output_path = os.path.join(frame_subdir, video_id + '.jpg')

            # imwrite reports failure through its return value; only count
            # frames that actually reached disk.
            if cv2.imwrite(output_path, frame):
                frames_saved += 1
            else:
                print(f" - 错误: 无法写入图像 {output_path}")

        print(f" -> 完成。 {frames_saved} 帧已保存至 {base_frame_output_dir} (嵌套结构)\n")
        return frames_saved
    finally:
        # Release the capture on every path: skip, success, or exception.
        cap.release()


def preprocess_videos_v2(source_dir=None, target_dir=None, num_frames=None):
    """Extract evenly spaced frames from every .mp4 under a source directory (V2).

    The source directory is searched recursively. For each video, frames are
    written into a nested layout: one ``<video_id><FRAME_DIR_SUFFIX>`` folder
    per video, holding one ``frame_<k>`` subfolder per sampled frame, each
    containing ``<video_id>.jpg``. A video that fails does not abort the
    batch: the error is printed and processing moves on to the next file.

    Note: the output folder is derived from the file's base name only, so two
    videos with the same name in different subdirectories write to the same
    output folder.

    Args:
        source_dir: Directory to search for videos. Defaults to ``SOURCE_DIR``.
        target_dir: Directory that receives the frame folders. Defaults to
            ``TARGET_DIR``.
        num_frames: Frames to sample per video. Defaults to ``NUM_FRAMES``.

    Returns:
        None. Progress and errors are reported on standard output.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
    """
    # Resolve defaults at call time so the module constants remain the
    # single source of truth when no arguments are given.
    source_dir = SOURCE_DIR if source_dir is None else source_dir
    target_dir = TARGET_DIR if target_dir is None else target_dir
    num_frames = NUM_FRAMES if num_frames is None else num_frames
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    print("开始预处理视频 (V2)...")
    print(f"源目录: {source_dir}")
    print(f"目标目录: {target_dir}")
    print(f"每个视频提取帧数: {num_frames}\n")

    # Sorted so that runs process videos in a reproducible order.
    mp4_files = sorted(glob.glob(os.path.join(source_dir, '**', '*.mp4'), recursive=True))

    if not mp4_files:
        print(f"错误: 在 {source_dir} 中未找到 .mp4 文件。")
        return

    print(f"找到了 {len(mp4_files)} 个 .mp4 文件。\n")

    for mp4_file_path in mp4_files:
        try:
            _extract_video_frames(mp4_file_path, target_dir, num_frames)
        except Exception as e:
            # Deliberate best-effort batch boundary: report and keep going.
            print(f"处理 {mp4_file_path} 时发生未知错误: {e}")

    print("=" * 30)
    print("所有视频预处理 (V2) 完成。")
    print("=" * 30)
|
|
|
|
|
# Run the preprocessing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    preprocess_videos_v2()
|
|
|
|
|
|