| import os
|
| import cv2
|
| import numpy as np
|
| import torch
|
| import torch.nn as nn
|
| from PIL import Image
|
| from moviepy.editor import VideoFileClip
|
| import logging
|
|
|
| logger = logging.getLogger(__name__)
|
|
|
class BMTPreprocessor:
    """Video and audio preprocessing helpers for BMT feature extraction.

    The instance only stores two configuration values:

    * ``target_sr``  - sample rate handed to moviepy when writing audio.
    * ``target_fps`` - frame rate that ``sample_video_frames`` approximates
      by keeping every N-th decoded frame.
    """

    def __init__(self, target_sr=16000, target_fps=25):
        """Store the target audio sample rate and target video frame rate."""
        self.target_sr = target_sr
        self.target_fps = target_fps

    def extract_audio(self, video_path, output_audio_path):
        """Write the audio track of ``video_path`` to ``output_audio_path``.

        The audio is written with moviepy's ``write_audiofile`` using
        ``self.target_sr`` as the output sample rate.
        NOTE(review): the original docstring says the result is a WAV file;
        the container presumably follows the extension of
        ``output_audio_path`` - confirm against callers.

        Args:
            video_path: Path of the video file to read.
            output_audio_path: Path of the audio file to create.

        Returns:
            ``True`` if an audio file was written, ``False`` if the video has
            no audio track or if any exception occurred (the exception is
            logged, never propagated).
        """
        video = None
        try:
            video = VideoFileClip(video_path)
            if video.audio is None:
                logger.warning("No audio track found in %s", video_path)
                return False
            video.audio.write_audiofile(
                output_audio_path, fps=self.target_sr, logger=None
            )
            return True
        except Exception as e:
            # Best-effort contract: callers get a boolean, not an exception.
            logger.error("Audio extraction failed: %s", e)
            return False
        finally:
            # Always release the underlying reader (and its ffmpeg process),
            # including on the "no audio" and failure paths.
            if video is not None:
                video.close()

    def sample_video_frames(self, video_path, resize=(224, 224)):
        """Decode ``video_path`` and keep every ``step``-th frame.

        ``step`` is ``max(1, int(source_fps / self.target_fps))``, so the
        effective rate only approximates ``target_fps`` (e.g. a 30 fps source
        with a 25 fps target keeps every frame). Kept frames are converted
        from BGR to RGB and resized with ``cv2.resize(frame, resize)``.

        Args:
            video_path: Path of the video file to read.
            resize: Size passed straight to ``cv2.resize`` as ``dsize``.
                NOTE(review): OpenCV interprets ``dsize`` as
                (width, height) - confirm callers rely on that order.

        Returns:
            ``np.stack`` of the kept frames, or ``None`` when the video could
            not be opened or produced no frames.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                logger.warning("Could not open video %s", video_path)
                return None

            fps = cap.get(cv2.CAP_PROP_FPS)
            # Some containers report 0 (or NaN) FPS; fall back to keeping
            # every frame instead of failing on the conversion to int.
            step = max(1, int(fps / self.target_fps)) if fps > 0 else 1

            frames = []
            count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if count % step == 0:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = cv2.resize(frame, resize)
                    frames.append(frame)
                count += 1
        finally:
            # Release the capture even if decoding/resizing raises.
            cap.release()

        return np.stack(frames) if frames else None
|
|
|
class I3D(nn.Module):
    """Simplified Inflated 3D ConvNet (I3D) architecture shell.

    Only three layers are declared: the 7x7x7 stem convolution, a global
    adaptive average pool, and a 1x1x1 classification convolution. The stem
    emits 64 channels while ``logits`` expects 1024 input channels, so the
    declared layers cannot be chained as-is; the intermediate blocks of the
    full architecture are not present in this shell.

    In real production, this would load the full architecture.
    """

    def __init__(self, num_classes=400, in_channels=3):
        """Declare the shell's layers.

        Args:
            num_classes: Number of output channels of the ``logits`` layer.
            in_channels: Number of input channels of the stem convolution.
        """
        super().__init__()

        # Stem: 7x7x7 convolution, stride 2 in every dimension.
        self.conv3d_1a_7x7 = nn.Conv3d(
            in_channels,
            64,
            kernel_size=(7, 7, 7),
            stride=(2, 2, 2),
            padding=(3, 3, 3),
        )

        # Head: global pooling followed by a 1x1x1 classifier convolution.
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.logits = nn.Conv3d(1024, num_classes, kernel_size=(1, 1, 1))

    def forward(self, x):
        """Placeholder forward pass.

        Raises:
            NotImplementedError: Always. The shell has no usable forward
                path, and failing loudly is safer than silently returning
                ``None`` to the caller.
        """
        raise NotImplementedError(
            "I3D is an architecture shell; forward() is not implemented."
        )
|
|
|
class VGGish(nn.Module):
    """Simplified VGGish shell.

    No layers are declared; the class only reserves the name and the
    ``nn.Module`` interface.
    """

    def __init__(self):
        """Initialise the empty module."""
        super().__init__()

    def forward(self, x):
        """Placeholder forward pass.

        Raises:
            NotImplementedError: Always. The shell declares no layers, and
                failing loudly is safer than silently returning ``None``.
        """
        raise NotImplementedError(
            "VGGish is an architecture shell; forward() is not implemented."
        )
|
|
|