Spaces:
Runtime error
Runtime error
File size: 3,171 Bytes
1e4485c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import cv2
import torch
import numpy as np
from PIL import Image
import joblib
from facenet_pytorch import MTCNN, InceptionResnetV1
# Select GPU when available; all models below are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")
# Load models
# MTCNN: face detector producing 160x160 aligned crops (FaceNet's input size).
mtcnn = MTCNN(image_size=160, device=device)
# FaceNet backbone pretrained on VGGFace2; eval() disables dropout/batchnorm updates.
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
# NOTE(review): joblib.load unpickles arbitrary code — only load trusted model files.
clf = joblib.load("model/ensemble_model.pkl") # Example classifier model
# Maps the classifier's integer outputs to human-readable class names.
label_map = {0: "real", 1: "deepfake", 2: "ai_gen"}
def extract_faces_from_video(video_path, time_interval_sec=10):
    """Sample one frame every ``time_interval_sec`` seconds of *video_path*
    and return a list of combined feature vectors (one per sampled frame).

    Frames whose feature extraction fails (``extract_combined_features``
    returns None) are skipped.

    Parameters:
        video_path: path to a video file readable by OpenCV.
        time_interval_sec: seconds between sampled frames (default 10).

    Returns:
        list of 1-D numpy feature vectors (possibly empty).
    """
    cap = cv2.VideoCapture(video_path)
    embeddings = []
    try:
        # Get the total number of frames in the video and the FPS
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # BUG FIX: cv2.CAP_PROP_FPS is an int constant, not a callable;
        # the original cap.get(cv2.CAP_PROP_FPS()) raised TypeError.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Some containers report 0 FPS; fall back to a sane default
            # so the duration and modulo math below cannot divide by zero.
            fps = 30.0
        video_duration = total_frames / fps  # Video duration in seconds
        print(f"[INFO] Video duration: {video_duration} seconds, FPS: {fps}")
        # Process one frame every 'time_interval_sec' seconds; clamp to >= 1
        # so short intervals / low FPS never produce a zero modulus.
        frame_skip = max(1, int(fps * time_interval_sec))
        print(f"[INFO] Processing every {time_interval_sec} seconds. Skipping {frame_skip} frames.")
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Process frames based on the calculated frame skip
            if frame_idx % frame_skip == 0:
                # OpenCV yields BGR; PIL/model code expects RGB.
                image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                combined_features = extract_combined_features(image)
                if combined_features is not None:
                    embeddings.append(combined_features)
            frame_idx += 1
    finally:
        # Always release the capture handle, even if decoding/extraction fails.
        cap.release()
    return embeddings
def extract_combined_features(image):
    """Concatenate FaceNet and CLIP feature vectors for a PIL image.

    Returns:
        1-D numpy array of the concatenated features, or None when either
        extractor fails (e.g. no face detected) so callers can skip the frame.
    """
    facenet_features = extract_facenet_features(image)
    clip_features = extract_clip_features(image)
    # BUG FIX: the original only checked facenet_features; a None from the
    # CLIP extractor reached np.concatenate and raised. Check both.
    if facenet_features is None or clip_features is None:
        return None
    # Combine (concatenate) the features from FaceNet and CLIP
    return np.concatenate((facenet_features, clip_features))
def extract_facenet_features(image):
    """Stub: extract a FaceNet embedding from *image* (a PIL.Image).

    Currently unimplemented — falls through `pass` and returns None,
    which extract_combined_features treats as "skip this frame".
    """
    # Example function for FaceNet feature extraction
    pass
def extract_clip_features(image):
    """Stub: extract a CLIP embedding from *image* (a PIL.Image).

    Currently unimplemented — falls through `pass` and returns None.
    NOTE(review): extract_combined_features passes a possibly-None value
    from this stub straight to np.concatenate; implement before use.
    """
    # Example function for CLIP feature extraction
    pass
def predict_video(video_path):
    """Classify an entire video by majority vote over per-frame predictions.

    Samples frames every 10 seconds, runs the ensemble classifier on each
    frame's feature vector, and prints the most frequent label. Prints a
    warning and returns early when no usable frames were found.
    """
    frame_features = extract_faces_from_video(video_path, time_interval_sec=10)
    if not frame_features:
        print("[WARN] No faces found in video.")
        return
    # One predicted label per sampled frame.
    frame_preds = clf.predict(frame_features)
    # Majority vote: bincount + argmax picks the most frequent label
    # (ties resolve to the lowest label id).
    final_pred = np.bincount(frame_preds).argmax()
    print(f"\n🧠 Final Video Prediction: {label_map[final_pred]} ({len(frame_preds)} frame(s) used)")
if __name__ == "__main__":
    import sys

    # Exactly one CLI argument is required: the video file path.
    args = sys.argv
    if len(args) != 2:
        print("Usage: python scripts/predict_video.py <video_path>")
        sys.exit(1)
    # Run the video prediction function
    predict_video(args[1])
|