# deepScanAPIFRFR / scripts/extract_features.py
# Author: kautilya286 — first commit (1e4485c)
import os
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from facenet_pytorch import InceptionResnetV1, MTCNN
from transformers import CLIPProcessor, CLIPModel
import albumentations as A
import cv2
# Set device
# Prefer GPU when available; every model and tensor below is moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")
# Initialize models
# MTCNN detects and aligns faces, returning 160x160 crops (FaceNet's input size).
mtcnn = MTCNN(image_size=160, device=device)
# InceptionResnetV1 pretrained on VGGFace2 produces the face embedding; eval()
# disables dropout/batch-norm updates for deterministic inference.
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
# Load CLIP model and processor (ViT-B/32 image encoder for whole-image features).
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Input data folders
DATA_DIR = "data"
# Class label = index into this list (0=real, 1=deepfake, 2=ai_gen).
CATEGORIES = ["real", "deepfake", "ai_gen"]
# Output path
# NOTE(review): verify that the save paths used later in this script actually
# write into this directory.
os.makedirs("features", exist_ok=True)
# Data augmentation pipeline
# Random photometric/geometric augmentation applied before feature extraction;
# the final Resize guarantees MTCNN's expected 160x160 input.
augment = A.Compose([
    A.RandomBrightnessContrast(p=0.2),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.3),
    A.MotionBlur(p=0.2),
    A.Resize(160, 160),  # For MTCNN size requirement
])
def extract_facenet_features(img_path):
    """Return a FaceNet embedding (1-D numpy array) for the face in *img_path*.

    The image is randomly augmented, a face is detected/aligned with MTCNN,
    and the crop is embedded with the VGGFace2-pretrained InceptionResnetV1.

    Returns:
        numpy.ndarray embedding, or None when MTCNN finds no face.
    """
    image = Image.open(img_path).convert("RGB")
    img_np = np.array(image)
    # The augmentation pipeline already ends with A.Resize(160, 160), so the
    # previous explicit cv2.resize pass was redundant work; augmenting at the
    # original resolution also preserves more detail for rotate/blur.
    augmented = augment(image=img_np)["image"]
    img_aug = Image.fromarray(augmented)
    # Face detection + alignment using MTCNN (returns a CHW tensor or None).
    face = mtcnn(img_aug)
    if face is None:
        print(f"[WARN] No face detected in {img_path}")
        return None
    # Add a batch dimension and move to the compute device.
    face = face.unsqueeze(0).to(device)
    # Feature extraction using FaceNet; no_grad avoids building autograd state.
    with torch.no_grad():
        face_emb = facenet(face)
    return face_emb.squeeze().cpu().numpy()
def extract_clip_features(img_path):
    """Return the CLIP image embedding (1-D numpy array) for *img_path*.

    The image goes through the shared random augmentation pipeline before
    being encoded by the CLIP ViT-B/32 image tower.
    """
    pil_img = Image.open(img_path).convert("RGB")
    # Shared augmentation pipeline: numpy array in, numpy array out.
    aug_array = augment(image=np.array(pil_img))["image"]
    # Preprocess for CLIP and move the tensors to the compute device.
    processed = clip_processor(
        images=Image.fromarray(aug_array), return_tensors="pt"
    ).to(device)
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        embedding = clip_model.get_image_features(**processed)
    return embedding.cpu().numpy().squeeze()
def extract_combined_features(img_path):
    """Concatenate FaceNet and CLIP embeddings for the image at *img_path*.

    Returns:
        1-D numpy array of the concatenated features, or None when no face
        is detected (the sample is skipped by the caller).

    NOTE(review): each extractor re-opens the file and draws an independent
    random augmentation, so the two embeddings come from two different
    augmented views of the same image — confirm this is intended.
    """
    facenet_features = extract_facenet_features(img_path)
    if facenet_features is None:
        # No face found: skip the (expensive) CLIP forward pass entirely,
        # since the previous code computed it only to throw it away.
        return None
    clip_features = extract_clip_features(img_path)
    # Combine (concatenate) the features from FaceNet and CLIP.
    return np.concatenate((facenet_features, clip_features))
def extract_all_features():
    """Embed every image under data/{real,deepfake,ai_gen} and save arrays.

    Writes:
        features/embeddings.npy — (N, D) array of combined embeddings.
        features/labels.npy     — (N,) array of integer labels indexing
                                  into CATEGORIES.
    Images with no detectable face are skipped with a warning.
    """
    X, y = [], []
    for label, category in enumerate(CATEGORIES):
        folder = os.path.join(DATA_DIR, category)
        if not os.path.isdir(folder):
            print(f"[WARN] Missing folder: {folder}")
            continue
        print(f"\n🧠 Extracting from: {category} ({folder})")
        # sorted() makes the embedding order deterministic across runs
        # (os.listdir order is filesystem-dependent).
        for fname in tqdm(sorted(os.listdir(folder))):
            if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
                continue
            path = os.path.join(folder, fname)
            combined_features = extract_combined_features(path)
            if combined_features is not None:
                X.append(combined_features)
                y.append(label)
    # Bug fix: the script creates "features/" at import time (os.makedirs
    # above) but previously saved to "../features/", which fails unless an
    # unrelated sibling directory exists. Save into the directory we made.
    np.save(os.path.join("features", "embeddings.npy"), np.array(X))
    np.save(os.path.join("features", "labels.npy"), np.array(y))
    print(f"\n✅ Done: Saved {len(X)} embeddings.")
# Run the full extraction pipeline only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    extract_all_features()