# deepScanAPIFRFR / scripts/extract_features.py
# Author: kautilya286 — first commit (1e4485c)
import os
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from facenet_pytorch import InceptionResnetV1, MTCNN
from transformers import CLIPProcessor, CLIPModel
import albumentations as A
import cv2
# Set device
# Prefer GPU when available; every model and tensor below is moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Using device: {device}")
# Initialize models
# MTCNN detects and aligns faces, returning 160x160 crops (FaceNet's input size).
mtcnn = MTCNN(image_size=160, device=device)
# InceptionResnetV1 pretrained on VGGFace2 produces the face embedding; eval()
# disables dropout/batch-norm updates for deterministic inference.
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
# Load CLIP model and processor (ViT-B/32 image encoder for whole-image features).
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Input data folders
DATA_DIR = "data"
# Class label = index into this list (0=real, 1=deepfake, 2=ai_gen).
CATEGORIES = ["real", "deepfake", "ai_gen"]
# Output path
# NOTE(review): verify that the save paths used later in this script actually
# write into this directory.
os.makedirs("features", exist_ok=True)
# Data augmentation pipeline
# Random photometric/geometric augmentation applied before feature extraction;
# the final Resize guarantees MTCNN's expected 160x160 input.
augment = A.Compose([
    A.RandomBrightnessContrast(p=0.2),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.3),
    A.MotionBlur(p=0.2),
    A.Resize(160, 160),  # For MTCNN size requirement
])
def extract_facenet_features(img_path):
    """Return a FaceNet embedding (1-D numpy array) for the face in *img_path*.

    The image is randomly augmented, a face is detected/aligned with MTCNN,
    and the crop is embedded with the VGGFace2-pretrained InceptionResnetV1.

    Returns:
        numpy.ndarray embedding, or None when MTCNN finds no face.
    """
    image = Image.open(img_path).convert("RGB")
    img_np = np.array(image)
    # The augmentation pipeline already ends with A.Resize(160, 160), so the
    # previous explicit cv2.resize pass was redundant work; augmenting at the
    # original resolution also preserves more detail for rotate/blur.
    augmented = augment(image=img_np)["image"]
    img_aug = Image.fromarray(augmented)
    # Face detection + alignment using MTCNN (returns a CHW tensor or None).
    face = mtcnn(img_aug)
    if face is None:
        print(f"[WARN] No face detected in {img_path}")
        return None
    # Add a batch dimension and move to the compute device.
    face = face.unsqueeze(0).to(device)
    # Feature extraction using FaceNet; no_grad avoids building autograd state.
    with torch.no_grad():
        face_emb = facenet(face)
    return face_emb.squeeze().cpu().numpy()
def extract_clip_features(img_path):
    """Return the CLIP image embedding (1-D numpy array) for *img_path*.

    The image goes through the shared random augmentation pipeline before
    being encoded by the CLIP ViT-B/32 image tower.
    """
    pil_img = Image.open(img_path).convert("RGB")
    # Shared augmentation pipeline: numpy array in, numpy array out.
    aug_array = augment(image=np.array(pil_img))["image"]
    # Preprocess for CLIP and move the tensors to the compute device.
    processed = clip_processor(
        images=Image.fromarray(aug_array), return_tensors="pt"
    ).to(device)
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        embedding = clip_model.get_image_features(**processed)
    return embedding.cpu().numpy().squeeze()
def extract_combined_features(img_path):
    """Concatenate FaceNet and CLIP embeddings for the image at *img_path*.

    Returns:
        1-D numpy array of the concatenated features, or None when no face
        is detected (the sample is skipped by the caller).

    NOTE(review): each extractor re-opens the file and draws an independent
    random augmentation, so the two embeddings come from two different
    augmented views of the same image — confirm this is intended.
    """
    facenet_features = extract_facenet_features(img_path)
    if facenet_features is None:
        # No face found: skip the (expensive) CLIP forward pass entirely,
        # since the previous code computed it only to throw it away.
        return None
    clip_features = extract_clip_features(img_path)
    # Combine (concatenate) the features from FaceNet and CLIP.
    return np.concatenate((facenet_features, clip_features))
def extract_all_features():
    """Embed every image under data/{real,deepfake,ai_gen} and save arrays.

    Writes:
        features/embeddings.npy — (N, D) array of combined embeddings.
        features/labels.npy     — (N,) array of integer labels indexing
                                  into CATEGORIES.
    Images with no detectable face are skipped with a warning.
    """
    X, y = [], []
    for label, category in enumerate(CATEGORIES):
        folder = os.path.join(DATA_DIR, category)
        if not os.path.isdir(folder):
            print(f"[WARN] Missing folder: {folder}")
            continue
        print(f"\n🧠 Extracting from: {category} ({folder})")
        # sorted() makes the embedding order deterministic across runs
        # (os.listdir order is filesystem-dependent).
        for fname in tqdm(sorted(os.listdir(folder))):
            if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
                continue
            path = os.path.join(folder, fname)
            combined_features = extract_combined_features(path)
            if combined_features is not None:
                X.append(combined_features)
                y.append(label)
    # Bug fix: the script creates "features/" at import time (os.makedirs
    # above) but previously saved to "../features/", which fails unless an
    # unrelated sibling directory exists. Save into the directory we made.
    np.save(os.path.join("features", "embeddings.npy"), np.array(X))
    np.save(os.path.join("features", "labels.npy"), np.array(y))
    print(f"\n✅ Done: Saved {len(X)} embeddings.")
# Run the full extraction pipeline only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    extract_all_features()