import gc
import os
import pickle

import numpy as np
from deepface import DeepFace

# Recognition models to encode with; keep in sync with the Consensus Engine.
MODELS = ["ArcFace", "Facenet512"]
# Folder scanned for reference images (files are read from its top level only).
DB_PATH = "faces_db"
# Destination of the pickled {name: {model: centroid}} mapping.
PICKLE_PATH = "core/embeddings.pickle"

# File extensions (compared case-insensitively) that are treated as images.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _unit_vector(vector):
    """Return *vector* scaled to unit L2 norm, or None if that is impossible.

    A zero or non-finite norm would turn the division into NaN/inf values,
    which would then silently poison every comparison made against the
    stored embedding, so such vectors are rejected instead.
    """
    norm = np.linalg.norm(vector)
    if not np.isfinite(norm) or norm == 0:
        return None
    return vector / norm


def generate_hard_pickle():
    """Build one centroid embedding per person and model, then pickle them.

    Every image directly inside ``DB_PATH`` is encoded with each model in
    ``MODELS``. Images are grouped by the part of the file name that precedes
    the first underscore, so ``Shashank_1.jpg`` and ``Shashank_Kumar.jpg``
    both contribute to the identity ``Shashank``. Each embedding is
    L2-normalised, the embeddings of one person/model are averaged, and the
    average is normalised again.

    The result written to ``PICKLE_PATH`` has the shape
    ``{name: {model_name: numpy.ndarray}}``. A model key is absent when no
    usable embedding was obtained for that person with that model.

    Returns:
        None. Progress and problems are reported on stdout. The function
        returns early, without writing anything, when ``DB_PATH`` is not a
        directory or contains no supported image.
    """
    print("🚀 Starting Advanced Face Encoding...")

    # isdir (not exists): a plain file named like DB_PATH cannot be listed.
    if not os.path.isdir(DB_PATH):
        print(f"❌ Error: {DB_PATH} folder not found!")
        return

    # 1. Scan the database. Sorted so runs and logs are reproducible.
    image_files = sorted(
        entry
        for entry in os.listdir(DB_PATH)
        if entry.lower().endswith(_IMAGE_EXTENSIONS)
    )
    if not image_files:
        print("⚠️ No images found in faces_db!")
        return

    # Structure: {"Name": {"ArcFace": [vec, ...], "Facenet512": [vec, ...]}}
    identity_map = {}

    for filename in image_files:
        # Identity = text before the first underscore
        # (e.g. 'Shashank_Kumar.jpg' -> 'Shashank').
        name = os.path.splitext(filename)[0].split('_')[0]
        img_path = os.path.join(DB_PATH, filename)
        print(f"📸 Encoding: {name} ({filename})...")

        if name not in identity_map:
            identity_map[name] = {model: [] for model in MODELS}

        try:
            for model in MODELS:
                # enforce_detection=True makes DeepFace raise when it finds no
                # face, so only images with a detected face are encoded.
                embedding_objs = DeepFace.represent(
                    img_path=img_path,
                    model_name=model,
                    enforce_detection=True,
                    detector_backend="retinaface",
                )
                if not embedding_objs:
                    continue

                # Only the first returned face is used for this image.
                vector = np.array(embedding_objs[0]["embedding"])
                norm_vector = _unit_vector(vector)
                if norm_vector is None:
                    print(f"⚠️ Skip {filename} [{model}]: embedding has zero or invalid norm")
                    continue
                identity_map[name][model].append(norm_vector)
        except Exception as e:
            # Best effort: one unreadable or face-less image must not abort
            # the whole run. Vectors already appended for earlier models of
            # this image are kept.
            print(f"⚠️ Skip {filename}: {e}")

    # 2. Create the 'Master Identity Vectors' (centroids).
    final_embeddings = {}
    for name, model_data in identity_map.items():
        final_embeddings[name] = {}
        for model, vectors in model_data.items():
            if not vectors:
                continue

            # Average the unit vectors, then bring the mean back to unit
            # length so it is comparable with the individual embeddings.
            centroid = _unit_vector(np.mean(vectors, axis=0))
            if centroid is None:
                print(f"⚠️ Skip centroid for {name} [{model}]: samples cancel out")
                continue

            final_embeddings[name][model] = centroid
            print(f"✅ Created Master Identity for {name} [{model}] using {len(vectors)} samples.")

        if not final_embeddings[name]:
            print(f"⚠️ No usable embeddings for {name}; stored entry is empty.")

    # 3. Save the Hard Pickle. Create the target folder first so the encoding
    # work is not lost to a missing directory.
    pickle_dir = os.path.dirname(PICKLE_PATH)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)
    with open(PICKLE_PATH, "wb") as f:
        pickle.dump(final_embeddings, f)

    print(f" ✨ SUCCESS: Hard Pickle saved to {PICKLE_PATH}")
    print("This file contains the mathematical essence of your students.")
    gc.collect()


if __name__ == "__main__":
    generate_hard_pickle()