File size: 3,235 Bytes
2c0da97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import pickle
import numpy as np
from deepface import DeepFace
import gc

# Embedding models every image is encoded with. Per the original note these
# are meant to match the project's "Consensus Engine" (defined elsewhere).
MODELS = ["ArcFace", "Facenet512"]
# Folder scanned for source face images (.jpg / .jpeg / .png); relative to
# the current working directory.
DB_PATH = "faces_db"
# Output file the per-person embeddings dictionary is pickled to.
PICKLE_PATH = "core/embeddings.pickle"

def _l2_normalize(vector):
    """Return *vector* scaled to unit L2 length, or None if its norm is zero."""
    norm = np.linalg.norm(vector)
    if norm == 0:
        return None
    return vector / norm


def generate_hard_pickle():
    """Build one centroid embedding per person and model, then pickle them.

    Every image in ``DB_PATH`` is encoded with each model in ``MODELS``.
    Images are grouped by the part of the file name before the first
    underscore, the L2-normalised embeddings of each group are averaged, and
    the re-normalised average (the "centroid") is stored.

    The object written to ``PICKLE_PATH`` has the shape
    ``{name: {model_name: numpy.ndarray}}``. A person whose images all failed
    to encode keeps an empty inner dict.

    Returns:
        None. The function returns early (without writing anything) when
        ``DB_PATH`` is not a directory, contains no supported images, or no
        image could be encoded at all.
    """
    print("🚀 Starting Advanced Face Encoding...")

    # isdir (not exists) so a stray regular file named like DB_PATH is
    # reported here instead of crashing in os.listdir below.
    if not os.path.isdir(DB_PATH):
        print(f"❌ Error: {DB_PATH} folder not found!")
        return

    # Structure: { "Name": { "ArcFace": [vec, ...], "Facenet512": [vec, ...] } }
    identity_map = {}

    # 1. Scan the database (sorted so runs are processed in a stable order)
    image_files = sorted(
        f for f in os.listdir(DB_PATH)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    if not image_files:
        print("⚠️ No images found in faces_db!")
        return

    for filename in image_files:
        # Group by the text before the first underscore, e.g.
        # 'Shashank_Kumar.jpg' and 'Shashank_2.jpg' both map to 'Shashank'.
        name = os.path.splitext(filename)[0].split('_')[0]
        img_path = os.path.join(DB_PATH, filename)

        print(f"📸 Encoding: {name} ({filename})...")

        if name not in identity_map:
            identity_map[name] = {m: [] for m in MODELS}

        try:
            for model in MODELS:
                # enforce_detection=True makes DeepFace raise when no face is
                # found, so only images with a detected face are pickled.
                # The embedding length depends on the model.
                embedding_objs = DeepFace.represent(
                    img_path=img_path,
                    model_name=model,
                    enforce_detection=True,
                    detector_backend="retinaface",
                )

                if not embedding_objs:
                    continue

                # Only the first returned entry is used.
                vector = np.array(embedding_objs[0]["embedding"])
                # Unit length, so every sample weighs equally in the mean.
                norm_vector = _l2_normalize(vector)
                if norm_vector is None:
                    print(f"⚠️ Skip {filename}: zero-length embedding from {model}")
                    continue
                identity_map[name][model].append(norm_vector)

        except Exception as e:
            # Best effort: one unreadable / face-less image must not abort
            # the whole run.
            print(f"⚠️ Skip {filename}: {e}")

    # 2. Create the 'Master Identity Vectors' (Centroids)
    final_embeddings = {}

    for name, model_data in identity_map.items():
        final_embeddings[name] = {}
        for model, vectors in model_data.items():
            if not vectors:
                continue
            # Average all vectors for this person, then re-normalise the
            # average so the stored centroid is unit length as well.
            centroid = _l2_normalize(np.mean(vectors, axis=0))
            if centroid is None:
                continue
            final_embeddings[name][model] = centroid
            print(f"✅ Created Master Identity for {name} [{model}] using {len(vectors)} samples.")

    # Do not overwrite an existing pickle with an empty result.
    if not any(final_embeddings.values()):
        print("⚠️ No embeddings could be generated; nothing was saved.")
        return

    # 3. Save the Hard Pickle (create the target folder if it is missing)
    pickle_dir = os.path.dirname(PICKLE_PATH)
    if pickle_dir:
        os.makedirs(pickle_dir, exist_ok=True)

    with open(PICKLE_PATH, "wb") as f:
        pickle.dump(final_embeddings, f)

    print(f"\n✨ SUCCESS: Hard Pickle saved to {PICKLE_PATH}")
    print("This file contains the mathematical essence of your students.")
    gc.collect()

# Run the encoder only when this file is executed as a script, not on import.
if __name__ == "__main__":
    generate_hard_pickle()