File size: 3,694 Bytes
02a7bf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import cv2
import faiss
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
import insightface
import albumentations as A

# πŸ”§ Augmentation settings: horizontal flip, brightness/contrast jitter and a
# small (≀15°) rotation, used to generate extra variants per image so the
# averaged embedding is more robust to pose/lighting changes.
augment = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.Rotate(limit=15, p=0.3),
])

# πŸš€ λͺ¨λΈ μ΄ˆκΈ°ν™” ν•¨μˆ˜
def load_face_model(device: str = "cpu"):
    providers = ["CPUExecutionProvider"] if device == "cpu" else ["CUDAExecutionProvider"]
    model = insightface.app.FaceAnalysis(name='buffalo_l', providers=providers)
    model.prepare(ctx_id=0 if device != "cpu" else -1)
    return model

# πŸš€ Embedding extraction
def get_face_embedding(image_path: str, model, n_augment: int = 5):
    """Return the mean face embedding for one image.

    The image is embedded once as-is and ``n_augment`` more times after
    random augmentation; the embeddings of the first detected face in each
    successful attempt are averaged.

    Args:
        image_path: Path to the image file.
        model: A prepared insightface ``FaceAnalysis`` model.
        n_augment: Number of augmented variants to embed in addition to
            the original image.

    Returns:
        A 1-D numpy array (the mean embedding), or ``None`` when the image
        cannot be read or no face was detected in any attempt.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread signals failure by returning None (no exception) for
        # missing/corrupt/unsupported files; without this guard the
        # cvtColor call below raises an opaque cv2.error.
        print(f"❌ 이미지 읽기 μ‹€νŒ¨: {image_path}")
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    embeddings = []

    # Original image
    faces = model.get(img)
    if faces:
        embeddings.append(faces[0].embedding)
    else:
        print(f"❌ μ–Όκ΅΄ 인식 μ‹€νŒ¨ (원본): {image_path}")

    # Augmented variants
    for i in range(n_augment):
        augmented = augment(image=img)
        img_aug = augmented['image']
        faces = model.get(img_aug)
        if faces:
            embeddings.append(faces[0].embedding)
        else:
            print(f"❌ μ–Όκ΅΄ 인식 μ‹€νŒ¨ (증강 {i+1}): {image_path}")

    if embeddings:
        return np.mean(embeddings, axis=0)
    print(f"❌ λͺ¨λ“  μ‹œλ„ μ‹€νŒ¨: {image_path}")
    return None

# πŸš€ Scan folders and extract embeddings
def process_folder(data_folder: str, model) -> pd.DataFrame:
    """Walk ``data_folder`` (one sub-directory per person) and embed images.

    Each sub-directory name is used as the label; only ``.jpg``/``.jpeg``/
    ``.png`` files are processed (case-insensitive extension match).

    Args:
        data_folder: Root directory containing one folder per person.
        model: Prepared FaceAnalysis model, forwarded to get_face_embedding.

    Returns:
        DataFrame with columns ``label``, ``image_path``, ``embedding``.
    """
    allowed_suffixes = {".jpg", ".jpeg", ".png"}
    records = []
    for person_dir in Path(data_folder).iterdir():
        if not person_dir.is_dir():
            continue
        label = person_dir.name
        print(f"β–Ά 폴더: {label}")
        ok_count = 0
        for image_path in person_dir.glob("*"):
            if image_path.suffix.lower() not in allowed_suffixes:
                continue
            emb = get_face_embedding(image_path, model)
            if emb is None:
                continue
            records.append(
                {"label": label, "image_path": str(image_path), "embedding": emb}
            )
            ok_count += 1
        print(f"βœ… μ–Όκ΅΄ 인식 성곡 수: {ok_count}")
    return pd.DataFrame(records)

# πŸš€ Build and persist the FAISS index
def build_and_save_faiss(train_df: pd.DataFrame, save_path: str):
    """L2-normalize the embeddings, index them, and persist the artifacts.

    With unit-norm vectors an inner-product index (``IndexFlatIP``) ranks
    by cosine similarity. Writes ``faiss_index.index``, ``faiss_labels.pkl``
    and ``train_df.pkl`` into ``save_path``.

    Args:
        train_df: DataFrame with at least ``embedding`` and ``label`` columns.
        save_path: Existing directory to write the artifacts into.

    Returns:
        Tuple of ``(index, labels, train_df)``.

    Raises:
        ValueError: If ``train_df`` is empty — otherwise ``np.stack`` would
            fail later with an unrelated, confusing error.
    """
    if train_df.empty:
        raise ValueError("train_df is empty: no embeddings to index")

    embeddings = np.stack(train_df['embedding'].values).astype('float32')
    # Floor the norm so a (degenerate) all-zero embedding cannot cause a
    # division by zero; result is identical for every nonzero vector.
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    embeddings /= np.maximum(norms, 1e-12)

    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, os.path.join(save_path, "faiss_index.index"))

    labels = train_df['label'].tolist()
    with open(os.path.join(save_path, "faiss_labels.pkl"), "wb") as f:
        pickle.dump(labels, f)

    # Full DataFrame saved as well (optional, handy for later inspection)
    train_df.to_pickle(os.path.join(save_path, "train_df.pkl"))

    print("βœ… FAISS 인덱슀 & 라벨 μ €μž₯ μ™„λ£Œ")
    return index, labels, train_df

# πŸš€ End-to-end pipeline
def run_pipeline(data_folder: str, save_path: str, device: str = "cpu"):
    """Load the face model, embed every image and persist a FAISS index.

    Args:
        data_folder: Root directory of per-person image folders.
        save_path: Output directory (created if missing).
        device: ``"cpu"`` or a GPU selector, forwarded to load_face_model.

    Returns:
        Tuple of ``(index, labels, dataframe)`` from build_and_save_faiss.
    """
    os.makedirs(save_path, exist_ok=True)

    print("πŸš€ μ–Όκ΅΄ λͺ¨λΈ λΆˆλŸ¬μ˜€λŠ” 쀑...")
    face_model = load_face_model(device)

    print("πŸš€ μž„λ² λ”© μΆ”μΆœ μ‹œμž‘...")
    embeddings_df = process_folder(data_folder, face_model)

    print("πŸš€ FAISS 인덱슀 생성 및 μ €μž₯ 쀑...")
    return build_and_save_faiss(embeddings_df, save_path)


if __name__ == "__main__":
    # Guard so importing this module (e.g. to reuse get_face_embedding)
    # does not immediately run the whole embedding pipeline.
    data_folder = "./person"
    save_path = "./embedding/person"
    index, labels, df = run_pipeline(data_folder, save_path, device="cpu")