# FaceRecog_hf / embed.py — tjrlgns09 — 02a7bf9
import os
import cv2
import faiss
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
import insightface
import albumentations as A
# 🔧 Augmentation settings
# Random augmentation pipeline used by get_face_embedding to produce extra
# variants of each image. Every transform fires independently with its own
# probability ``p``.
augment = A.Compose(
    transforms=[
        # Mirror the image left/right in half of the calls.
        A.HorizontalFlip(p=0.5),
        # Randomly shift brightness and contrast in 30% of the calls.
        A.RandomBrightnessContrast(p=0.3),
        # Rotate by a random angle bounded by limit=15 in 30% of the calls.
        A.Rotate(limit=15, p=0.3),
    ],
)
# 🚀 Model initialization function
def load_face_model(device: str = "cpu"):
    """Create and prepare an insightface ``FaceAnalysis`` model.

    Args:
        device: ``"cpu"`` selects the CPU execution provider; any other value
            selects the CUDA execution provider.

    Returns:
        The prepared ``FaceAnalysis`` instance (model pack ``buffalo_l``).
    """
    if device == "cpu":
        execution_providers = ["CPUExecutionProvider"]
        context_id = -1
    else:
        execution_providers = ["CUDAExecutionProvider"]
        # Always 0 for non-CPU devices (presumably the first GPU — confirm
        # against the insightface documentation).
        context_id = 0

    analyzer = insightface.app.FaceAnalysis(name='buffalo_l', providers=execution_providers)
    analyzer.prepare(ctx_id=context_id)
    return analyzer
# 🚀 Embedding extraction function
def get_face_embedding(image_path: str, model, n_augment: int = 5):
    """Return one averaged face embedding for the image at ``image_path``.

    The image is passed to ``model.get`` once as loaded and then ``n_augment``
    more times after going through the module-level ``augment`` pipeline. For
    every attempt that detects at least one face, the embedding of the first
    returned face is collected; the element-wise mean of the collected
    embeddings is returned.

    Args:
        image_path: Path of the image file; converted with ``str()`` before
            being handed to ``cv2.imread``.
        model: Object exposing ``get(image)`` that returns a sequence of
            detected faces, each carrying an ``embedding`` attribute.
        n_augment: Number of augmented variants to try in addition to the
            original image.

    Returns:
        The mean embedding as a NumPy array, or ``None`` when the image cannot
        be read or no attempt detects a face.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; without this guard cvtColor below would raise
        # and abort the whole folder scan.
        print(f"❌ Failed to read image: {image_path}")
        return None

    # NOTE(review): insightface's own examples pass the BGR array from
    # cv2.imread straight to FaceAnalysis.get. The RGB conversion is kept
    # unchanged here because existing indexes were presumably built with it —
    # confirm before changing.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    embeddings = []

    # Original image.
    faces = model.get(img)
    if faces:
        embeddings.append(faces[0].embedding)
    else:
        print(f"❌ μ–Όκ΅΄ 인식 μ‹€νŒ¨ (원본): {image_path}")

    # Augmented variants.
    for i in range(n_augment):
        img_aug = augment(image=img)['image']
        faces = model.get(img_aug)
        if faces:
            embeddings.append(faces[0].embedding)
        else:
            print(f"❌ μ–Όκ΅΄ 인식 μ‹€νŒ¨ (증강 {i+1}): {image_path}")

    if not embeddings:
        print(f"❌ λͺ¨λ“  μ‹œλ„ μ‹€νŒ¨: {image_path}")
        return None

    return np.mean(embeddings, axis=0)
# 🚀 Folder scan and embedding extraction
def process_folder(data_folder: str, model) -> pd.DataFrame:
    """Embed every image found in the per-person sub-folders of ``data_folder``.

    Each immediate sub-directory is treated as one person and its name is used
    as the label. Files directly inside it whose suffix is ``.jpg``, ``.jpeg``
    or ``.png`` (case-insensitive) are passed to ``get_face_embedding``; images
    for which that returns ``None`` are skipped.

    Args:
        data_folder: Directory containing one sub-directory per person.
        model: Face model forwarded unchanged to ``get_face_embedding``.

    Returns:
        A DataFrame with the columns ``label``, ``image_path`` and
        ``embedding``, one row per successfully embedded image. The columns
        are present even when no image could be embedded.
    """
    image_suffixes = {".jpg", ".jpeg", ".png"}
    data = []

    # Sorted traversal keeps the row order reproducible across runs and
    # filesystems; the row order later becomes the order of the saved labels.
    for person_dir in sorted(Path(data_folder).iterdir()):
        if not person_dir.is_dir():
            continue

        label = person_dir.name
        print(f"β–Ά 폴더: {label}")

        count = 0
        for image_path in sorted(person_dir.glob("*")):
            if image_path.suffix.lower() not in image_suffixes:
                continue

            emb = get_face_embedding(image_path, model)
            if emb is None:
                continue

            data.append({
                "label": label,
                "image_path": str(image_path),
                "embedding": emb,
            })
            count += 1

        print(f"βœ… μ–Όκ΅΄ 인식 성곡 수: {count}")

    # Explicit columns so that an empty result still exposes 'embedding' and
    # 'label' to downstream code instead of failing with KeyError.
    return pd.DataFrame(data, columns=["label", "image_path", "embedding"])
# 🚀 FAISS index creation and saving
def build_and_save_faiss(train_df: pd.DataFrame, save_path: str):
    """Build an inner-product FAISS index from ``train_df`` and persist it.

    The ``embedding`` column is stacked into a float32 matrix and every row is
    L2-normalised, so inner-product search on the index ranks by cosine
    similarity. Three files are written into ``save_path``:
    ``faiss_index.index`` (the index), ``faiss_labels.pkl`` (the ``label``
    column as a pickled list, in row order) and ``train_df.pkl`` (the whole
    DataFrame).

    Args:
        train_df: DataFrame with at least the columns ``embedding`` (one
            vector per row, all of equal length) and ``label``.
        save_path: Output directory; created if it does not exist.

    Returns:
        A tuple ``(index, labels, train_df)``.

    Raises:
        ValueError: If ``train_df`` has no rows.
    """
    if train_df.empty:
        # Fail before touching the filesystem, with a message that names the
        # actual problem instead of an opaque stacking/KeyError failure.
        raise ValueError("train_df contains no embeddings; nothing to index")

    os.makedirs(save_path, exist_ok=True)

    # astype() returns a fresh array, so the in-place normalisation below
    # never mutates the arrays stored in train_df.
    embeddings = np.stack(train_df['embedding'].values).astype('float32')
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Clamp so an all-zero embedding stays zero instead of turning into NaN.
    embeddings /= np.maximum(norms, np.finfo(np.float32).eps)

    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, os.path.join(save_path, "faiss_index.index"))

    labels = train_df['label'].tolist()
    with open(os.path.join(save_path, "faiss_labels.pkl"), "wb") as f:
        pickle.dump(labels, f)

    # Also save the full DataFrame (optional extra artifact).
    train_df.to_pickle(os.path.join(save_path, "train_df.pkl"))

    print("βœ… FAISS 인덱슀 & 라벨 μ €μž₯ μ™„λ£Œ")
    return index, labels, train_df
# 🚀 End-to-end pipeline function
def run_pipeline(data_folder: str, save_path: str, device: str = "cpu"):
    """Run the whole flow: load the model, embed a folder, build the index.

    Args:
        data_folder: Directory handed to ``process_folder``.
        save_path: Output directory; created here if missing and then handed
            to ``build_and_save_faiss``.
        device: Device string forwarded to ``load_face_model``.

    Returns:
        The ``(index, labels, df)`` triple produced by
        ``build_and_save_faiss``.
    """
    # Make sure the output directory exists before anything is written.
    os.makedirs(save_path, exist_ok=True)

    print("πŸš€ μ–Όκ΅΄ λͺ¨λΈ λΆˆλŸ¬μ˜€λŠ” 쀑...")
    face_model = load_face_model(device)

    print("πŸš€ μž„λ² λ”© μΆ”μΆœ μ‹œμž‘...")
    embeddings_df = process_folder(data_folder, face_model)

    print("πŸš€ FAISS 인덱슀 생성 및 μ €μž₯ 쀑...")
    faiss_index, label_list, saved_df = build_and_save_faiss(embeddings_df, save_path)
    return faiss_index, label_list, saved_df
# Script entry: embed everything under ./person and store the index files in
# ./embedding/person, on CPU.
# NOTE(review): this runs at import time as well as when executed directly;
# consider an ``if __name__ == "__main__":`` guard if the module is ever
# imported elsewhere.
data_folder, save_path = "./person", "./embedding/person"
index, labels, df = run_pipeline(data_folder, save_path, device="cpu")