# Konthee's picture
# Add Real-ESRGAN face enhancement and log preview updates
# 15f8874
from __future__ import annotations
import base64
import html
import json
import math
import os
import re
import shutil
import sys
import urllib.request
import uuid
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Tuple
import cv2
import gradio as gr
import numpy as np
from PIL import Image
from deepface import DeepFace
from deepface.modules import modeling
from retinaface import RetinaFace
# Optional Real-ESRGAN stack. Older basicsr releases import
# torchvision.transforms.functional_tensor, which newer torchvision renamed
# to _functional_tensor, so alias it before importing basicsr. If any of
# torch / basicsr / realesrgan is missing, fall back to None sentinels and
# the app runs without super-resolution support (see load_esrgan_model).
try:
    try:
        import torchvision.transforms._functional_tensor as tv_functional_tensor
        sys.modules.setdefault("torchvision.transforms.functional_tensor", tv_functional_tensor)
    except Exception:
        pass
    import torch
    from basicsr.archs.rrdbnet_arch import RRDBNet
    from realesrgan import RealESRGANer
except Exception:
    torch = None
    RRDBNet = None
    RealESRGANer = None
# Filesystem layout and recognition configuration.
BASE_DIR = Path(__file__).resolve().parent
DB_DIR = BASE_DIR / "face_db"  # registered face crops, one sub-folder per user
OUTPUT_DIR = BASE_DIR / "outputs"
EMBEDDINGS_FILE = BASE_DIR / "face_db_embeddings.json"  # persisted embedding DB
RECOGNITION_MODEL = "ArcFace"  # DeepFace recognition backbone
MAX_LOG_ITEMS = 120  # cap on retained face-log events
# Real-ESRGAN x4 super-resolution settings; weights are downloaded on demand.
ESRGAN_MODEL_URL = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
ESRGAN_MODEL_FILENAME = "RealESRGAN_x4plus.pth"
ESRGAN_SCALE = 4
ESRGAN_MAX_DIMENSION = 1024  # inputs longer than this are downscaled first
_ESRGAN_MODEL = None  # lazily-initialized RealESRGANer singleton (see load_esrgan_model)
for folder in (DB_DIR, OUTPUT_DIR):
    folder.mkdir(parents=True, exist_ok=True)
# Custom CSS injected into the Gradio app: sky/blue theme variables, card
# panels, button variants, and the log-feed card layout used by
# render_log_feed / render_db_overview.
CUSTOM_CSS = """
:root {
--sky-1: #f6fbff;
--sky-2: #eef7ff;
--sky-3: #dbeeff;
--sky-4: #b9ddff;
--sky-5: #2563eb;
--sky-6: #0f172a;
}
.gradio-container {
background: linear-gradient(180deg, #f8fbff 0%, #f1f8ff 100%);
}
.app-shell {
max-width: 1480px;
margin: 0 auto;
}
.hero-card {
background: linear-gradient(135deg, rgba(37, 99, 235, 0.10), rgba(125, 211, 252, 0.16));
border: 1px solid rgba(37, 99, 235, 0.16);
border-radius: 24px;
padding: 20px 24px;
margin-bottom: 14px;
box-shadow: 0 12px 34px rgba(37, 99, 235, 0.08);
}
.panel {
background: rgba(255, 255, 255, 0.90);
border: 1px solid rgba(148, 163, 184, 0.18);
border-radius: 24px;
padding: 14px;
box-shadow: 0 14px 36px rgba(15, 23, 42, 0.06);
}
.soft-note {
background: linear-gradient(180deg, rgba(239, 246, 255, 0.95), rgba(255, 255, 255, 0.96));
border: 1px dashed rgba(37, 99, 235, 0.25);
border-radius: 18px;
padding: 12px 14px;
color: #1e3a8a;
}
.blue-btn button,
.blue-btn button:hover {
background: linear-gradient(90deg, #2563eb 0%, #38bdf8 100%) !important;
color: white !important;
border: none !important;
}
.neutral-btn button,
.neutral-btn button:hover {
background: white !important;
color: #0f172a !important;
border: 1px solid rgba(148, 163, 184, 0.35) !important;
}
.status-box {
background: white;
border: 1px solid rgba(37, 99, 235, 0.12);
border-radius: 18px;
padding: 12px 14px;
}
.log-feed {
display: flex;
flex-direction: column;
gap: 12px;
}
.log-card {
display: grid;
grid-template-columns: auto 1fr auto;
gap: 14px;
align-items: start;
background: linear-gradient(180deg, rgba(255,255,255,0.98), rgba(248,250,252,0.96));
border: 1px solid rgba(148, 163, 184, 0.22);
border-radius: 20px;
padding: 12px;
box-shadow: 0 10px 28px rgba(15, 23, 42, 0.06);
}
.log-thumb-wrap {
display: flex;
flex-direction: column;
gap: 6px;
}
.log-thumb {
display: block;
max-width: none;
height: auto;
border-radius: 14px;
background: #e2e8f0;
}
.log-thumb-label {
font-size: 12px;
font-weight: 700;
color: #475569;
text-transform: uppercase;
letter-spacing: 0.04em;
}
.log-main {
min-width: 0;
}
.log-topline {
display: flex;
flex-wrap: wrap;
gap: 10px;
align-items: center;
margin-bottom: 6px;
}
.log-name {
font-size: 28px;
font-weight: 800;
color: #1e293b;
line-height: 1.05;
}
.log-badge {
display: inline-flex;
align-items: center;
border-radius: 999px;
padding: 6px 14px;
font-size: 18px;
font-weight: 800;
color: white;
}
.log-badge.matched {
background: linear-gradient(90deg, #16a34a 0%, #22c55e 100%);
}
.log-badge.unknown {
background: linear-gradient(90deg, #dc2626 0%, #ef4444 100%);
}
.log-meta {
display: flex;
flex-wrap: wrap;
gap: 14px;
color: #334155;
font-size: 15px;
margin-bottom: 8px;
}
.log-reason {
color: #1e40af;
font-size: 15px;
line-height: 1.5;
}
.log-time {
text-align: right;
color: #64748b;
font-size: 16px;
font-weight: 700;
white-space: nowrap;
}
.log-empty {
padding: 22px;
border-radius: 18px;
border: 1px dashed rgba(148, 163, 184, 0.35);
color: #475569;
background: rgba(255,255,255,0.78);
}
"""
def sanitize_username(username: str) -> str:
cleaned = re.sub(r"[^a-zA-Z0-9ก-๙_-]+", "_", username.strip())
cleaned = re.sub(r"_+", "_", cleaned).strip("_")
return cleaned or "user"
def to_uint8_rgb(image: Any) -> np.ndarray:
    """Coerce an arbitrary image-like input into an HxWx3 uint8 RGB array."""
    arr = np.array(image)
    # Grayscale -> replicate to 3 channels; RGBA -> drop the alpha channel.
    if arr.ndim == 2:
        arr = cv2.cvtColor(arr, cv2.COLOR_GRAY2RGB)
    if arr.ndim == 3 and arr.shape[2] == 4:
        arr = cv2.cvtColor(arr, cv2.COLOR_RGBA2RGB)
    if arr.dtype == np.uint8:
        return arr
    # Float images in [0, 1] get rescaled to [0, 255]; everything else is
    # clipped into the uint8 range as-is.
    scale = 255.0 if np.max(arr) <= 1.0 else 1.0
    return (arr * scale).clip(0, 255).astype(np.uint8)
def save_rgb_image(path: Path, rgb: np.ndarray) -> None:
    """Write an RGB array to *path* as an image file, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    # OpenCV writes BGR, so flip the channel order before saving.
    cv2.imwrite(str(path), cv2.cvtColor(to_uint8_rgb(rgb), cv2.COLOR_RGB2BGR))
def expand_bbox(bbox: List[float], image_shape: Tuple[int, int, int], expand_pct: float = 12.0) -> List[int]:
    """Grow *bbox* by *expand_pct* percent per side, clamped to the image."""
    height, width = image_shape[:2]
    x1, y1, x2, y2 = (int(v) for v in bbox)
    # Pad relative to the box size (at least 1 px wide/tall to avoid zero pads).
    pad_x = int(max(1, x2 - x1) * (expand_pct / 100.0))
    pad_y = int(max(1, y2 - y1) * (expand_pct / 100.0))
    return [
        max(0, x1 - pad_x),
        max(0, y1 - pad_y),
        min(width, x2 + pad_x),
        min(height, y2 + pad_y),
    ]
def crop_rgb(rgb: np.ndarray, bbox: List[int]) -> np.ndarray:
    """Return an owned (copied) sub-image for the [x1, y1, x2, y2] box."""
    left, top, right, bottom = bbox
    return rgb[top:bottom, left:right].copy()
def looks_like_face_crop(rgb: np.ndarray) -> bool:
    """Heuristic: could this image plausibly already be a tight face crop?

    Requires at least 48 px on the short side and a width/height ratio in
    the roughly-portrait-to-slightly-landscape range [0.55, 1.8].
    """
    h, w = rgb.shape[:2]
    if min(h, w) < 48:
        return False
    return 0.55 <= w / max(h, 1) <= 1.8
def get_largest_face(detections: Any) -> Dict[str, Any] | None:
    """Pick the face with the largest bbox area from a RetinaFace result.

    Returns None when *detections* is not a dict or contains no usable
    4-element "facial_area" entries.
    """
    if not isinstance(detections, dict):
        return None
    best_face = None
    best_area = -1
    for face in detections.values():
        area_box = face.get("facial_area")
        if not area_box or len(area_box) != 4:
            continue
        x1, y1, x2, y2 = (int(v) for v in area_box)
        area = max(0, x2 - x1) * max(0, y2 - y1)
        if area > best_area:
            best_area = area
            best_face = face
    return best_face
def detect_face_with_fallback(rgb: np.ndarray) -> Dict[str, Any] | None:
    """Detect the largest face; retry on an upscaled copy for tiny images.

    Returns the RetinaFace face dict (with "facial_area" mapped back to the
    original resolution after a retry) or None when nothing is found.
    Detector exceptions are treated the same as "no faces".
    """
    try:
        detections = RetinaFace.detect_faces(rgb)
    except Exception:
        detections = {}
    face = get_largest_face(detections)
    if face is not None:
        return face
    h, w = rgb.shape[:2]
    min_side = min(h, w)
    if min_side < 160:
        # Small frames often defeat the detector: enlarge so the short side
        # reaches at least ~160 px and try once more.
        scale = max(2, int(math.ceil(160 / max(1, min_side))))
        enlarged = cv2.resize(rgb, (w * scale, h * scale), interpolation=cv2.INTER_CUBIC)
        try:
            detections = RetinaFace.detect_faces(enlarged)
        except Exception:
            detections = {}
        face = get_largest_face(detections)
        if face is not None:
            # Map the bbox from the enlarged image back to source coordinates.
            x1, y1, x2, y2 = [int(v) for v in face["facial_area"]]
            face["facial_area"] = [
                max(0, x1 // scale),
                max(0, y1 // scale),
                min(w, x2 // scale),
                min(h, y2 // scale),
            ]
            return face
    return None
def align_face_with_retinaface(rgb: np.ndarray, bbox: List[int], expand_pct: float = 12.0) -> np.ndarray:
    """Crop *bbox* (expanded) and return an aligned face when possible.

    Alignment via RetinaFace.extract_faces is best-effort: on failure or an
    empty result the plain expanded crop is returned instead.
    Raises ValueError when the crop itself is empty.
    """
    expanded = expand_bbox(bbox, rgb.shape, expand_pct=expand_pct)
    face_crop = crop_rgb(rgb, expanded)
    if face_crop.size == 0:
        raise ValueError("ไม่สามารถ crop ใบหน้าจากภาพได้")
    try:
        aligned_faces = RetinaFace.extract_faces(img_path=face_crop, align=True, expand_face_area=0)
        if aligned_faces:
            return to_uint8_rgb(aligned_faces[0])
    except Exception:
        # Alignment is optional polish; fall through to the raw crop.
        pass
    return to_uint8_rgb(face_crop)
def load_esrgan_model():
    """Return the process-wide RealESRGANer instance, creating it on first use.

    Downloads the x4plus weights on demand. Raises RuntimeError when the
    optional torch / basicsr / realesrgan stack is not installed (see the
    guarded imports at the top of the file).
    """
    global _ESRGAN_MODEL
    if _ESRGAN_MODEL is not None:
        return _ESRGAN_MODEL
    if torch is None or RRDBNet is None or RealESRGANer is None:
        raise RuntimeError("ยังไม่ได้ติดตั้ง torch / realesrgan / basicsr สำหรับ Real-ESRGAN")
    weights_dir = BASE_DIR / "weights"
    weights_dir.mkdir(parents=True, exist_ok=True)
    model_path = weights_dir / ESRGAN_MODEL_FILENAME
    if not model_path.exists():
        # Download to a temp file and move it into place atomically so an
        # interrupted download never leaves a corrupt weights file that would
        # block every future load.
        tmp_path = model_path.with_suffix(model_path.suffix + ".part")
        try:
            urllib.request.urlretrieve(ESRGAN_MODEL_URL, tmp_path)
            os.replace(tmp_path, model_path)
        finally:
            if tmp_path.exists() and not model_path.exists():
                tmp_path.unlink(missing_ok=True)
    # Standard RealESRGAN_x4plus architecture hyper-parameters.
    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=ESRGAN_SCALE)
    use_cuda = torch.cuda.is_available()
    _ESRGAN_MODEL = RealESRGANer(
        scale=ESRGAN_SCALE,
        model_path=str(model_path),
        model=model,
        tile=0,
        tile_pad=10,
        pre_pad=0,
        half=use_cuda,  # fp16 inference only when running on GPU
        gpu_id=0 if use_cuda else None,
    )
    return _ESRGAN_MODEL
def trim_image_for_esrgan(rgb: np.ndarray) -> np.ndarray:
    """Crop height/width down to multiples of 4, as the SR model requires.

    Raises ValueError when the image is too small to trim (under 4 px on a
    side); returns the input unchanged when it is already aligned.
    """
    h, w = rgb.shape[:2]
    new_h, new_w = h - h % 4, w - w % 4
    if new_h <= 0 or new_w <= 0:
        raise ValueError("ภาพเล็กเกินไปสำหรับ ESRGAN")
    if (new_h, new_w) == (h, w):
        return rgb
    return rgb[:new_h, :new_w]
def maybe_downscale_for_esrgan(rgb: np.ndarray, max_dimension: int = ESRGAN_MAX_DIMENSION) -> np.ndarray:
    """Shrink *rgb* so its longest side fits *max_dimension*, keeping aspect.

    Images already within bounds are returned untouched. Resized outputs are
    re-trimmed to multiples of 4 for the SR model; a minimum of 4 px per
    side is enforced on the resize target.
    """
    h, w = rgb.shape[:2]
    longest = max(h, w)
    if longest <= max_dimension:
        return rgb
    factor = max_dimension / float(longest)
    target_size = (max(4, int(round(w * factor))), max(4, int(round(h * factor))))
    shrunk = cv2.resize(rgb, target_size, interpolation=cv2.INTER_AREA)
    return trim_image_for_esrgan(shrunk)
def enhance_with_esrgan(rgb: np.ndarray) -> np.ndarray:
    """Run Real-ESRGAN x4 super-resolution over *rgb*, returning uint8 RGB."""
    upsampler = load_esrgan_model()
    # Trim to multiples of 4 and bound the size before feeding the model.
    model_input = maybe_downscale_for_esrgan(trim_image_for_esrgan(to_uint8_rgb(rgb)))
    enhanced, _ = upsampler.enhance(model_input, outscale=ESRGAN_SCALE)
    return to_uint8_rgb(enhanced)
def maybe_enhance_for_embedding(rgb: np.ndarray, use_esrgan: bool) -> np.ndarray:
    """Optionally super-resolve the face crop before embedding extraction."""
    return enhance_with_esrgan(rgb) if use_esrgan else to_uint8_rgb(rgb)
def seconds_to_hhmmss(seconds: float) -> str:
    """Format a duration (clamped at zero) as HH:MM:SS.mmm."""
    total_ms = int(max(0, seconds) * 1000)
    total_sec, ms = divmod(total_ms, 1000)
    total_min, secs = divmod(total_sec, 60)
    hours, minutes = divmod(total_min, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{ms:03d}"
def cosine_distance(vec1: List[float], vec2: List[float]) -> float:
    """Cosine distance in [0, 2]; returns 1.0 when either vector is zero."""
    a = np.asarray(vec1, dtype=np.float32)
    b = np.asarray(vec2, dtype=np.float32)
    norm_product = float(np.linalg.norm(a) * np.linalg.norm(b))
    if norm_product == 0:
        return 1.0
    # Clamp against float round-off before converting similarity to distance.
    cos_sim = min(1.0, max(-1.0, float(np.dot(a, b) / norm_product)))
    return 1.0 - cos_sim
def embeddings_template() -> Dict[str, Any]:
    """Return a fresh, empty embedding-database document."""
    return {"model_name": RECOGNITION_MODEL, "entries": []}
def reset_face_database_on_startup() -> None:
    """Wipe every registered face image and reset the embeddings file."""
    if DB_DIR.exists():
        for entry in DB_DIR.iterdir():
            if entry.is_dir():
                shutil.rmtree(entry, ignore_errors=True)
            elif entry.is_file():
                entry.unlink(missing_ok=True)
    save_embeddings(embeddings_template())
def load_embeddings() -> Dict[str, Any]:
    """Read the embeddings JSON; return an empty template on any problem.

    Any read/parse error or a document missing the "entries" key yields a
    fresh template rather than raising.
    """
    if not EMBEDDINGS_FILE.exists():
        return embeddings_template()
    try:
        with open(EMBEDDINGS_FILE, "r", encoding="utf-8") as fh:
            payload = json.load(fh)
        if "entries" in payload:
            return payload
        return embeddings_template()
    except Exception:
        return embeddings_template()
def save_embeddings(data: Dict[str, Any]) -> None:
    """Persist the embedding database as pretty-printed UTF-8 JSON."""
    with open(EMBEDDINGS_FILE, "w", encoding="utf-8") as fh:
        json.dump(data, fh, ensure_ascii=False, indent=2)
# Clear all registered faces every time the script starts (fresh demo state).
reset_face_database_on_startup()
def represent_image(image: Path | np.ndarray) -> List[float]:
    """Compute an ArcFace embedding for an already-cropped/aligned face.

    Accepts a file path or an in-memory image. Detection and alignment are
    skipped (detector_backend="skip") because callers pass pre-processed
    crops. Raises ValueError when DeepFace yields no embedding.
    """
    image_input: str | np.ndarray
    if isinstance(image, Path):
        image_input = str(image)
    else:
        image_input = to_uint8_rgb(image)
    objs = DeepFace.represent(
        img_path=image_input,
        model_name=RECOGNITION_MODEL,
        detector_backend="skip",
        align=False,
        enforce_detection=False,
        normalization="ArcFace",
    )
    # DeepFace returns either a list of face dicts or a single dict,
    # depending on version; accept both shapes.
    if isinstance(objs, list) and objs:
        first = objs[0]
        if isinstance(first, dict) and "embedding" in first:
            return first["embedding"]
    if isinstance(objs, dict) and "embedding" in objs:
        return objs["embedding"]
    raise ValueError("ไม่สามารถสร้าง embedding จากภาพใบหน้าได้")
def represent_images_batch(images: List[np.ndarray]) -> List[List[float]]:
    """Compute ArcFace embeddings for several face crops in one DeepFace call.

    Returns one embedding per input image, in order. Raises ValueError when
    the batched result has an unexpected shape or an image yields no
    embedding.
    """
    if not images:
        return []
    batch_input = [to_uint8_rgb(img) for img in images]
    objs = DeepFace.represent(
        img_path=batch_input,
        model_name=RECOGNITION_MODEL,
        detector_backend="skip",
        align=False,
        enforce_detection=False,
        normalization="ArcFace",
    )
    if not isinstance(objs, list):
        raise ValueError("รูปแบบผลลัพธ์จาก batch recognition ไม่ถูกต้อง")
    embeddings: List[List[float]] = []
    for item in objs:
        # Per-image results may be a list of face dicts or a single dict,
        # depending on DeepFace version; accept both.
        if isinstance(item, list) and item:
            first = item[0]
            if isinstance(first, dict) and "embedding" in first:
                embeddings.append(first["embedding"])
                continue
        if isinstance(item, dict) and "embedding" in item:
            embeddings.append(item["embedding"])
            continue
        raise ValueError("ไม่สามารถสร้าง embedding แบบ batch จากภาพใบหน้าได้")
    return embeddings
def prepare_reference_embeddings(entries: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], np.ndarray, np.ndarray]:
    """Stack usable reference embeddings into a matrix for batch matching.

    Returns (entries that carried a valid non-empty 1-D embedding, float32
    matrix of those embeddings row-aligned with the entries, per-row L2
    norms). Empty arrays are returned when nothing is usable.
    """
    usable: List[Dict[str, Any]] = []
    rows: List[np.ndarray] = []
    for entry in entries:
        raw = entry.get("embedding")
        if not raw:
            continue
        row = np.asarray(raw, dtype=np.float32)
        if row.ndim == 1 and row.size > 0:
            usable.append(entry)
            rows.append(row)
    if not rows:
        return usable, np.empty((0, 0), dtype=np.float32), np.empty((0,), dtype=np.float32)
    matrix = np.stack(rows, axis=0)
    return usable, matrix, np.linalg.norm(matrix, axis=1)
def render_db_overview() -> str:
    """Render the registration-database summary panel as HTML.

    Counts stored face images per user folder and lists the top 12 users by
    image count. Display names originate from user input, so they are
    HTML-escaped before being embedded in the markup (XSS hardening).
    """
    data = load_embeddings()
    counts: Dict[str, int] = {}
    display_names: Dict[str, str] = {}
    for entry in data.get("entries", []):
        folder = entry.get("user_folder", "user")
        counts[folder] = counts.get(folder, 0) + 1
        display_names[folder] = entry.get("user_display", folder)
    total_people = len(counts)
    total_faces = sum(counts.values())
    if not counts:
        list_items = "<li>ยังไม่มีข้อมูลลงทะเบียน</li>"
    else:
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:12]
        # Escape user-provided names so they cannot inject markup/script.
        list_items = "".join(
            f"<li><b>{html.escape(str(display_names.get(folder, folder)))}</b> — {count} ภาพ</li>"
            for folder, count in ranked
        )
    return f"""
<div class='soft-note'>
  <div style='display:flex;gap:18px;flex-wrap:wrap;margin-bottom:10px'>
    <div><b>{total_people}</b><br/>บุคคลที่ลงทะเบียน</div>
    <div><b>{total_faces}</b><br/>ภาพใบหน้าในระบบ</div>
    <div><b>{RECOGNITION_MODEL}</b><br/>โมเดล Recognition</div>
  </div>
  <div><b>ฐานข้อมูลปัจจุบัน</b></div>
  <ul style='margin:8px 0 0 18px;padding:0'>{list_items}</ul>
</div>
"""
def load_user_gallery(user_folder: str) -> List[Tuple[str, str]]:
    """List up to 24 stored face images for a user as (path, caption) pairs.

    Sorted descending by filename, which starts with a timestamp, so the
    newest registrations appear first. Returns [] for unknown users.
    """
    user_dir = DB_DIR / user_folder
    if not user_dir.exists():
        return []
    image_files = sorted(
        (p for p in user_dir.iterdir() if p.suffix.lower() in {".jpg", ".jpeg", ".png"}),
        reverse=True,
    )
    return [(str(p), p.stem) for p in image_files[:24]]
def rebuild_embeddings() -> Tuple[str, str]:
    """Recompute the embeddings file from every image stored under DB_DIR.

    Walks the per-user folders, embeds each jpg/jpeg/png, and rewrites the
    JSON database from scratch. Images that fail embedding are skipped and
    counted. Returns (status message, refreshed DB-overview HTML).
    """
    data = embeddings_template()
    image_paths = sorted(
        [
            p
            for p in DB_DIR.rglob("*")
            if p.is_file() and p.suffix.lower() in {".jpg", ".jpeg", ".png"}
        ]
    )
    success = 0
    failed = 0
    for img_path in image_paths:
        # The folder name serves as both the stable id and the display name
        # here (original display names are not recoverable from disk).
        user_folder = img_path.parent.name
        try:
            embedding = represent_image(img_path)
            data["entries"].append(
                {
                    "user_folder": user_folder,
                    "user_display": user_folder,
                    "image_path": str(img_path),
                    "embedding": embedding,
                    "created_at": datetime.now().isoformat(timespec="seconds"),
                }
            )
            success += 1
        except Exception:
            failed += 1
    save_embeddings(data)
    message = f"✅ รีเฟรชฐานข้อมูลเรียบร้อย: {success} ภาพ | ข้าม {failed} ภาพ"
    return message, render_db_overview()
def register_face(image: np.ndarray, username: str, use_esrgan: bool):
    """Gradio handler: register one face image for *username*.

    Detects a face (with a whole-image fallback for small tight crops),
    aligns it, optionally super-resolves it, saves the crop under
    DB_DIR/<user>/, and appends its embedding to the JSON database.
    Returns (status markdown, user gallery, DB-overview HTML).
    """
    if image is None:
        return "⚠️ กรุณาอัปโหลดรูปสำหรับลงทะเบียน", [], render_db_overview()
    display_name = username.strip()
    if not display_name:
        return "⚠️ กรุณากรอกชื่อ user ก่อนบันทึก", [], render_db_overview()
    user_folder = sanitize_username(display_name)
    rgb = to_uint8_rgb(image)
    try:
        face = detect_face_with_fallback(rgb)
        used_full_image_fallback = False
        if face is not None:
            aligned_face = align_face_with_retinaface(rgb, face["facial_area"], expand_pct=14)
        elif looks_like_face_crop(rgb):
            # Nothing detected, but the upload already looks like a tight
            # face crop — accept the whole image as the face.
            aligned_face = to_uint8_rgb(rgb)
            used_full_image_fallback = True
        else:
            return "⚠️ ไม่พบใบหน้าในภาพลงทะเบียน", [], render_db_overview()
        embedding_input = maybe_enhance_for_embedding(aligned_face, use_esrgan=use_esrgan)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Timestamp + short uuid keeps filenames unique and sortable.
        save_path = DB_DIR / user_folder / f"{timestamp}_{uuid.uuid4().hex[:6]}.jpg"
        save_rgb_image(save_path, embedding_input)
        embedding = represent_image(embedding_input)
        data = load_embeddings()
        data.setdefault("entries", [])
        data["entries"].append(
            {
                "user_folder": user_folder,
                "user_display": display_name,
                "image_path": str(save_path),
                "embedding": embedding,
                "created_at": datetime.now().isoformat(timespec="seconds"),
            }
        )
        save_embeddings(data)
        gallery = load_user_gallery(user_folder)
        message = (
            f"✅ บันทึกใบหน้าสำเร็จสำหรับ user: **{display_name}** \n"
            f"- เก็บไฟล์ที่: `{save_path}` \n"
            f"- ใช้โมเดล recognition: `{RECOGNITION_MODEL}` \n"
            f"- ESRGAN ก่อน embedding: `{'on' if use_esrgan else 'off'}`"
        )
        if used_full_image_fallback:
            message += "\n- หมายเหตุ: ใช้ทั้งภาพเป็น face crop สำหรับลงทะเบียน เพราะ detector ไม่พบใบหน้าจากภาพขนาดเล็ก"
        return message, gallery, render_db_overview()
    except Exception as exc:
        # Surface any pipeline failure to the UI rather than crashing Gradio.
        return f"❌ ลงทะเบียนไม่สำเร็จ: {exc}", [], render_db_overview()
def draw_face_annotations(rgb: np.ndarray, faces: List[Dict[str, Any]]) -> np.ndarray:
    """Draw bounding boxes plus identity labels onto a copy of *rgb*.

    Matched identities get a blue box; "Unknown" a light-blue one. The label
    shows the cosine distance when finite, otherwise the detector confidence.
    """
    canvas = to_uint8_rgb(rgb).copy()
    for item in faces:
        x1, y1, x2, y2 = item["bbox"]
        identity = item["identity"]
        distance = item["distance"]
        score = item.get("score", 0.0)
        color = (37, 99, 235) if identity != "Unknown" else (14, 165, 233)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
        if math.isfinite(distance):
            label = f"{identity} | d={distance:.3f}"
        else:
            # No usable distance (e.g. empty reference DB): show detector score.
            label = f"{identity} | conf={score:.2f}"
        (tw, th), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.52, 2)
        # Filled label background just above the box, clamped to the frame.
        label_y1 = max(0, y1 - th - baseline - 10)
        label_y2 = label_y1 + th + baseline + 8
        label_x2 = min(canvas.shape[1], x1 + tw + 12)
        cv2.rectangle(canvas, (x1, label_y1), (label_x2, label_y2), color, -1)
        cv2.putText(
            canvas,
            label,
            (x1 + 6, label_y2 - 6),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.52,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )
    return canvas
def face_to_data_uri(rgb: np.ndarray) -> str:
    """Encode an RGB array as a base64 JPEG data URI for inline HTML <img>."""
    buffer = BytesIO()
    Image.fromarray(to_uint8_rgb(rgb)).save(buffer, format="JPEG", quality=88)
    payload = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/jpeg;base64,{payload}"
def render_log_feed(log_items: List[Dict[str, Any]]) -> str:
    """Render the face-event log as an HTML card feed (items come newest-first).

    Each item carries identity/match metadata plus a data-URI thumbnail of
    the processed face (from face_to_data_uri). Text fields are HTML-escaped
    before interpolation. Returns a placeholder panel when the log is empty.
    """
    if not log_items:
        return """
<div class='log-empty'>
ยังไม่มี face logs ตอนนี้ ระบบจะแสดงชื่อคนที่พบ, ค่า distance, threshold และเหตุผลที่เป็น Unknown ตรงนี้
</div>
"""
    cards: List[str] = []
    for item in log_items:
        processed_thumb = item.get("processed_thumbnail", "")
        identity = html.escape(str(item.get("identity", "Unknown")))
        closest = html.escape(str(item.get("best_match_name", "Unknown")))
        status = item.get("status", "Unknown")
        badge_class = "matched" if status == "Matched" else "unknown"
        event_time = html.escape(str(item.get("event_time", "-")))
        video_time = html.escape(str(item.get("video_time", "-")))
        reason = html.escape(str(item.get("reason", "")))
        distance = item.get("distance")
        threshold = item.get("threshold")
        bbox = html.escape(str(item.get("bbox", "-")))
        processed_label = html.escape(str(item.get("processed_label", "Processed preview")))
        processed_width = int(item.get("processed_width", 0) or 0)
        processed_height = int(item.get("processed_height", 0) or 0)
        # Non-finite / missing numbers render as "-" instead of breaking format.
        distance_text = f"{distance:.4f}" if isinstance(distance, (int, float)) and math.isfinite(distance) else "-"
        threshold_text = f"{threshold:.2f}" if isinstance(threshold, (int, float)) else "-"
        size_text = (
            f"{processed_width}x{processed_height}"
            if processed_width > 0 and processed_height > 0
            else "-"
        )
        # Explicit width/height attributes keep the thumbnail at native size.
        img_size_attrs = (
            f" width='{processed_width}' height='{processed_height}'"
            if processed_width > 0 and processed_height > 0
            else ""
        )
        cards.append(
            f"""
<div class='log-card'>
<div class='log-thumb-wrap'>
<div class='log-thumb-label'>{processed_label}</div>
<img class='log-thumb' src='{processed_thumb}' alt='processed face thumbnail'{img_size_attrs} />
</div>
<div class='log-main'>
<div class='log-topline'>
<div class='log-name'>{identity}</div>
<div class='log-badge {badge_class}'>{status}</div>
</div>
<div class='log-meta'>
<span>video_time: <b>{video_time}</b></span>
<span>distance: <b>{distance_text}</b></span>
<span>threshold: <b>{threshold_text}</b></span>
<span>closest match: <b>{closest}</b></span>
<span>preview: <b>{processed_label}</b></span>
<span>size: <b>{size_text}</b></span>
</div>
<div class='log-reason'>{reason}</div>
<div class='log-meta'>
<span>bbox: <b>{bbox}</b></span>
</div>
</div>
<div class='log-time'>{event_time}</div>
</div>
"""
        )
    return f"<div class='log-feed'>{''.join(cards)}</div>"
def find_best_match(embedding: List[float], entries: List[Dict[str, Any]], threshold: float) -> Dict[str, Any]:
    """Compare one embedding against every DB entry by cosine distance.

    Returns a result dict with the matched identity (or "Unknown" when over
    threshold), the closest entry's name, the best distance, the threshold
    used, and a human-readable (Thai) reason string.
    """
    closest_name = "Unknown"
    closest_distance = float("inf")
    for entry in entries:
        ref_embedding = entry.get("embedding")
        if not ref_embedding:
            continue
        candidate = cosine_distance(embedding, ref_embedding)
        if candidate < closest_distance:
            closest_distance = candidate
            closest_name = entry.get("user_display") or entry.get("user_folder") or "Unknown"
    matched = closest_distance <= threshold
    if not math.isfinite(closest_distance):
        reason = "ไม่สามารถคำนวณ distance ที่ใช้งานได้จาก embedding ชุดนี้"
    elif matched:
        reason = (
            f"Matched {closest_name} เพราะ cosine distance {closest_distance:.4f} <= threshold {threshold:.2f}"
        )
    else:
        reason = (
            f"Unknown เพราะ cosine distance {closest_distance:.4f} > threshold {threshold:.2f}; "
            f"closest match คือ {closest_name}"
        )
    return {
        "identity": closest_name if matched else "Unknown",
        "best_match_name": closest_name,
        "distance": closest_distance,
        "matched": matched,
        "threshold": threshold,
        "reason": reason,
    }
def find_best_matches_batch(
    embeddings: List[List[float]],
    entries: List[Dict[str, Any]],
    threshold: float,
    ref_matrix: np.ndarray,
    ref_norms: np.ndarray,
) -> List[Dict[str, Any]]:
    """Vectorized cosine-distance matching of many embeddings at once.

    *ref_matrix*/*ref_norms* come from prepare_reference_embeddings and must
    be row-aligned with *entries*. Returns one result dict per query
    embedding, in the same shape as find_best_match's output.
    """
    if not embeddings:
        return []
    if ref_matrix.size == 0 or not entries:
        # No usable reference DB: every query is Unknown at infinite distance.
        return [
            {
                "identity": "Unknown",
                "best_match_name": "Unknown",
                "distance": float("inf"),
                "matched": False,
                "threshold": threshold,
                "reason": "ไม่มีฐานข้อมูล embedding ที่พร้อมใช้งานสำหรับการเปรียบเทียบ",
            }
            for _ in embeddings
        ]
    query = np.asarray(embeddings, dtype=np.float32)
    if query.ndim == 1:
        query = query[None, :]
    query_norms = np.linalg.norm(query, axis=1)
    # Guard zero norms with a tiny epsilon to keep the division finite.
    norm_products = query_norms[:, None] * ref_norms[None, :]
    norm_products = np.where(norm_products == 0, 1e-12, norm_products)
    cos_sim = np.clip(np.matmul(query, ref_matrix.T) / norm_products, -1.0, 1.0)
    distance_rows = 1.0 - cos_sim
    results: List[Dict[str, Any]] = []
    for row in distance_rows:
        best_idx = int(np.argmin(row))
        best_distance = float(row[best_idx])
        best_entry = entries[best_idx]
        best_name = best_entry.get("user_display") or best_entry.get("user_folder") or "Unknown"
        matched = best_distance <= threshold
        if matched:
            reason = f"Matched {best_name} เพราะ cosine distance {best_distance:.4f} <= threshold {threshold:.2f}"
        else:
            reason = (
                f"Unknown เพราะ cosine distance {best_distance:.4f} > threshold {threshold:.2f}; "
                f"closest match คือ {best_name}"
            )
        results.append(
            {
                "identity": best_name if matched else "Unknown",
                "best_match_name": best_name,
                "distance": best_distance,
                "matched": matched,
                "threshold": threshold,
                "reason": reason,
            }
        )
    return results
def reset_runtime_panel():
    """Return the idle values for the runtime status, preview, and log widgets."""
    idle_status = "🟦 พร้อมประมวลผลวิดีโอ — อัปโหลดวิดีโอที่แท็บซ้ายแล้วกดเริ่ม"
    return idle_status, None, render_log_feed([])
def resolve_video_path(video_value):
    """Extract a filesystem path from the various shapes gr.Video can return.

    Handles plain str paths, dicts (``video``/``path``/``name`` keys), and
    list/tuple wrappers. Returns None when no usable path is present —
    including empty sequences, which previously fell through to str() and
    produced a truthy nonsense path like "[]" that defeated the caller's
    "no video uploaded" check.
    """
    if video_value is None:
        return None
    if isinstance(video_value, str):
        return video_value
    if isinstance(video_value, dict):
        return video_value.get("video") or video_value.get("path") or video_value.get("name")
    if isinstance(video_value, (list, tuple)):
        return video_value[0] if video_value else None
    return str(video_value)
def process_video_stream(
    video_value,
    process_fps: float,
    detector_score_threshold: float,
    recognition_threshold: float,
    expand_pct: int,
    log_cooldown_sec: float,
    use_align: bool,
    use_esrgan: bool,
):
    """Generator Gradio handler: run detection + recognition over a video.

    Yields (status markdown, annotated preview frame, log-feed HTML) as
    processing progresses. Detection runs on every ``detect_every``-th
    source frame so the effective rate approximates *process_fps*;
    recognition is micro-batched once per detection step. Log entries are
    deduped within *log_cooldown_sec* of video time per matched identity
    (or per coarse spatial bucket for Unknowns).
    """
    video_path = resolve_video_path(video_value)
    if not video_path:
        yield "⚠️ กรุณาอัปโหลดวิดีโอก่อนเริ่มประมวลผล", None, render_log_feed([])
        return
    data = load_embeddings()
    entries = data.get("entries", [])
    if not entries:
        # Empty DB: try rebuilding from images on disk before giving up.
        rebuild_message, _ = rebuild_embeddings()
        data = load_embeddings()
        entries = data.get("entries", [])
        if not entries:
            yield (
                "⚠️ ยังไม่มีฐานข้อมูลใบหน้าที่พร้อมใช้งาน กรุณาลงทะเบียนรูปบุคคลก่อน\n\n"
                + rebuild_message,
                None,
                render_log_feed([]),
            )
            return
    valid_entries, ref_matrix, ref_norms = prepare_reference_embeddings(entries)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        yield "❌ เปิดวิดีโอไม่ได้", None, render_log_feed([])
        return
    src_fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # fallback when FPS metadata is missing
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    # Detect on every Nth source frame so the effective rate ~= process_fps.
    detect_every = max(1, int(round(src_fps / max(process_fps, 0.1))))
    last_faces: List[Dict[str, Any]] = []  # latest detections, reused for preview on skipped frames
    log_items: List[Dict[str, Any]] = []  # newest-first event log
    last_logged_second: Dict[str, float] = {}  # dedupe key -> last logged video second
    frame_index = 0
    preview_frame = None
    try:
        while True:
            ok, bgr_frame = cap.read()
            if not ok:
                break
            rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
            if frame_index % detect_every == 0:
                current_faces: List[Dict[str, Any]] = []
                try:
                    detections = RetinaFace.detect_faces(rgb_frame, threshold=detector_score_threshold)
                except Exception:
                    detections = {}
                if isinstance(detections, dict):
                    # Largest faces first so prominent people dominate the log.
                    faces_sorted = sorted(
                        detections.values(),
                        key=lambda face: max(0, int(face["facial_area"][2]) - int(face["facial_area"][0]))
                        * max(0, int(face["facial_area"][3]) - int(face["facial_area"][1])),
                        reverse=True,
                    )
                    pending_faces: List[Dict[str, Any]] = []
                    for face in faces_sorted:
                        raw_bbox = [int(v) for v in face["facial_area"]]
                        bbox = expand_bbox(raw_bbox, rgb_frame.shape, expand_pct=float(expand_pct))
                        raw_face_rgb = crop_rgb(rgb_frame, bbox)
                        aligned_face_rgb = align_face_with_retinaface(
                            rgb_frame, raw_bbox, expand_pct=float(expand_pct)
                        ) if use_align else None
                        preview_face_rgb = aligned_face_rgb if aligned_face_rgb is not None else crop_rgb(rgb_frame, bbox)
                        if preview_face_rgb.size == 0 or raw_face_rgb.size == 0:
                            continue
                        video_second = frame_index / src_fps if src_fps > 0 else 0.0
                        pending_faces.append(
                            {
                                "bbox": bbox,
                                "raw_face_rgb": raw_face_rgb,
                                "preview_face_rgb": preview_face_rgb,
                                "score": float(face.get("score", 0.0)),
                                "video_second": video_second,
                            }
                        )
                    if pending_faces:
                        # One batched embedding + matching pass per detection step.
                        embedding_inputs = [
                            maybe_enhance_for_embedding(item["preview_face_rgb"], use_esrgan=use_esrgan)
                            for item in pending_faces
                        ]
                        batch_embeddings = represent_images_batch(embedding_inputs)
                        matches = find_best_matches_batch(
                            embeddings=batch_embeddings,
                            entries=valid_entries,
                            threshold=recognition_threshold,
                            ref_matrix=ref_matrix,
                            ref_norms=ref_norms,
                        )
                        for item, match, processed_face_rgb in zip(pending_faces, matches, embedding_inputs):
                            bbox = item["bbox"]
                            current_faces.append(
                                {
                                    "bbox": bbox,
                                    "identity": match["identity"],
                                    "distance": match["distance"],
                                    "score": item["score"],
                                }
                            )
                            # Matched people dedupe by name; Unknowns by a coarse
                            # 120-px spatial bucket so distinct strangers still log.
                            bucket_x = bbox[0] // 120
                            bucket_y = bbox[1] // 120
                            dedupe_key = (
                                match["best_match_name"]
                                if match["matched"]
                                else f"Unknown_{bucket_x}_{bucket_y}"
                            )
                            if item["video_second"] - last_logged_second.get(dedupe_key, -999.0) >= log_cooldown_sec:
                                last_logged_second[dedupe_key] = item["video_second"]
                                event_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                                log_items.insert(
                                    0,
                                    {
                                        "event_time": event_time,
                                        "video_time": seconds_to_hhmmss(item["video_second"]),
                                        "identity": match["identity"],
                                        "best_match_name": match["best_match_name"],
                                        "distance": match["distance"],
                                        "bbox": bbox,
                                        "status": "Matched" if match["matched"] else "Unknown",
                                        "threshold": match["threshold"],
                                        "reason": match["reason"],
                                        "processed_thumbnail": face_to_data_uri(processed_face_rgb),
                                        "processed_label": (
                                            ("Aligned face" if use_align else "Raw crop")
                                            + (" + ESRGAN" if use_esrgan else "")
                                        ),
                                        "processed_width": int(processed_face_rgb.shape[1]),
                                        "processed_height": int(processed_face_rgb.shape[0]),
                                    },
                                )
                    # Cap retained events to bound memory and DOM size.
                    log_items = log_items[:MAX_LOG_ITEMS]
                last_faces = current_faces
            # Redraw the preview every frame using the latest known faces.
            preview_frame = draw_face_annotations(rgb_frame, last_faces)
            progress = (frame_index + 1) / total_frames * 100.0 if total_frames > 0 else 0.0
            if frame_index % max(1, detect_every) == 0:
                status = (
                    f"🔄 กำลังประมวลผลวิดีโอ... {progress:.1f}% \n"
                    f"- FPS ต้นฉบับ: `{src_fps:.2f}` \n"
                    f"- ประมวลผลทุก ๆ `{detect_every}` เฟรม \n"
                    f"- Recognition mode: `micro-batch per detection step` \n"
                    f"- Align ระหว่างวิดีโอ: `{'on' if use_align else 'off'}` \n"
                    f"- ESRGAN ก่อน embedding: `{'on' if use_esrgan else 'off'}` \n"
                    f"- Registered identities: `{len({e.get('user_folder', 'u') for e in entries})}` \n"
                    f"- Threshold rule: `distance <= {recognition_threshold:.2f}` ถึงจะนับว่าเป็นคนเดิม \n"
                    f"- Log events: `{len(log_items)}`"
                )
                yield status, preview_frame, render_log_feed(log_items)
            frame_index += 1
    except Exception as exc:
        # Surface the failure to the UI along with whatever was processed so far.
        yield f"❌ เกิดข้อผิดพลาดระหว่างประมวลผล: {exc}", preview_frame, render_log_feed(log_items)
        return
    finally:
        cap.release()
    final_status = (
        f"✅ ประมวลผลเสร็จแล้ว \n"
        f"- จำนวน log event: `{len(log_items)}` \n"
        f"- กติกา match: `distance <= {recognition_threshold:.2f}` \n"
        f"- ESRGAN ก่อน embedding: `{'on' if use_esrgan else 'off'}` \n"
        f"- โมเดล recognition: `{RECOGNITION_MODEL}`"
    )
    yield final_status, preview_frame, render_log_feed(log_items)
# ---------------------------------------------------------------------------
# Gradio UI: left column = registration + video/config tabs, right column =
# streaming face-log feed. Handlers are wired at the bottom of the block.
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue", neutral_hue="slate"),
    css=CUSTOM_CSS,
    title="CCTV Face Recognition with RetinaFace + DeepFace",
) as demo:
    # Hero banner / app description.
    gr.HTML(
        """
<div class='app-shell'>
<div class='hero-card'>
<div style='font-size:30px;font-weight:800;color:#0f172a'>CCTV Face Recognition Dashboard</div>
<div style='margin-top:8px;color:#334155;font-size:15px;line-height:1.65'>
แอป Gradio สำหรับ <b>Face Detection + Alignment + Recognition</b> ด้วย
<b>RetinaFace</b> และ <b>DeepFace (ArcFace)</b><br/>
ฝั่งซ้ายใช้สำหรับลงทะเบียนใบหน้า, อัปโหลดวิดีโอ, live preview และการตั้งค่าความเร็ว/ความแม่นยำ
ส่วนฝั่งขวาแสดง face logs แบบสรุปเหตุการณ์พร้อมเหตุผลของผลรู้จำ
โดยระบบจะล้างฐานข้อมูลใบหน้าทุกครั้งที่เริ่มรันสคริปต์ใหม่
</div>
</div>
</div>
"""
    )
    with gr.Row(equal_height=False):
        with gr.Column(scale=4, elem_classes=["panel"]):
            gr.Markdown("### ฝั่งซ้าย • อัปโหลดและตั้งค่า")
            with gr.Tabs():
                # --- Tab 1: face registration ---
                with gr.Tab("1) Upload รูปลงทะเบียน"):
                    reg_image = gr.Image(
                        type="numpy",
                        label="อัปโหลดรูปบุคคล",
                        sources=["upload", "webcam"],
                        height=330,
                    )
                    reg_user = gr.Textbox(
                        label="User",
                        placeholder="เช่น admin01",
                    )
                    reg_use_esrgan = gr.Checkbox(
                        label="Use ESRGAN before embedding",
                        value=False,
                        info="เหมาะกับรูปจากกล้องที่เบลอหรือรายละเอียดน้อย แต่จะช้ากว่าปกติและโหลดโมเดลครั้งแรกอาจใช้เวลา",
                    )
                    with gr.Row():
                        reg_save_btn = gr.Button("บันทึกลงระบบ", elem_classes=["blue-btn"])
                        reg_refresh_btn = gr.Button("รีเฟรชฐานข้อมูล", elem_classes=["neutral-btn"])
                    reg_status = gr.Markdown(value="พร้อมลงทะเบียน")
                    reg_gallery = gr.Gallery(
                        label="รูปใบหน้าที่เก็บของ user นี้",
                        columns=4,
                        height=260,
                        object_fit="cover",
                    )
                    db_summary = gr.HTML(render_db_overview())
                # --- Tab 2: video upload + processing configuration ---
                with gr.Tab("2) Upload video + Config"):
                    video_input = gr.Video(
                        sources=["upload"],
                        label="อัปโหลดวิดีโอจากกล้องวงจรปิด",
                        height=330,
                    )
                    live_preview = gr.Image(
                        label="Live Preview (วาด BBox ขณะประมวลผล)",
                        type="numpy",
                        interactive=False,
                        height=330,
                    )
                    process_fps = gr.Slider(
                        minimum=1,
                        maximum=12,
                        value=3,
                        step=1,
                        label="Process FPS",
                        info="จำนวนเฟรมต่อวินาทีที่ใช้ตรวจจับและจดจำใบหน้า",
                    )
                    detector_score_threshold = gr.Slider(
                        minimum=0.10,
                        maximum=0.99,
                        value=0.85,
                        step=0.01,
                        label="Detection Score Threshold",
                    )
                    recognition_threshold = gr.Slider(
                        minimum=0.15,
                        maximum=1.00,
                        value=0.85,
                        step=0.01,
                        label="Recognition Threshold (Cosine Distance)",
                        info="กติกาคือ distance ต้องน้อยกว่าหรือเท่ากับค่านี้จึงจะ match; ค่าเริ่มต้น 0.85 จะผ่อนกว่าค่าเดิม",
                    )
                    expand_pct = gr.Slider(
                        minimum=0,
                        maximum=30,
                        value=10,
                        step=1,
                        label="Expand Face Area (%)",
                    )
                    log_cooldown = gr.Slider(
                        minimum=0.0,
                        maximum=10.0,
                        value=0,
                        step=0.5,
                        label="Log Cooldown (seconds)",
                        info="ลด log ซ้ำของคนเดิมในช่วงเวลาใกล้กัน",
                    )
                    use_align = gr.Checkbox(
                        label="Use face alignment",
                        value=True,
                        info="ปิดไว้จะเร็วสุดสำหรับวิดีโอ; เปิดเมื่ออยากเพิ่มความนิ่งของใบหน้าก่อนสร้าง embedding",
                    )
                    use_esrgan = gr.Checkbox(
                        label="Use ESRGAN before embedding",
                        value=False,
                        info="ปรับรายละเอียด face crop จากกล้องก่อนทำ embedding; ช่วยกับภาพเล็กหรือแตก แต่จะช้าลงชัดเจน",
                    )
                    with gr.Row():
                        start_btn = gr.Button("เริ่มประมวลผล", elem_classes=["blue-btn"])
                        clear_btn = gr.Button("ล้างผลลัพธ์", elem_classes=["neutral-btn"])
                    gr.HTML(
                        """
<div class='soft-note'>
ใช้ <b>RetinaFace.detect_faces</b> สำหรับตรวจจับ,
เปิด <b>Use face alignment</b> เมื่อต้องการจัดแนวใบหน้าก่อน recognition,
เปิด <b>Use ESRGAN before embedding</b> เมื่อต้องการเพิ่มรายละเอียด face crop จากภาพกล้องก่อนสร้าง embedding,
และใช้ <b>DeepFace.represent</b> ด้วยโมเดล <b>ArcFace</b> สำหรับ recognition โดยคำนวณจากภาพใน memory เพื่อลดเวลา I/O
</div>
"""
                    )
        # --- Right column: streaming status + face-log feed ---
        with gr.Column(scale=6, elem_classes=["panel"]):
            gr.Markdown("### ฝั่งขวา • Face Logs")
            runtime_status = gr.Markdown(
                value="🟦 พร้อมประมวลผลวิดีโอ — อัปโหลดวิดีโอที่แท็บซ้ายแล้วกดเริ่ม",
                elem_classes=["status-box"],
            )
            face_log_feed = gr.HTML(value=render_log_feed([]))
    # --- Event wiring ---
    reg_save_btn.click(
        fn=register_face,
        inputs=[reg_image, reg_user, reg_use_esrgan],
        outputs=[reg_status, reg_gallery, db_summary],
    )
    reg_refresh_btn.click(
        fn=rebuild_embeddings,
        inputs=[],
        outputs=[reg_status, db_summary],
    )
    # Generator handler: streams status/preview/log updates while processing.
    start_btn.click(
        fn=process_video_stream,
        inputs=[
            video_input,
            process_fps,
            detector_score_threshold,
            recognition_threshold,
            expand_pct,
            log_cooldown,
            use_align,
            use_esrgan,
        ],
        outputs=[runtime_status, live_preview, face_log_feed],
    )
    clear_btn.click(
        fn=reset_runtime_panel,
        inputs=[],
        outputs=[runtime_status, live_preview, face_log_feed],
    )
# Serialize video-processing jobs: one at a time.
demo.queue(default_concurrency_limit=1)
if __name__ == "__main__":
    # Pre-build the ArcFace model so the first request doesn't pay the load cost.
    modeling.build_model(task="facial_recognition", model_name=RECOGNITION_MODEL)
    # Bind on all interfaces; port is overridable via the PORT env var.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")),share=False)